diff --git a/chd/bitstream.go b/chd/bitstream.go new file mode 100644 index 0000000..5da9735 --- /dev/null +++ b/chd/bitstream.go @@ -0,0 +1,190 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +// bitReader reads bits from a byte slice. +type bitReader struct { + data []byte + offset int // bit offset + bits uint // accumulated bits + avail int // bits available in accumulator +} + +// newBitReader creates a new bit reader. +func newBitReader(data []byte) *bitReader { + return &bitReader{data: data} +} + +// read reads count bits from the stream. +func (br *bitReader) read(count int) uint32 { + // Fill accumulator as needed + for br.avail < count { + byteOff := br.offset / 8 + if byteOff >= len(br.data) { + br.bits <<= 8 + br.avail += 8 + continue + } + br.bits = (br.bits << 8) | uint(br.data[byteOff]) + br.avail += 8 + br.offset += 8 + } + + // Extract the bits + br.avail -= count + //nolint:gosec // Safe: bits accumulator is bounded by count which is at most 32 + result := uint32((br.bits >> br.avail) & ((1 << count) - 1)) + return result +} + +// huffmanDecoder decodes Huffman-encoded data for CHD V5 maps. +type huffmanDecoder struct { + lookup []uint32 + nodeBits []uint8 + numCodes int + maxBits int +} + +// newHuffmanDecoder creates a Huffman decoder for the given parameters. +func newHuffmanDecoder(numCodes, maxBits int) *huffmanDecoder { + return &huffmanDecoder{ + numCodes: numCodes, + maxBits: maxBits, + nodeBits: make([]uint8, numCodes), + lookup: make([]uint32, 1<= 16: + numBits = 5 + case hd.maxBits >= 8: + numBits = 4 + default: + numBits = 3 + } + + // Read the tree with RLE decoding + for curNode := 0; curNode < hd.numCodes; { + nodeBits := br.read(numBits) + if nodeBits != 1 { + //nolint:gosec // Safe: nodeBits from Huffman tree is bounded to 0-32 + hd.nodeBits[curNode] = uint8(nodeBits) + curNode++ + continue + } + // RLE encoding: read actual value + nodeBits = br.read(numBits) + if nodeBits == 1 { + // Literal 1 + hd.nodeBits[curNode] = 1 + curNode++ + continue + } + // Repeat count follows + repCount := int(br.read(numBits)) + 3 + //nolint:gosec // Safe: nodeBits from Huffman tree is bounded to 0-32 + curNode = hd.fillNodeBits(curNode, uint8(nodeBits), repCount) + } + + // Build lookup table + return hd.buildLookup() +} + +// fillNodeBits fills nodeBits with a repeated value, returning the new curNode. +func (hd *huffmanDecoder) fillNodeBits(curNode int, value uint8, repCount int) int { + for i := 0; i < repCount && curNode < hd.numCodes; i++ { + hd.nodeBits[curNode] = value + curNode++ + } + return curNode +} + +// buildLookup builds the lookup table from node bits. +// This follows MAME's canonical code assignment which processes from highest to lowest bit length. +func (hd *huffmanDecoder) buildLookup() error { + // Build histogram of bit lengths + bithisto := make([]uint32, 33) + for i := range hd.numCodes { + if hd.nodeBits[i] <= 32 { + bithisto[hd.nodeBits[i]]++ + } + } + + // For each code length, determine the starting code number + // Process from highest to lowest bit length (MAME convention) + var curstart uint32 + for codelen := 32; codelen > 0; codelen-- { + nextstart := (curstart + bithisto[codelen]) >> 1 + bithisto[codelen] = curstart + curstart = nextstart + } + + // Now assign canonical codes and build lookup table + // nodeBits stores the assigned code for each symbol + nodeCodes := make([]uint32, hd.numCodes) + for i := range hd.numCodes { + bits := hd.nodeBits[i] + if bits > 0 { + nodeCodes[i] = bithisto[bits] + bithisto[bits]++ + } + } + + // Build lookup table + for i := range hd.numCodes { + bits := int(hd.nodeBits[i]) + if bits > 0 { + // Set up the entry: (symbol << 5) | numbits + //nolint:gosec // Safe: i bounded by numCodes (16), bits bounded by maxBits (8) + value := uint32((i << 5) | bits) + + // Fill all matching entries + shift := hd.maxBits - bits + base := int(nodeCodes[i]) << shift + end := int(nodeCodes[i]+1)<> 5) + bits := int(entry & 0x1f) + + // Put back unused bits by adjusting the bit reader + if bits < hd.maxBits { + br.avail += hd.maxBits - bits + } + + return symbol +} diff --git a/chd/chd.go b/chd/chd.go new file mode 100644 index 0000000..2ed7025 --- /dev/null +++ b/chd/chd.go @@ -0,0 +1,408 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +// Package chd provides parsing for CHD (Compressed Hunks of Data) disc images. +// CHD is MAME's compressed disc image format, widely used by RetroArch and other emulators. +package chd + +import ( + "fmt" + "io" + "os" +) + +// CHD represents a CHD (Compressed Hunks of Data) disc image. +type CHD struct { + file *os.File + header *Header + hunkMap *HunkMap + tracks []Track +} + +// Open opens a CHD file and parses its header and metadata. +func Open(path string) (*CHD, error) { + file, err := os.Open(path) //nolint:gosec // Path from user input is expected + if err != nil { + return nil, fmt.Errorf("open CHD file: %w", err) + } + + chd := &CHD{file: file} + + if err := chd.init(); err != nil { + _ = file.Close() + return nil, err + } + + return chd, nil +} + +// init initializes the CHD by parsing header, hunk map, and metadata. +func (c *CHD) init() error { + // Parse header + header, err := parseHeader(c.file) + if err != nil { + return fmt.Errorf("parse header: %w", err) + } + c.header = header + + // Create hunk map + hunkMap, err := NewHunkMap(c.file, header) + if err != nil { + return fmt.Errorf("create hunk map: %w", err) + } + c.hunkMap = hunkMap + + // Parse metadata for track information + if header.MetaOffset > 0 { + entries, parseErr := parseMetadata(c.file, header.MetaOffset) + if parseErr != nil { + // Metadata parsing failure is not fatal, continue without track info + c.tracks = nil + return nil //nolint:nilerr // Intentional: metadata parsing failure is non-fatal + } + + tracks, trackErr := parseTracks(entries) + if trackErr != nil { + // Track parsing failure is not fatal, continue without track info + c.tracks = nil + return nil //nolint:nilerr // Intentional: track parsing failure is non-fatal + } + c.tracks = tracks + } + + return nil +} + +// Close closes the CHD file. +func (c *CHD) Close() error { + if c.file != nil { + if err := c.file.Close(); err != nil { + return fmt.Errorf("close CHD file: %w", err) + } + } + return nil +} + +// Header returns the parsed CHD header. +func (c *CHD) Header() *Header { + return c.header +} + +// Tracks returns the parsed track information. +func (c *CHD) Tracks() []Track { + return c.tracks +} + +// Size returns the total logical size (uncompressed) of the CHD data. +func (c *CHD) Size() int64 { + return int64(c.header.LogicalBytes) //nolint:gosec // LogicalBytes is bounded by file size +} + +// SectorReader returns an io.ReaderAt that provides access to decompressed +// sector data with 2048-byte logical sectors (Mode1/Mode2 data portion only). +// This is suitable for ISO9660 filesystem parsing. +// Note: For multi-track CDs with audio tracks first, use DataTrackSectorReader() instead. +func (c *CHD) SectorReader() io.ReaderAt { + return §orReader{ + chd: c, + sectorSize: 2048, + rawMode: false, + } +} + +// DataTrackSectorReader returns an io.ReaderAt for the first data track, +// providing 2048-byte logical sectors. This is essential for discs like +// Neo Geo CD that have audio tracks before the data track. +func (c *CHD) DataTrackSectorReader() io.ReaderAt { + return §orReader{ + chd: c, + sectorSize: 2048, + rawMode: false, + dataTrackStart: c.firstDataTrackSector(), + } +} + +// DataTrackSize returns the logical size of the first data track in bytes. +// For ISO9660 parsing, this is the size in 2048-byte sectors. +func (c *CHD) DataTrackSize() int64 { + for _, track := range c.tracks { + if track.IsDataTrack() { + return int64(track.Frames) * 2048 + } + } + // No data track found, return full size + return int64(c.header.LogicalBytes) //nolint:gosec // LogicalBytes is bounded by CHD format +} + +// firstDataTrackSector returns the sector number where the first data track starts. +// If metadata indicates the data starts at frame 0 but the first hunks contain audio +// (zeros from FLAC fallback), we search for the actual ISO9660 PVD location. +func (c *CHD) firstDataTrackSector() int64 { + // First, check track metadata + if start := c.dataTrackStartFromMetadata(); start > 0 { + return start + } + + // Metadata says data starts at frame 0, search for PVD to verify + return c.searchForPVD() +} + +// dataTrackStartFromMetadata returns the data track start from track metadata, or 0 if unknown. +func (c *CHD) dataTrackStartFromMetadata() int64 { + for _, track := range c.tracks { + if track.IsDataTrack() { + metaStart := int64(track.StartFrame + track.Pregap) + if metaStart > 0 { + return metaStart + } + break // Data track found but starts at 0, need to search for PVD + } + } + return 0 +} + +// searchForPVD searches for an ISO9660 Primary Volume Descriptor in the first hunks. +// Returns the calculated data track start sector, or 0 if not found. +func (c *CHD) searchForPVD() int64 { + unitBytes := int64(c.header.UnitBytes) + if unitBytes == 0 { + unitBytes = 2448 + } + sectorsPerHunk := int64(c.header.HunkBytes) / unitBytes + maxHunks := c.calculateMaxHunksToSearch(sectorsPerHunk) + + for hunkIdx := range maxHunks { + hunkData, err := c.hunkMap.ReadHunk(hunkIdx) + if err != nil { + continue + } + if sector := c.findPVDInHunk(hunkData, hunkIdx, sectorsPerHunk, unitBytes); sector >= 0 { + return sector + } + } + return 0 +} + +// calculateMaxHunksToSearch determines how many hunks to search for PVD. +func (c *CHD) calculateMaxHunksToSearch(sectorsPerHunk int64) uint32 { + // Check first few hunks (up to ~100 sectors worth) + maxHunks := uint32(100 / sectorsPerHunk) //nolint:gosec // sectorsPerHunk is small and positive + if maxHunks < 5 { + maxHunks = 5 + } + if maxHunks > c.hunkMap.NumHunks() { + maxHunks = c.hunkMap.NumHunks() + } + return maxHunks +} + +// pvdMagic is the ISO9660 Primary Volume Descriptor signature. +var pvdMagic = []byte{0x01, 'C', 'D', '0', '0', '1'} + +// findPVDInHunk searches for the PVD signature within a single hunk. +// Returns the data track start sector if found, or -1 if not found. +func (*CHD) findPVDInHunk(hunkData []byte, hunkIdx uint32, sectorsPerHunk, unitBytes int64) int64 { + for sectorInHunk := range sectorsPerHunk { + offset := sectorInHunk * unitBytes + if offset+6 > int64(len(hunkData)) { + break + } + if matchesPVD(hunkData, offset) { + // Found PVD - sector 16 of the ISO, so data track starts 16 sectors before + absoluteSector := int64(hunkIdx)*sectorsPerHunk + sectorInHunk + dataTrackStart := absoluteSector - 16 + if dataTrackStart < 0 { + dataTrackStart = 0 + } + return dataTrackStart + } + } + return -1 +} + +// matchesPVD checks if the data at offset matches the PVD magic bytes. +func matchesPVD(data []byte, offset int64) bool { + if len(data) <= int(offset)+len(pvdMagic) { + return false + } + for i, b := range pvdMagic { + if data[offset+int64(i)] != b { + return false + } + } + return true +} + +// RawSectorReader returns an io.ReaderAt that provides access to raw +// 2352-byte sectors. This is useful for reading disc headers that may +// be at the start of raw sector data. +func (c *CHD) RawSectorReader() io.ReaderAt { + return §orReader{ + chd: c, + sectorSize: 2352, + rawMode: true, + } +} + +// sectorReader implements io.ReaderAt for CHD sector data. +type sectorReader struct { + chd *CHD + sectorSize int + rawMode bool // If true, read raw 2352-byte sectors; if false, extract 2048-byte data + dataTrackStart int64 // Sector offset to the first data track (for multi-track CDs) +} + +// sectorLocation holds the computed location of a sector within CHD hunks. +type sectorLocation struct { + hunkIdx uint32 + sectorInHunk int64 + offsetInSector int64 +} + +// rawSectorSize is the size of raw CD sector data (without subchannel). +const rawSectorSize = 2352 + +// computeSectorLocation calculates which hunk and sector contains the given offset. +func (sr *sectorReader) computeSectorLocation(offset, hunkBytes, unitBytes int64) sectorLocation { + sectorsPerHunk := hunkBytes / unitBytes + + if sr.rawMode { + sector := offset / rawSectorSize + return sectorLocation{ + hunkIdx: uint32(sector / sectorsPerHunk), //nolint:gosec // Sector index bounded by file size + sectorInHunk: sector % sectorsPerHunk, + offsetInSector: offset % rawSectorSize, + } + } + + // ISO mode: offset is in terms of 2048-byte logical sectors + // Apply data track offset for multi-track CDs + logicalSector := offset/2048 + sr.dataTrackStart + return sectorLocation{ + hunkIdx: uint32(logicalSector / sectorsPerHunk), //nolint:gosec // Sector index bounded by file size + sectorInHunk: logicalSector % sectorsPerHunk, + offsetInSector: offset % 2048, + } +} + +// extractSectorData extracts data from a hunk at the given sector location. +func (sr *sectorReader) extractSectorData(hunkData []byte, loc sectorLocation, unitBytes int64) (start, length int64) { + sectorOffset := loc.sectorInHunk * unitBytes + + if sr.rawMode { + return sectorOffset + loc.offsetInSector, rawSectorSize - loc.offsetInSector + } + + // For CD CHD files, the codec returns data at a consistent offset within each unit. + // Check if this looks like raw sector data (starts with sync header) or user data. + dataOffset := int64(0) + if sectorOffset+12 <= int64(len(hunkData)) { + // Check for CD sync header pattern: 00 FF FF FF FF FF FF FF FF FF FF 00 + hasSyncHeader := hunkData[sectorOffset] == 0x00 && + hunkData[sectorOffset+1] == 0xFF && + hunkData[sectorOffset+11] == 0x00 + + if hasSyncHeader { + // Raw sector with sync header - user data at offset 16 (Mode1) or 24 (Mode2) + dataOffset = 16 + if sectorOffset+15 < int64(len(hunkData)) && hunkData[sectorOffset+15] == 2 { + dataOffset = 24 + } + } + // Otherwise: CD codec returned pre-extracted user data, no offset needed + } + + return sectorOffset + dataOffset + loc.offsetInSector, 2048 - loc.offsetInSector +} + +// clampDataLength bounds the data length to available data and sector limits. +func (sr *sectorReader) clampDataLength(dataStart, dataLen int64, hunkLen int, loc sectorLocation) int64 { + if dataStart+dataLen > int64(hunkLen) { + dataLen = int64(hunkLen) - dataStart + } + if sr.rawMode && dataLen > rawSectorSize-loc.offsetInSector { + dataLen = rawSectorSize - loc.offsetInSector + } + return dataLen +} + +// ReadAt reads sector data at the given offset. +// For ISO9660, this provides virtual 2048-byte sectors extracted from the +// CHD's raw sector storage. +func (sr *sectorReader) ReadAt(dest []byte, off int64) (int, error) { + if len(dest) == 0 { + return 0, nil + } + + hunkBytes := int64(sr.chd.hunkMap.HunkBytes()) + unitBytes := int64(sr.chd.header.UnitBytes) + if unitBytes == 0 { + unitBytes = 2448 // Default CD sector + subchannel + } + + totalRead := 0 + remaining := len(dest) + currentOff := off + + for remaining > 0 { + loc := sr.computeSectorLocation(currentOff, hunkBytes, unitBytes) + + hunkData, err := sr.chd.hunkMap.ReadHunk(loc.hunkIdx) + if err != nil { + if totalRead > 0 { + return totalRead, nil + } + return 0, fmt.Errorf("read hunk %d: %w", loc.hunkIdx, err) + } + + dataStart, dataLen := sr.extractSectorData(hunkData, loc, unitBytes) + if dataStart >= int64(len(hunkData)) { + break + } + + dataLen = sr.clampDataLength(dataStart, dataLen, len(hunkData), loc) + toCopy := min(int(dataLen), remaining) + + copy(dest[totalRead:], hunkData[dataStart:dataStart+int64(toCopy)]) + totalRead += toCopy + remaining -= toCopy + currentOff += int64(toCopy) + } + + if totalRead == 0 { + return 0, io.EOF + } + + return totalRead, nil +} + +// FirstDataTrackOffset returns the byte offset to the first data track. +// This is useful for reading disc headers for Sega Saturn/CD identification. +func (c *CHD) FirstDataTrackOffset() int64 { + for _, track := range c.tracks { + if track.IsDataTrack() { + // Return offset including pregap + unitBytes := int64(c.header.UnitBytes) + if unitBytes == 0 { + unitBytes = 2448 + } + return int64(track.StartFrame) * unitBytes + } + } + return 0 +} diff --git a/chd/chd_test.go b/chd/chd_test.go new file mode 100644 index 0000000..45b24db --- /dev/null +++ b/chd/chd_test.go @@ -0,0 +1,1175 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import ( + "bytes" + "compress/flate" + "encoding/binary" + "errors" + "os" + "strings" + "testing" +) + +func TestOpenSegaCDCHD(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + t.Logf("Header version: %d", chdFile.Header().Version) + t.Logf("Logical bytes: %d", chdFile.Header().LogicalBytes) + t.Logf("Hunk bytes: %d", chdFile.Header().HunkBytes) + t.Logf("Unit bytes: %d", chdFile.Header().UnitBytes) + t.Logf("Map offset: %d", chdFile.Header().MapOffset) + t.Logf("Compressors: %v", chdFile.Header().Compressors) + + // Try reading some data + reader := chdFile.RawSectorReader() + buf := make([]byte, 256) + bytesRead, err := reader.ReadAt(buf, 0) + if err != nil { + t.Fatalf("ReadAt failed: %v", err) + } + t.Logf("Read %d bytes", bytesRead) + t.Logf("First 32 bytes: %x", buf[:32]) +} + +func TestHunkMapDebug(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + // Print first 10 hunk map entries + t.Logf("Number of hunks: %d", chdFile.hunkMap.NumHunks()) + for idx := uint32(0); idx < 10 && idx < chdFile.hunkMap.NumHunks(); idx++ { + entry := chdFile.hunkMap.entries[idx] + t.Logf("Hunk %d: CompType=%d, CompLength=%d, Offset=%d", + idx, entry.CompType, entry.CompLength, entry.Offset) + } +} + +// TestOpenNonExistent verifies error handling for missing files. +func TestOpenNonExistent(t *testing.T) { + t.Parallel() + + _, err := Open("/nonexistent/path/to/file.chd") + if err == nil { + t.Fatal("expected error for nonexistent file") + } + if !os.IsNotExist(errors.Unwrap(err)) && !strings.Contains(err.Error(), "no such file") { + t.Logf("Got error (acceptable): %v", err) + } +} + +// TestOpenInvalidMagic verifies error handling for non-CHD files. +func TestOpenInvalidMagic(t *testing.T) { + t.Parallel() + + // Try opening a non-CHD file (use the test file itself as it's not a CHD) + _, err := Open("chd_test.go") + if err == nil { + t.Fatal("expected error for invalid magic") + } + if !errors.Is(err, ErrInvalidMagic) && !strings.Contains(err.Error(), "invalid CHD magic") { + t.Errorf("expected ErrInvalidMagic, got: %v", err) + } +} + +// TestCHDSize verifies Size() returns correct logical size. +func TestCHDSize(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + size := chdFile.Size() + if size <= 0 { + t.Errorf("expected positive size, got %d", size) + } + // Size should match LogicalBytes + //nolint:gosec // Test only: LogicalBytes from valid test file + if size != int64(chdFile.Header().LogicalBytes) { + t.Errorf("Size() %d != LogicalBytes %d", size, chdFile.Header().LogicalBytes) + } +} + +// TestSectorReader verifies SectorReader returns 2048-byte sectors. +func TestSectorReader(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + reader := chdFile.SectorReader() + buf := make([]byte, 2048) + n, err := reader.ReadAt(buf, 0) + if err != nil { + t.Fatalf("ReadAt failed: %v", err) + } + if n != 2048 { + t.Errorf("expected 2048 bytes, got %d", n) + } +} + +// TestFirstDataTrackOffset verifies track offset calculation. +func TestFirstDataTrackOffset(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + offset := chdFile.FirstDataTrackOffset() + // For a standard CD with data track first, offset should be 0 or small + t.Logf("FirstDataTrackOffset: %d", offset) + // Just verify it doesn't panic and returns something reasonable + if offset < 0 { + t.Errorf("expected non-negative offset, got %d", offset) + } +} + +// TestHeaderIsCompressed verifies compression detection. +func TestHeaderIsCompressed(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + // Test files should be compressed + if !chdFile.Header().IsCompressed() { + t.Error("expected compressed CHD") + } +} + +// TestTrackIsDataTrack verifies track type detection. +func TestTrackIsDataTrack(t *testing.T) { + t.Parallel() + + tests := []struct { + trackType string + want bool + }{ + {"MODE1", true}, + {"MODE1_RAW", true}, + {"MODE2_RAW", true}, + {"AUDIO", false}, + {"audio", false}, + {"Audio", false}, + } + + for _, tt := range tests { + track := Track{Type: tt.trackType} + if got := track.IsDataTrack(); got != tt.want { + t.Errorf("Track{Type: %q}.IsDataTrack() = %v, want %v", tt.trackType, got, tt.want) + } + } +} + +// TestTrackSectorSize verifies sector size calculation. +func TestTrackSectorSize(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + track Track + wantSize int + }{ + {"default", Track{}, 2352}, + {"mode1_raw", Track{DataSize: 2352}, 2352}, + {"mode1_raw_sub", Track{DataSize: 2352, SubSize: 96}, 2448}, + {"mode1_2048", Track{DataSize: 2048}, 2048}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := tt.track.SectorSize(); got != tt.wantSize { + t.Errorf("SectorSize() = %d, want %d", got, tt.wantSize) + } + }) + } +} + +// TestCodecTagToString verifies codec tag formatting. +func TestCodecTagToString(t *testing.T) { + t.Parallel() + + //nolint:govet // fieldalignment not important in test structs + tests := []struct { + tag uint32 + want string + }{ + {CodecZlib, "zlib"}, + {CodecLZMA, "lzma"}, + {CodecFLAC, "flac"}, + {CodecZstd, "zstd"}, + {CodecCDZlib, "cdzl"}, + {CodecCDLZMA, "cdlz"}, + {CodecCDFLAC, "cdfl"}, + {CodecCDZstd, "cdzs"}, + {0, "none"}, + } + + for _, tt := range tests { + if got := codecTagToString(tt.tag); got != tt.want { + t.Errorf("codecTagToString(0x%x) = %q, want %q", tt.tag, got, tt.want) + } + } +} + +// TestIsCDCodec verifies CD codec detection. +func TestIsCDCodec(t *testing.T) { + t.Parallel() + + tests := []struct { + tag uint32 + want bool + }{ + {CodecCDZlib, true}, + {CodecCDLZMA, true}, + {CodecCDFLAC, true}, + {CodecCDZstd, true}, + {CodecZlib, false}, + {CodecLZMA, false}, + {CodecFLAC, false}, + {CodecZstd, false}, + {0, false}, + } + + for _, tt := range tests { + if got := IsCDCodec(tt.tag); got != tt.want { + t.Errorf("IsCDCodec(0x%x) = %v, want %v", tt.tag, got, tt.want) + } + } +} + +//nolint:gocognit,revive // Table-driven test with multiple assertions +func TestParseCHT2(t *testing.T) { + t.Parallel() + + //nolint:govet // fieldalignment not important in test structs + tests := []struct { + name string + data string + wantErr bool + wantNum int + wantTyp string + wantFrm int + }{ + { + name: "standard", + data: "TRACK:1 TYPE:MODE1_RAW SUBTYPE:RW FRAMES:1000 PREGAP:150 POSTGAP:0", + wantNum: 1, + wantTyp: "MODE1_RAW", + wantFrm: 1000, + }, + { + name: "audio", + data: "TRACK:2 TYPE:AUDIO SUBTYPE:NONE FRAMES:5000", + wantNum: 2, + wantTyp: "AUDIO", + wantFrm: 5000, + }, + { + name: "invalid_track_number", + data: "TRACK:abc TYPE:MODE1", + wantErr: true, + }, + { + name: "invalid_frames", + data: "TRACK:1 FRAMES:notanumber", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got, err := parseCHT2([]byte(tt.data)) + if tt.wantErr { + if err == nil { + t.Error("expected error") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.Number != tt.wantNum { + t.Errorf("Number = %d, want %d", got.Number, tt.wantNum) + } + if got.Type != tt.wantTyp { + t.Errorf("Type = %q, want %q", got.Type, tt.wantTyp) + } + if got.Frames != tt.wantFrm { + t.Errorf("Frames = %d, want %d", got.Frames, tt.wantFrm) + } + }) + } +} + +// TestTrackTypeToDataSize verifies track type to data size mapping. +func TestTrackTypeToDataSize(t *testing.T) { + t.Parallel() + + tests := []struct { + trackType string + want int + }{ + {"MODE1/2048", 2048}, + {"MODE1/2352", 2352}, + {"MODE1_RAW", 2352}, + {"MODE2/2352", 2352}, + {"MODE2_RAW", 2352}, + {"AUDIO", 2352}, + {"unknown", 2352}, // Default + } + + for _, tt := range tests { + if got := trackTypeToDataSize(tt.trackType); got != tt.want { + t.Errorf("trackTypeToDataSize(%q) = %d, want %d", tt.trackType, got, tt.want) + } + } +} + +// TestSubTypeToSize verifies subtype to size mapping. +func TestSubTypeToSize(t *testing.T) { + t.Parallel() + + tests := []struct { + subType string + want int + }{ + {"NONE", 0}, + {"RW", 96}, + {"RW_RAW", 96}, + {"unknown", 0}, // Default + } + + for _, tt := range tests { + if got := subTypeToSize(tt.subType); got != tt.want { + t.Errorf("subTypeToSize(%q) = %d, want %d", tt.subType, got, tt.want) + } + } +} + +// TestCDTypeToString verifies binary CD type conversion. +func TestCDTypeToString(t *testing.T) { + t.Parallel() + + //nolint:govet // fieldalignment not important in test structs + tests := []struct { + cdType uint32 + want string + }{ + {0, "MODE1/2048"}, + {1, "MODE1/2352"}, + {2, "MODE2/2048"}, + {3, "MODE2/2336"}, + {4, "MODE2/2352"}, + {5, "AUDIO"}, + {99, "UNKNOWN"}, + } + + for _, tt := range tests { + if got := cdTypeToString(tt.cdType); got != tt.want { + t.Errorf("cdTypeToString(%d) = %q, want %q", tt.cdType, got, tt.want) + } + } +} + +// TestCDSubTypeToString verifies binary CD subtype conversion. +func TestCDSubTypeToString(t *testing.T) { + t.Parallel() + + //nolint:govet // fieldalignment not important in test structs + tests := []struct { + subType uint32 + want string + }{ + {0, "RW"}, + {1, "RW_RAW"}, + {2, "NONE"}, + {99, "NONE"}, // Default + } + + for _, tt := range tests { + if got := cdSubTypeToString(tt.subType); got != tt.want { + t.Errorf("cdSubTypeToString(%d) = %q, want %q", tt.subType, got, tt.want) + } + } +} + +// TestGetCodecUnknown verifies error for unknown codec. +func TestGetCodecUnknown(t *testing.T) { + t.Parallel() + + _, err := GetCodec(0x12345678) + if err == nil { + t.Error("expected error for unknown codec") + } + if !errors.Is(err, ErrUnsupportedCodec) { + t.Errorf("expected ErrUnsupportedCodec, got: %v", err) + } +} + +// TestReadAtEmptyBuffer verifies ReadAt with empty buffer. +func TestReadAtEmptyBuffer(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + reader := chdFile.SectorReader() + buf := make([]byte, 0) + n, err := reader.ReadAt(buf, 0) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if n != 0 { + t.Errorf("expected 0 bytes, got %d", n) + } +} + +// TestDataTrackSizeNoTracks verifies DataTrackSize fallback. +func TestDataTrackSizeNoTracks(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + // DataTrackSize should return something reasonable + size := chdFile.DataTrackSize() + if size <= 0 { + t.Errorf("expected positive size, got %d", size) + } +} + +// TestGameCubeCHD verifies GameCube CHD support. +func TestGameCubeCHD(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/GC/GameCube-240pSuite-1.17.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + // Verify header + header := chdFile.Header() + if header.Version != 5 { + t.Errorf("expected version 5, got %d", header.Version) + } + + // GameCube should have significant size + if chdFile.Size() < 1000000 { + t.Errorf("expected larger size for GameCube, got %d", chdFile.Size()) + } + + // Try reading raw sector data + reader := chdFile.RawSectorReader() + buf := make([]byte, 256) + _, err = reader.ReadAt(buf, 0) + if err != nil { + t.Fatalf("ReadAt failed: %v", err) + } + + // GameCube discs don't have standard CD sync headers + t.Logf("First 32 bytes: %x", buf[:32]) +} + +// TestZlibCodecDecompress verifies zlib codec decompression. +func TestZlibCodecDecompress(t *testing.T) { + t.Parallel() + + codec := &zlibCodec{} + + // Create test data: compress "hello world" with deflate + original := []byte("hello world hello world hello world hello world") + var compressed bytes.Buffer + writer, _ := flate.NewWriter(&compressed, flate.DefaultCompression) + _, _ = writer.Write(original) + _ = writer.Close() + + dst := make([]byte, len(original)) + decompLen, err := codec.Decompress(dst, compressed.Bytes()) + if err != nil { + t.Fatalf("Decompress failed: %v", err) + } + if decompLen != len(original) { + t.Errorf("Decompress returned %d bytes, want %d", decompLen, len(original)) + } + if !bytes.Equal(dst[:decompLen], original) { + t.Error("Decompressed data mismatch") + } +} + +// TestZlibCodecDecompressInvalid verifies error handling for invalid data. +func TestZlibCodecDecompressInvalid(t *testing.T) { + t.Parallel() + + codec := &zlibCodec{} + dst := make([]byte, 100) + _, err := codec.Decompress(dst, []byte{0x00, 0x01, 0x02, 0x03}) + // Invalid data should error + if err == nil { + t.Log("Note: deflate accepted invalid data (may have partial decode)") + } +} + +// TestCDZlibCodecSourceTooSmall verifies error for truncated source. +func TestCDZlibCodecSourceTooSmall(t *testing.T) { + t.Parallel() + + codec := &cdZlibCodec{} + dst := make([]byte, 2448) + _, err := codec.DecompressCD(dst, []byte{0x00}, 2448, 1) + if err == nil { + t.Error("expected error for truncated source") + } + if !strings.Contains(err.Error(), "source too small") { + t.Errorf("expected 'source too small' error, got: %v", err) + } +} + +// TestCDZlibCodecInvalidBaseLength verifies error for invalid base length. +func TestCDZlibCodecInvalidBaseLength(t *testing.T) { + t.Parallel() + + codec := &cdZlibCodec{} + dst := make([]byte, 2448) + // Header: 1 byte ECC bitmap + 2 bytes length (0xFFFF = 65535, way too big) + src := []byte{0x00, 0xFF, 0xFF} + _, err := codec.DecompressCD(dst, src, 2448, 1) + if err == nil { + t.Error("expected error for invalid base length") + } + if !strings.Contains(err.Error(), "invalid base length") { + t.Errorf("expected 'invalid base length' error, got: %v", err) + } +} + +// TestLZMADictSizeComputation verifies LZMA dictionary size calculation. +func TestLZMADictSizeComputation(t *testing.T) { + t.Parallel() + + tests := []struct { + hunkBytes uint32 + minDict uint32 + }{ + {4096, 4096}, // Small hunk + {8192, 8192}, // 8KB + {19584, 24576}, // Typical CD hunk (19584 -> next power) + {1 << 20, 1 << 20}, // 1MB + } + + for _, tt := range tests { + got := computeLZMADictSize(tt.hunkBytes) + if got < tt.hunkBytes { + t.Errorf("computeLZMADictSize(%d) = %d, should be >= %d", tt.hunkBytes, got, tt.hunkBytes) + } + } +} + +// TestLZMACodecEmptySource verifies error for empty source. +func TestLZMACodecEmptySource(t *testing.T) { + t.Parallel() + + codec := &lzmaCodec{} + dst := make([]byte, 100) + _, err := codec.Decompress(dst, []byte{}) + if err == nil { + t.Error("expected error for empty source") + } + if !strings.Contains(err.Error(), "empty source") { + t.Errorf("expected 'empty source' error, got: %v", err) + } +} + +// TestCDLZMACodecSourceTooSmall verifies error for truncated source. +func TestCDLZMACodecSourceTooSmall(t *testing.T) { + t.Parallel() + + codec := &cdLZMACodec{} + dst := make([]byte, 2448) + _, err := codec.DecompressCD(dst, []byte{0x00}, 2448, 1) + if err == nil { + t.Error("expected error for truncated source") + } + if !strings.Contains(err.Error(), "source too small") { + t.Errorf("expected 'source too small' error, got: %v", err) + } +} + +// TestHeaderV4Parsing verifies V4 header parsing. +func TestHeaderV4Parsing(t *testing.T) { + t.Parallel() + + // Construct a valid V4 header buffer (after magic+size+version already read) + // V4 header is 108 bytes, we need headerSizeV4-12 = 96 bytes + buf := make([]byte, 96) + + // Flags at offset 4 + binary.BigEndian.PutUint32(buf[4:8], 0x00000001) + // Compression at offset 8 + binary.BigEndian.PutUint32(buf[8:12], 0x00000005) + // Total hunks at offset 12 + binary.BigEndian.PutUint32(buf[12:16], 1000) + // Logical bytes at offset 16 + binary.BigEndian.PutUint64(buf[16:24], 1000000) + // Meta offset at offset 24 + binary.BigEndian.PutUint64(buf[24:32], 500) + // Hunk bytes at offset 32 + binary.BigEndian.PutUint32(buf[32:36], 4096) + + header := &Header{Version: 4} + err := parseHeaderV4(header, buf) + if err != nil { + t.Fatalf("parseHeaderV4 failed: %v", err) + } + + if header.Flags != 1 { + t.Errorf("Flags = %d, want 1", header.Flags) + } + if header.Compression != 5 { + t.Errorf("Compression = %d, want 5", header.Compression) + } + if header.TotalHunks != 1000 { + t.Errorf("TotalHunks = %d, want 1000", header.TotalHunks) + } + if header.LogicalBytes != 1000000 { + t.Errorf("LogicalBytes = %d, want 1000000", header.LogicalBytes) + } + if header.HunkBytes != 4096 { + t.Errorf("HunkBytes = %d, want 4096", header.HunkBytes) + } + // V4 sets default UnitBytes + if header.UnitBytes != 2448 { + t.Errorf("UnitBytes = %d, want 2448", header.UnitBytes) + } +} + +// TestHeaderV4TooSmall verifies error for truncated V4 buffer. +func TestHeaderV4TooSmall(t *testing.T) { + t.Parallel() + + header := &Header{Version: 4} + err := parseHeaderV4(header, make([]byte, 10)) + if err == nil { + t.Error("expected error for truncated buffer") + } + if !errors.Is(err, ErrInvalidHeader) { + t.Errorf("expected ErrInvalidHeader, got: %v", err) + } +} + +// TestHeaderV3Parsing verifies V3 header parsing. +func TestHeaderV3Parsing(t *testing.T) { + t.Parallel() + + // V3 header is 120 bytes, we need headerSizeV3-12 = 108 bytes + buf := make([]byte, 108) + + // Flags at offset 4 + binary.BigEndian.PutUint32(buf[4:8], 0x00000002) + // Compression at offset 8 + binary.BigEndian.PutUint32(buf[8:12], 0x00000003) + // Total hunks at offset 12 + binary.BigEndian.PutUint32(buf[12:16], 500) + // Logical bytes at offset 16 + binary.BigEndian.PutUint64(buf[16:24], 500000) + // Meta offset at offset 24 + binary.BigEndian.PutUint64(buf[24:32], 250) + // MD5 hashes at offset 32-64 (skip) + // Hunk bytes at offset 64 + binary.BigEndian.PutUint32(buf[64:68], 8192) + + header := &Header{Version: 3} + err := parseHeaderV3(header, buf) + if err != nil { + t.Fatalf("parseHeaderV3 failed: %v", err) + } + + if header.Flags != 2 { + t.Errorf("Flags = %d, want 2", header.Flags) + } + if header.Compression != 3 { + t.Errorf("Compression = %d, want 3", header.Compression) + } + if header.TotalHunks != 500 { + t.Errorf("TotalHunks = %d, want 500", header.TotalHunks) + } + if header.HunkBytes != 8192 { + t.Errorf("HunkBytes = %d, want 8192", header.HunkBytes) + } +} + +// TestHeaderV3TooSmall verifies error for truncated V3 buffer. +func TestHeaderV3TooSmall(t *testing.T) { + t.Parallel() + + header := &Header{Version: 3} + err := parseHeaderV3(header, make([]byte, 50)) + if err == nil { + t.Error("expected error for truncated buffer") + } + if !errors.Is(err, ErrInvalidHeader) { + t.Errorf("expected ErrInvalidHeader, got: %v", err) + } +} + +// TestNumHunksCalculation verifies hunk count calculation. +func TestNumHunksCalculation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + header Header + expectedHunk uint32 + }{ + { + name: "from_total_hunks", + header: Header{TotalHunks: 100, HunkBytes: 4096, LogicalBytes: 1000000}, + expectedHunk: 100, // Uses TotalHunks when set + }, + { + name: "calculated", + header: Header{TotalHunks: 0, HunkBytes: 4096, LogicalBytes: 16384}, + expectedHunk: 4, // exact fit: 16384 bytes at 4096 per hunk + }, + { + name: "calculated_with_remainder", + header: Header{TotalHunks: 0, HunkBytes: 4096, LogicalBytes: 17000}, + expectedHunk: 5, // rounds up: 17000 bytes needs 5 hunks at 4096 + }, + { + name: "zero_hunk_bytes", + header: Header{TotalHunks: 0, HunkBytes: 0, LogicalBytes: 16384}, + expectedHunk: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := tt.header.NumHunks() + if got != tt.expectedHunk { + t.Errorf("NumHunks() = %d, want %d", got, tt.expectedHunk) + } + }) + } +} + +// TestParseCHTR verifies CHTR (v1 track) parsing. +func TestParseCHTR(t *testing.T) { + t.Parallel() + + // CHTR uses same format as CHT2 + data := []byte("TRACK:1 TYPE:MODE1_RAW FRAMES:500") + track, err := parseCHTR(data) + if err != nil { + t.Fatalf("parseCHTR failed: %v", err) + } + if track.Number != 1 { + t.Errorf("Number = %d, want 1", track.Number) + } + if track.Type != "MODE1_RAW" { + t.Errorf("Type = %q, want MODE1_RAW", track.Type) + } + if track.Frames != 500 { + t.Errorf("Frames = %d, want 500", track.Frames) + } +} + +// TestParseCHCD verifies CHCD (binary track metadata) parsing. +func TestParseCHCD(t *testing.T) { + t.Parallel() + + // Build a valid CHCD buffer + // Format: numTracks (4 bytes) + track entries (24 bytes each) + buf := make([]byte, 4+24*2) // 2 tracks + + // Number of tracks + binary.BigEndian.PutUint32(buf[0:4], 2) + + // Track 1: MODE1/2048, RW subchannel, 1000 frames + offset := 4 + binary.BigEndian.PutUint32(buf[offset:offset+4], 0) // Type (0 = MODE1/2048) + binary.BigEndian.PutUint32(buf[offset+4:offset+8], 0) // SubType = RW + binary.BigEndian.PutUint32(buf[offset+8:offset+12], 2048) + binary.BigEndian.PutUint32(buf[offset+12:offset+16], 96) + binary.BigEndian.PutUint32(buf[offset+16:offset+20], 1000) + binary.BigEndian.PutUint32(buf[offset+20:offset+24], 0) // Pad frames + + // Track 2: AUDIO + offset = 4 + 24 + binary.BigEndian.PutUint32(buf[offset:offset+4], 5) // Type (5 is AUDIO) + binary.BigEndian.PutUint32(buf[offset+4:offset+8], 2) // SubType (2 is NONE) + binary.BigEndian.PutUint32(buf[offset+8:offset+12], 2352) + binary.BigEndian.PutUint32(buf[offset+12:offset+16], 0) + binary.BigEndian.PutUint32(buf[offset+16:offset+20], 2000) + binary.BigEndian.PutUint32(buf[offset+20:offset+24], 0) + + tracks, err := parseCHCD(buf) + if err != nil { + t.Fatalf("parseCHCD failed: %v", err) + } + if len(tracks) != 2 { + t.Fatalf("expected 2 tracks, got %d", len(tracks)) + } + + // Check track 1 + if tracks[0].Number != 1 { + t.Errorf("Track 1 Number = %d, want 1", tracks[0].Number) + } + if tracks[0].Type != "MODE1/2048" { + t.Errorf("Track 1 Type = %q, want MODE1/2048", tracks[0].Type) + } + if tracks[0].Frames != 1000 { + t.Errorf("Track 1 Frames = %d, want 1000", tracks[0].Frames) + } + + // Check track 2 + if tracks[1].Number != 2 { + t.Errorf("Track 2 Number = %d, want 2", tracks[1].Number) + } + if tracks[1].Type != "AUDIO" { + t.Errorf("Track 2 Type = %q, want AUDIO", tracks[1].Type) + } +} + +// TestParseCHCDTooSmall verifies error for truncated CHCD. +func TestParseCHCDTooSmall(t *testing.T) { + t.Parallel() + + // Buffer too small for header + _, err := parseCHCD([]byte{0x00, 0x00}) + if err == nil { + t.Error("expected error for truncated buffer") + } + if !errors.Is(err, ErrInvalidMetadata) { + t.Errorf("expected ErrInvalidMetadata, got: %v", err) + } +} + +// TestParseCHCDTooManyTracks verifies error for excessive track count. +func TestParseCHCDTooManyTracks(t *testing.T) { + t.Parallel() + + buf := make([]byte, 4) + binary.BigEndian.PutUint32(buf[0:4], 1000) // Way more than MaxNumTracks + _, err := parseCHCD(buf) + if err == nil { + t.Error("expected error for too many tracks") + } + if !strings.Contains(err.Error(), "too many tracks") { + t.Errorf("expected 'too many tracks' error, got: %v", err) + } +} + +// TestParseCHCDInsufficientData verifies error when data too small for tracks. +func TestParseCHCDInsufficientData(t *testing.T) { + t.Parallel() + + buf := make([]byte, 4+10) // Header says 1 track but not enough data + binary.BigEndian.PutUint32(buf[0:4], 1) + _, err := parseCHCD(buf) + if err == nil { + t.Error("expected error for insufficient data") + } + if !errors.Is(err, ErrInvalidMetadata) { + t.Errorf("expected ErrInvalidMetadata, got: %v", err) + } +} + +// TestMetadataCircularChain verifies detection of circular metadata chains. +func TestMetadataCircularChain(t *testing.T) { + t.Parallel() + + // Create a mock reader that returns metadata entries pointing to each other + // Entry at offset 100 points to offset 200, which points back to 100 + data := make([]byte, 300) + + // Entry at offset 100: Tag=CHT2, Next=200 + binary.BigEndian.PutUint32(data[100:104], MetaTagCHT2) + data[104] = 0 // flags + data[105] = 0 + data[106] = 0 + data[107] = 10 // length = 10 + binary.BigEndian.PutUint64(data[108:116], 200) // next = 200 + + // Entry at offset 200: Tag=CHT2, Next=100 (circular!) + binary.BigEndian.PutUint32(data[200:204], MetaTagCHT2) + data[204] = 0 // flags + data[205] = 0 + data[206] = 0 + data[207] = 10 // length = 10 + binary.BigEndian.PutUint64(data[208:216], 100) // next = 100 (circular) + + reader := bytes.NewReader(data) + _, err := parseMetadata(reader, 100) + if err == nil { + t.Error("expected error for circular chain") + } + if !strings.Contains(err.Error(), "circular") { + t.Errorf("expected 'circular' error, got: %v", err) + } +} + +// TestMetadataEntryTooLarge verifies MaxMetadataLen validation. +// Note: The CHD format uses 3 bytes for length (max 0xFFFFFF = 16,777,215) +// and MaxMetadataLen is 16*1024*1024 = 16,777,216. Since the max encodable +// value is less than the limit, this check can never trigger from valid format. +func TestMetadataEntryTooLarge(t *testing.T) { + t.Parallel() + + t.Skip("MaxMetadataLen (16MB) exceeds 24-bit max (16MB-1), so this case cannot be triggered via format") +} + +// TestRegisterAndGetCodec verifies codec registration. +func TestRegisterAndGetCodec(t *testing.T) { + t.Parallel() + + // Test that registered codecs can be retrieved + codecs := []uint32{ + CodecZlib, CodecLZMA, CodecFLAC, CodecZstd, + CodecCDZlib, CodecCDLZMA, CodecCDFLAC, CodecCDZstd, + } + + for _, tag := range codecs { + codec, err := GetCodec(tag) + if err != nil { + t.Errorf("GetCodec(0x%x) failed: %v", tag, err) + continue + } + if codec == nil { + t.Errorf("GetCodec(0x%x) returned nil codec", tag) + } + } +} + +//nolint:gocognit,gocyclo,cyclop,funlen,nestif,revive,govet // Debug test with extensive diagnostic output +func TestNeoGeoCDCHD(t *testing.T) { + t.Parallel() + + chdFile, err := Open("../testdata/NeoGeoCD/240pTestSuite.chd") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = chdFile.Close() }() + + header := chdFile.Header() + t.Logf("Version: %d", header.Version) + t.Logf("HunkBytes: %d", header.HunkBytes) + t.Logf("UnitBytes: %d", header.UnitBytes) + t.Logf("Compressors: %x", header.Compressors) + + frames := int(header.HunkBytes) / int(header.UnitBytes) + t.Logf("Frames per hunk: %d", frames) + t.Logf("Sector bytes per hunk: %d", frames*2352) + + // Print track information + t.Log("\nTrack information:") + t.Logf("MetaOffset: %d", header.MetaOffset) + tracks := chdFile.Tracks() + t.Logf("Number of tracks: %d", len(tracks)) + for i, track := range tracks { + t.Logf("Track %d: Type=%s, Frames=%d, StartFrame=%d, Pregap=%d, IsData=%v", + i+1, track.Type, track.Frames, track.StartFrame, track.Pregap, track.IsDataTrack()) + } + + // Debug: Try parsing metadata directly + if header.MetaOffset > 0 { + metaBuf := make([]byte, 100) + metaBytes, metaErr := chdFile.file.ReadAt(metaBuf, int64(header.MetaOffset)) //nolint:gosec // Test only + if metaErr != nil { + t.Logf("Read metadata raw failed: %v", metaErr) + } else { + t.Logf("Raw metadata (%d bytes): %x", metaBytes, metaBuf[:metaBytes]) + t.Logf("Tag: %s", string(metaBuf[0:4])) + t.Logf("Data: %s", string(metaBuf[16:80])) + } + + // Debug: Try parseMetadata directly + entries, parseErr := parseMetadata(chdFile.file, header.MetaOffset) + if parseErr != nil { + t.Logf("parseMetadata failed: %v", parseErr) + } else { + t.Logf("Parsed %d metadata entries", len(entries)) + for i, entry := range entries { + t.Logf("Entry %d: Tag=%x, Flags=%d, DataLen=%d, Next=%d", + i, entry.Tag, entry.Flags, len(entry.Data), entry.Next) + t.Logf(" Data: %s", string(entry.Data)) + } + + // Try parseTracks + parsedTracks, err := parseTracks(entries) + if err != nil { + t.Logf("parseTracks failed: %v", err) + } else { + t.Logf("Parsed %d tracks", len(parsedTracks)) + for i, track := range parsedTracks { + t.Logf(" Track %d: Type=%s Frames=%d IsData=%v", + i+1, track.Type, track.Frames, track.IsDataTrack()) + } + } + } + } + + // Test firstDataTrackSector + t.Log("\nData track sector offset:", chdFile.firstDataTrackSector()) + t.Log("Data track size:", chdFile.DataTrackSize()) + + // Print first 20 hunk map entries to see the pattern + t.Log("\nHunk map entries:") + t.Logf("Number of hunks: %d", chdFile.hunkMap.NumHunks()) + for idx := uint32(0); idx < 20 && idx < chdFile.hunkMap.NumHunks(); idx++ { + entry := chdFile.hunkMap.entries[idx] + codecName := "?" + if int(entry.CompType) < len(header.Compressors) { + tag := header.Compressors[entry.CompType] + codecName = string([]byte{byte(tag >> 24), byte(tag >> 16), byte(tag >> 8), byte(tag)}) + } + t.Logf("Hunk %d: CompType=%d (%s), CompLength=%d, Offset=%d", + idx, entry.CompType, codecName, entry.CompLength, entry.Offset) + } + + // Try to read data from a hunk that uses LZMA (comptype 0) + // Skip the FLAC hunks and read from hunk 2 which is LZMA + t.Log("\nTrying to read hunk 2 (LZMA)...") + hunkData, err := chdFile.hunkMap.ReadHunk(2) + if err != nil { + t.Logf("Read hunk 2 failed: %v", err) + } else { + t.Logf("Hunk 2 data length: %d", len(hunkData)) + if len(hunkData) > 32 { + t.Logf("First 32 bytes: %x", hunkData[:32]) + } + } + + // Test reading sector 0 (where PVD actually starts for this disc) + t.Log("\nTrying to read sector 0 using DataTrackSectorReader...") + reader := chdFile.DataTrackSectorReader() + sector0Data := make([]byte, 2048) + readBytes, err := reader.ReadAt(sector0Data, 0) // Sector 0 + if err != nil { + t.Logf("Read sector 0 failed: %v", err) + } else { + t.Logf("Read %d bytes", readBytes) + t.Logf("First 32 bytes: %x", sector0Data[:32]) + t.Logf("String view: %s", string(sector0Data[1:6])) + } + + // Test reading sector 16 using DataTrackSectorReader (where PVD should be) + t.Log("\nTrying to read sector 16 (PVD) using DataTrackSectorReader...") + pvdData := make([]byte, 2048) + readBytes, err = reader.ReadAt(pvdData, 16*2048) // Sector 16 + if err != nil { + t.Logf("Read PVD sector failed: %v", err) + } else { + t.Logf("Read %d bytes", readBytes) + t.Logf("First 32 bytes: %x", pvdData[:32]) + t.Logf("String view: %s", string(pvdData[1:6])) + } + + // Check hunk 0 data to understand the disc layout + t.Log("\nReading hunk 0 (audio track) to see what's there...") + hunk0Data, err := chdFile.hunkMap.ReadHunk(0) + if err != nil { + t.Logf("Read hunk 0 failed: %v", err) + } else { + t.Logf("Hunk 0 length: %d", len(hunk0Data)) + t.Logf("Hunk 0 first 32 bytes: %x", hunk0Data[:32]) + } + + // Simulate what iso9660.OpenCHD does - read first ~50KB via DataTrackSectorReader + t.Log("\nSimulating ISO9660 init read (first 50KB)...") + isoReader := chdFile.DataTrackSectorReader() + isoData := make([]byte, 50000) + readBytes, err = isoReader.ReadAt(isoData, 0) + if err != nil && err.Error() != "EOF" { + t.Logf("ISO reader failed: %v", err) + } + t.Logf("Read %d bytes", readBytes) + // Search for CD001 in the data + for i := range len(isoData) - 6 { + if isoData[i] == 0x01 && isoData[i+1] == 'C' && isoData[i+2] == 'D' && + isoData[i+3] == '0' && isoData[i+4] == '0' && isoData[i+5] == '1' { + t.Logf("Found PVD at offset %d (sector %d)", i, i/2048) + t.Logf("Data at PVD: %x", isoData[i:i+32]) + break + } + } + + // The PVD \x01CD001 was seen at the start of hunk 2 + // Hunk 2 contains frames 16-23 + // Sector 0 of the data should contain the system area (16 reserved sectors) + // Then PVD at sector 16 + // So if hunk 2 starts the data track, the PVD should be at the start + // of the sector data in hunk 2 (after extracting from raw sector) + t.Log("\nChecking raw hunk 2 data structure...") + hunk2, err := chdFile.hunkMap.ReadHunk(2) + if err != nil { + t.Logf("Read hunk 2 failed: %v", err) + } else { + // Check the first sector in hunk 2 + // Raw sector = 2352 bytes, subchannel = 96 bytes, unit = 2448 bytes + unitBytes := int(header.UnitBytes) + t.Logf("Unit bytes: %d", unitBytes) + for sectorIdx := range 3 { + sectorStart := sectorIdx * unitBytes + if sectorStart+32 > len(hunk2) { + break + } + // Check sync header (first 12 bytes of raw sector) + t.Logf("Sector %d raw start: %x", sectorIdx, hunk2[sectorStart:sectorStart+32]) + // User data starts at offset 16 (Mode1) + dataStart := sectorStart + 16 + if dataStart+32 <= len(hunk2) { + t.Logf("Sector %d user data (+16): %x", sectorIdx, hunk2[dataStart:dataStart+32]) + } + } + } +} diff --git a/chd/codec.go b/chd/codec.go new file mode 100644 index 0000000..d4f38c9 --- /dev/null +++ b/chd/codec.go @@ -0,0 +1,131 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import ( + "fmt" + "sync" +) + +// Codec tag constants (as 4-byte big-endian integers representing ASCII strings). +// CD-ROM specific codecs handle both data and subchannel compression. +const ( + // CodecNone indicates uncompressed data. + CodecNone uint32 = 0x00000000 + + // CodecZlib is the standard zlib codec ("zlib"). + CodecZlib uint32 = 0x7a6c6962 + + // CodecLZMA is the LZMA codec ("lzma"). + CodecLZMA uint32 = 0x6c7a6d61 + + // CodecHuff is the CHD Huffman codec ("huff"). + CodecHuff uint32 = 0x68756666 + + // CodecFLAC is the FLAC audio codec ("flac"). + CodecFLAC uint32 = 0x666c6163 + + // CodecZstd is the Zstandard codec ("zstd"). + CodecZstd uint32 = 0x7a737464 + + // CodecCDZlib is the CD zlib codec ("cdzl"). + // Compresses CD data sectors with zlib, subchannel with zlib. + CodecCDZlib uint32 = 0x63647a6c + + // CodecCDLZMA is the CD LZMA codec ("cdlz"). + // Compresses CD data sectors with LZMA, subchannel with zlib. + CodecCDLZMA uint32 = 0x63646c7a + + // CodecCDFLAC is the CD FLAC codec ("cdfl"). + // Compresses CD audio sectors with FLAC, subchannel with zlib. + CodecCDFLAC uint32 = 0x6364666c + + // CodecCDZstd is the CD Zstandard codec ("cdzs"). + // Compresses CD data sectors with Zstandard, subchannel with zlib. + CodecCDZstd uint32 = 0x63647a73 +) + +// Codec decompresses CHD hunk data. +type Codec interface { + // Decompress decompresses src into dst. + // dst must be pre-allocated to the expected decompressed size. + // Returns the number of bytes written to dst. + Decompress(dst, src []byte) (int, error) +} + +// CDCodec decompresses CD-ROM specific hunk data. +// CD codecs handle the separation of sector data and subchannel data. +type CDCodec interface { + Codec + + // DecompressCD decompresses CD-ROM data with sector/subchannel handling. + // hunkBytes is the total size of a decompressed hunk. + // frames is the number of CD frames (sectors) in the hunk. + DecompressCD(dst, src []byte, hunkBytes, frames int) (int, error) +} + +// codecRegistry holds registered codecs. +var ( + codecRegistry = make(map[uint32]func() Codec) + codecRegistryMu sync.RWMutex +) + +// RegisterCodec registers a codec factory for the given tag. +func RegisterCodec(tag uint32, factory func() Codec) { + codecRegistryMu.Lock() + defer codecRegistryMu.Unlock() + codecRegistry[tag] = factory +} + +// GetCodec returns a codec instance for the given tag. +func GetCodec(tag uint32) (Codec, error) { + codecRegistryMu.RLock() + factory, ok := codecRegistry[tag] + codecRegistryMu.RUnlock() + + if !ok { + return nil, fmt.Errorf("%w: 0x%08x (%s)", ErrUnsupportedCodec, tag, codecTagToString(tag)) + } + + return factory(), nil +} + +// codecTagToString converts a codec tag to its ASCII representation. +func codecTagToString(tag uint32) string { + if tag == 0 { + return "none" + } + tagBytes := []byte{ + byte(tag >> 24), + byte(tag >> 16), + byte(tag >> 8), + byte(tag), + } + return string(tagBytes) +} + +// IsCDCodec returns true if the codec tag is a CD-ROM specific codec. +func IsCDCodec(tag uint32) bool { + switch tag { + case CodecCDZlib, CodecCDLZMA, CodecCDFLAC, CodecCDZstd: + return true + default: + return false + } +} diff --git a/chd/codec_flac.go b/chd/codec_flac.go new file mode 100644 index 0000000..41eb4d9 --- /dev/null +++ b/chd/codec_flac.go @@ -0,0 +1,291 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import ( + "bytes" + "compress/flate" + "errors" + "fmt" + "io" + + "github.com/mewkiz/flac" + "github.com/mewkiz/flac/frame" +) + +func init() { + RegisterCodec(CodecFLAC, func() Codec { return &flacCodec{} }) + RegisterCodec(CodecCDFLAC, func() Codec { return &cdFLACCodec{} }) +} + +// flacCodec implements FLAC decompression for CHD hunks. +type flacCodec struct{} + +// Decompress decompresses FLAC compressed data. +func (*flacCodec) Decompress(dst, src []byte) (int, error) { + stream, err := flac.New(bytes.NewReader(src)) + if err != nil { + return 0, fmt.Errorf("%w: flac init: %w", ErrDecompressFailed, err) + } + defer func() { _ = stream.Close() }() + + return decodeFLACFrames(stream, dst) +} + +// decodeFLACFrames decodes all FLAC frames into the destination buffer. +func decodeFLACFrames(stream *flac.Stream, dst []byte) (int, error) { + offset := 0 + for { + audioFrame, err := stream.ParseNext() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return offset, fmt.Errorf("%w: flac frame: %w", ErrDecompressFailed, err) + } + + offset = writeFLACFrameSamples(audioFrame, dst, offset) + } + return offset, nil +} + +// writeFLACFrameSamples writes samples from a FLAC frame to the destination buffer. +func writeFLACFrameSamples(audioFrame *frame.Frame, dst []byte, offset int) int { + if len(audioFrame.Subframes) == 0 { + return offset + } + + numChannels := min(len(audioFrame.Subframes), 2) + for i := range audioFrame.Subframes[0].NSamples { + for ch := range numChannels { + sample := audioFrame.Subframes[ch].Samples[i] + if offset+2 <= len(dst) { + dst[offset] = byte(sample >> 8) + dst[offset+1] = byte(sample) + offset += 2 + } + } + } + return offset +} + +// cdFLACCodec implements CD-ROM FLAC decompression. +// CD FLAC compresses audio sectors with FLAC and subchannel data with zlib. +type cdFLACCodec struct{} + +// Decompress implements basic decompression. +func (c *cdFLACCodec) Decompress(dst, src []byte) (int, error) { + return c.DecompressCD(dst, src, len(dst), len(dst)/2448) +} + +// CD audio constants. +const ( + cdSectorSize = 2352 + cdSubSize = 96 +) + +// DecompressCD decompresses CD audio data with FLAC and subchannel with zlib. +// CD FLAC format (from MAME chdcodec.cpp): +// - FLAC stream starts directly at offset 0 (NO length header) +// - FLAC decoder determines where the stream ends +// - Remaining bytes after FLAC: zlib-compressed subchannel data +// +// Note: FLAC decompression may fail for headerless streams that the Go library +// cannot parse. In that case, we return zeros for the audio data since game +// identification only needs data tracks, not audio tracks. +func (*cdFLACCodec) DecompressCD(dst, src []byte, _, frames int) (int, error) { + if len(src) == 0 { + return 0, fmt.Errorf("%w: cdfl: empty source", ErrDecompressFailed) + } + + totalSectorBytes := frames * cdSectorSize + totalSubBytes := frames * cdSubSize + + // Decompress FLAC audio - returns both data and bytes consumed + sectorDst, flacBytesConsumed, err := decompressCDFLACAudioWithOffset(src, totalSectorBytes) + if err != nil { + // FLAC decompression failed - this is likely an audio track. + // Return zeros for the audio data since we only need data tracks for identification. + sectorDst = make([]byte, totalSectorBytes) + flacBytesConsumed = len(src) // Assume all data is FLAC, no subchannel + } + + // Subchannel data starts after FLAC data + var subDst []byte + if flacBytesConsumed < len(src) { + subData := src[flacBytesConsumed:] + subDst = decompressCDSubchannel(subData, totalSubBytes) + } else { + subDst = make([]byte, totalSubBytes) + } + + return interleaveCDData(dst, sectorDst, subDst, frames), nil +} + +// countingReader wraps a reader and tracks bytes read from the original data. +type countingReader struct { + header []byte + data []byte + headerPos int + dataPos int + bytesFromData int +} + +func (cr *countingReader) Read(buf []byte) (int, error) { + totalRead := 0 + + // First read from synthetic header + if cr.headerPos < len(cr.header) { + n := copy(buf, cr.header[cr.headerPos:]) + cr.headerPos += n + totalRead += n + buf = buf[n:] + } + + // Then read from actual data + if len(buf) > 0 && cr.dataPos < len(cr.data) { + n := copy(buf, cr.data[cr.dataPos:]) + cr.dataPos += n + cr.bytesFromData += n + totalRead += n + } + + if totalRead == 0 { + return 0, io.EOF + } + return totalRead, nil +} + +// flacHeaderTemplate is the synthetic FLAC header used by MAME for CHD. +// This is a minimal valid FLAC stream header with STREAMINFO metadata. +// From MAME's src/lib/util/flac.cpp s_header_template. +// +//nolint:gochecknoglobals // Template constant for FLAC header generation +var flacHeaderTemplate = []byte{ + 0x66, 0x4C, 0x61, 0x43, // "fLaC" magic + 0x80, 0x00, 0x00, 0x22, // STREAMINFO block header (last=1, type=0, length=34) + 0x00, 0x00, // min block size (will be patched) + 0x00, 0x00, // max block size (will be patched) + 0x00, 0x00, 0x00, // min frame size + 0x00, 0x00, 0x00, // max frame size + 0x00, 0x00, 0x0A, 0xC4, 0x42, 0xF0, // sample rate, channels, bits (will be patched) + 0x00, 0x00, 0x00, 0x00, // total samples (upper) + 0x00, 0x00, 0x00, 0x00, // total samples (lower) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // MD5 signature + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // MD5 signature continued +} + +// buildFLACHeader creates a synthetic FLAC header for CD audio. +// Parameters match MAME's flac_decoder::reset(sample_rate, num_channels, block_size, ...). +func buildFLACHeader(sampleRate uint32, numChannels uint8, blockSize uint16) []byte { + header := make([]byte, len(flacHeaderTemplate)) + copy(header, flacHeaderTemplate) + + // Patch block sizes at offsets 0x08 and 0x0A (big-endian 16-bit) + header[0x08] = byte(blockSize >> 8) + header[0x09] = byte(blockSize) + header[0x0A] = byte(blockSize >> 8) + header[0x0B] = byte(blockSize) + + // Patch sample rate, channels, bits at offset 0x12 (big-endian 24-bit) + // Format: (sample_rate << 4) | ((num_channels - 1) << 1) | (bits_per_sample - 1 >> 4) + // For 16-bit audio: bits_per_sample = 16, so (16-1) >> 4 = 0 + val := (sampleRate << 4) | (uint32(numChannels-1) << 1) + header[0x12] = byte(val >> 16) + header[0x13] = byte(val >> 8) + header[0x14] = byte(val) + + return header +} + +// cdFLACBlockSize calculates the FLAC block size for CD audio. +// From MAME's chd_cd_flac_compressor::blocksize(). +func cdFLACBlockSize(totalBytes int) uint16 { + // MAME: blocksize = bytes / 4; while (blocksize > MAX_SECTOR_DATA) blocksize /= 2; + // MAX_SECTOR_DATA = 2352 + blocksize := totalBytes / 4 + for blocksize > 2352 { + blocksize /= 2 + } + //nolint:gosec // Safe: blocksize bounded to <= 2352 + return uint16(blocksize) +} + +// decompressCDFLACAudioWithOffset decompresses FLAC audio and returns bytes consumed. +func decompressCDFLACAudioWithOffset(audioData []byte, totalBytes int) (decoded []byte, bytesConsumed int, err error) { + sectorDst := make([]byte, totalBytes) + + // Build synthetic FLAC header (CD audio: 44100 Hz, stereo, 16-bit) + blockSize := cdFLACBlockSize(totalBytes) + header := buildFLACHeader(44100, 2, blockSize) + + cr := &countingReader{ + header: header, + data: audioData, + } + + stream, err := flac.New(cr) + if err != nil { + return nil, 0, fmt.Errorf("%w: cdfl flac init: %w", ErrDecompressFailed, err) + } + defer func() { _ = stream.Close() }() + + _, err = decodeFLACFrames(stream, sectorDst) + if err != nil { + return nil, 0, err + } + + return sectorDst, cr.bytesFromData, nil +} + +// decompressCDSubchannel decompresses zlib-compressed subchannel data. +func decompressCDSubchannel(subData []byte, totalBytes int) []byte { + if len(subData) == 0 || totalBytes == 0 { + return make([]byte, totalBytes) + } + + subDst := make([]byte, totalBytes) + reader := flate.NewReader(bytes.NewReader(subData)) + _, err := io.ReadFull(reader, subDst) + _ = reader.Close() + + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + return make([]byte, totalBytes) + } + return subDst +} + +// interleaveCDData interleaves sector and subchannel data into the destination. +func interleaveCDData(dst, sectorDst, subDst []byte, frames int) int { + dstOffset := 0 + for i := range frames { + srcSectorOffset := i * cdSectorSize + if srcSectorOffset+cdSectorSize <= len(sectorDst) { + copy(dst[dstOffset:], sectorDst[srcSectorOffset:srcSectorOffset+cdSectorSize]) + } + dstOffset += cdSectorSize + + srcSubOffset := i * cdSubSize + if srcSubOffset+cdSubSize <= len(subDst) { + copy(dst[dstOffset:], subDst[srcSubOffset:srcSubOffset+cdSubSize]) + } + dstOffset += cdSubSize + } + return dstOffset +} diff --git a/chd/codec_lzma.go b/chd/codec_lzma.go new file mode 100644 index 0000000..0107010 --- /dev/null +++ b/chd/codec_lzma.go @@ -0,0 +1,216 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import ( + "bytes" + "compress/flate" + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/ulikunitz/xz/lzma" +) + +func init() { + RegisterCodec(CodecLZMA, func() Codec { return &lzmaCodec{} }) + RegisterCodec(CodecCDLZMA, func() Codec { return &cdLZMACodec{} }) +} + +// lzmaCodec implements LZMA decompression for CHD hunks. +// CHD LZMA uses raw LZMA data with NO header - properties are computed from hunkbytes. +type lzmaCodec struct { + hunkBytes uint32 // Set by the hunk map when initializing +} + +// computeLZMAProps computes LZMA properties matching MAME's configure_properties. +// MAME uses level=8 and reduceSize=hunkbytes, then normalizes to get dictSize. +// Default properties: lc=3, lp=0, pb=2 (encoded as 0x5D). +func computeLZMADictSize(hunkBytes uint32) uint32 { + // For level 8, initial dictSize would be 1<<26, but reduced based on hunkbytes. + // From LzmaEncProps_Normalize: find smallest 2<= hunkBytes + reduceSize := hunkBytes + for i := uint32(11); i <= 30; i++ { + if reduceSize <= (2 << i) { + return 2 << i + } + if reduceSize <= (3 << i) { + return 3 << i + } + } + return 1 << 26 // fallback to level 8 default +} + +// Decompress decompresses LZMA compressed data. +// CHD LZMA format: raw LZMA stream with NO header. +// Properties are computed from the decompressed size (dst length). +func (c *lzmaCodec) Decompress(dst, src []byte) (int, error) { + if len(src) == 0 { + return 0, fmt.Errorf("%w: lzma: empty source", ErrDecompressFailed) + } + + // Compute properties like MAME does + // MAME's configure_properties uses level=8, reduceSize=hunkbytes + // After normalization: lc=3, lp=0, pb=2, dictSize computed from reduceSize + hunkBytes := c.hunkBytes + if hunkBytes == 0 { + //nolint:gosec // Safe: len(dst) is hunk size, bounded by uint32 + hunkBytes = uint32(len(dst)) + } + dictSize := computeLZMADictSize(hunkBytes) + + // Properties byte: lc + lp*9 + pb*45 = 3 + 0 + 90 = 93 = 0x5D + const propsLcLpPb = 0x5D + + // Construct a full 13-byte LZMA header for the library: + // Byte 0: Properties (lc=3, lp=0, pb=2 encoded as 0x5D) + // Bytes 1-4: Dictionary size (little-endian) + // Bytes 5-12: Uncompressed size (little-endian) + header := make([]byte, 13) + header[0] = propsLcLpPb + binary.LittleEndian.PutUint32(header[1:5], dictSize) + binary.LittleEndian.PutUint64(header[5:13], uint64(len(dst))) + + // Combine header with compressed data + fullStream := make([]byte, 13+len(src)) + copy(fullStream[0:13], header) + copy(fullStream[13:], src) + + reader, err := lzma.NewReader(bytes.NewReader(fullStream)) + if err != nil { + return 0, fmt.Errorf("%w: lzma init: %w", ErrDecompressFailed, err) + } + + n, err := io.ReadFull(reader, dst) + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + return n, fmt.Errorf("%w: lzma read: %w", ErrDecompressFailed, err) + } + + return n, nil +} + +// cdLZMACodec implements CD-ROM LZMA decompression. +// CD LZMA compresses sector data with LZMA and subchannel data with zlib. +type cdLZMACodec struct{} + +// Decompress implements basic decompression. +func (c *cdLZMACodec) Decompress(dst, src []byte) (int, error) { + return c.DecompressCD(dst, src, len(dst), len(dst)/2448) +} + +// DecompressCD decompresses CD-ROM data with LZMA for sectors and zlib for subchannel. +// CD codec format (from MAME chdcodec.cpp): +// - ECC bitmap: (frames + 7) / 8 bytes - indicates which frames have ECC data cleared +// - Compressed length: 2 bytes (if destlen < 65536) or 3 bytes +// - Base compressed data (LZMA) +// - Subcode compressed data (zlib) +// +//nolint:gocognit,gocyclo,cyclop,revive // CD LZMA decompression requires complex sector/subchannel interleaving +func (*cdLZMACodec) DecompressCD(dst, src []byte, destLen, frames int) (int, error) { + // Calculate header sizes (matching MAME's chd_cd_decompressor) + compLenBytes := 2 + if destLen >= 65536 { + compLenBytes = 3 + } + eccBytes := (frames + 7) / 8 + headerBytes := eccBytes + compLenBytes + + if len(src) < headerBytes { + return 0, fmt.Errorf("%w: cdlz: source too small for header", ErrDecompressFailed) + } + + // Extract ECC bitmap (for later reconstruction) + eccBitmap := src[:eccBytes] + + // Extract compressed base length + var compLenBase int + if compLenBytes > 2 { + //nolint:gosec // G602: bounds checked via headerBytes = eccBytes + compLenBytes check above + compLenBase = int(src[eccBytes])<<16 | int(src[eccBytes+1])<<8 | int(src[eccBytes+2]) + } else { + compLenBase = int(binary.BigEndian.Uint16(src[eccBytes : eccBytes+2])) + } + + if headerBytes+compLenBase > len(src) { + return 0, fmt.Errorf("%w: cdlz: invalid base length %d", ErrDecompressFailed, compLenBase) + } + + baseData := src[headerBytes : headerBytes+compLenBase] + subData := src[headerBytes+compLenBase:] + + // Calculate expected sizes + sectorSize := 2352 + subSize := 96 + totalSectorBytes := frames * sectorSize + totalSubBytes := frames * subSize + + // Decompress sector data with LZMA + // Note: For CD codecs, the LZMA properties are computed from totalSectorBytes + sectorDst := make([]byte, totalSectorBytes) + //nolint:gosec // Safe: totalSectorBytes = frames * 2352, bounded by hunk size + lzmaCodec := &lzmaCodec{hunkBytes: uint32(totalSectorBytes)} + sectorN, err := lzmaCodec.Decompress(sectorDst, baseData) + if err != nil { + return 0, fmt.Errorf("%w: cdlz sector: %w", ErrDecompressFailed, err) + } + + // Decompress subchannel data with zlib if present + var subDst []byte + if len(subData) > 0 && totalSubBytes > 0 { + subDst = make([]byte, totalSubBytes) + reader := flate.NewReader(bytes.NewReader(subData)) + _, err = io.ReadFull(reader, subDst) + _ = reader.Close() + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + // Subchannel decompression failure is not fatal + subDst = make([]byte, totalSubBytes) + } + } else { + subDst = make([]byte, totalSubBytes) + } + + // Reassemble the data with ECC reconstruction + dstOffset := 0 + for i := range frames { + srcSectorOffset := i * sectorSize + if srcSectorOffset+sectorSize <= sectorN { + copy(dst[dstOffset:], sectorDst[srcSectorOffset:srcSectorOffset+sectorSize]) + } + + // Reconstitute ECC data and sync header if bit is set + if (eccBitmap[i/8] & (1 << (i % 8))) != 0 { + // Copy sync header + copy(dst[dstOffset:], cdSyncHeader[:]) + // ECC generation would go here but we skip it for identification purposes + } + + dstOffset += sectorSize + + if subSize > 0 { + srcSubOffset := i * subSize + if srcSubOffset+subSize <= len(subDst) { + copy(dst[dstOffset:], subDst[srcSubOffset:srcSubOffset+subSize]) + } + dstOffset += subSize + } + } + + return dstOffset, nil +} diff --git a/chd/codec_zlib.go b/chd/codec_zlib.go new file mode 100644 index 0000000..50e922f --- /dev/null +++ b/chd/codec_zlib.go @@ -0,0 +1,165 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import ( + "bytes" + "compress/flate" + "encoding/binary" + "errors" + "fmt" + "io" +) + +func init() { + RegisterCodec(CodecZlib, func() Codec { return &zlibCodec{} }) + RegisterCodec(CodecCDZlib, func() Codec { return &cdZlibCodec{} }) +} + +// zlibCodec implements zlib decompression for CHD hunks. +// Note: CHD uses raw deflate (RFC 1951), not zlib wrapper. +type zlibCodec struct{} + +// Decompress decompresses zlib/deflate compressed data. +func (*zlibCodec) Decompress(dst, src []byte) (int, error) { + reader := flate.NewReader(bytes.NewReader(src)) + defer func() { _ = reader.Close() }() + + n, err := io.ReadFull(reader, dst) + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + return n, fmt.Errorf("%w: zlib: %w", ErrDecompressFailed, err) + } + + return n, nil +} + +// cdZlibCodec implements CD-ROM zlib decompression. +// CD zlib compresses sector data with deflate and subchannel data separately. +type cdZlibCodec struct{} + +// Decompress implements basic decompression (delegates to DecompressCD with defaults). +func (c *cdZlibCodec) Decompress(dst, src []byte) (int, error) { + // For generic decompression, assume standard CD sector size + // This is a fallback; normally DecompressCD should be called + return c.DecompressCD(dst, src, len(dst), len(dst)/2448) +} + +// DecompressCD decompresses CD-ROM data with sector/subchannel handling. +// CD codec format (from MAME chdcodec.cpp): +// - ECC bitmap: (frames + 7) / 8 bytes - indicates which frames have ECC data cleared +// - Compressed length: 2 bytes (if destlen < 65536) or 3 bytes +// - Base compressed data (deflate) +// - Subcode compressed data (deflate) +// +//nolint:gocognit,gocyclo,cyclop,revive // CD zlib decompression requires complex sector/subchannel interleaving +func (*cdZlibCodec) DecompressCD(dst, src []byte, destLen, frames int) (int, error) { + // Calculate header sizes (matching MAME's chd_cd_decompressor) + compLenBytes := 2 + if destLen >= 65536 { + compLenBytes = 3 + } + eccBytes := (frames + 7) / 8 + headerBytes := eccBytes + compLenBytes + + if len(src) < headerBytes { + return 0, fmt.Errorf("%w: cdzl: source too small for header", ErrDecompressFailed) + } + + // Extract ECC bitmap (for later reconstruction) + eccBitmap := src[:eccBytes] + + // Extract compressed base length + var compLenBase int + if compLenBytes > 2 { + //nolint:gosec // G602: bounds checked via headerBytes = eccBytes + compLenBytes check above + compLenBase = int(src[eccBytes])<<16 | int(src[eccBytes+1])<<8 | int(src[eccBytes+2]) + } else { + compLenBase = int(binary.BigEndian.Uint16(src[eccBytes : eccBytes+2])) + } + + if headerBytes+compLenBase > len(src) { + return 0, fmt.Errorf("%w: cdzl: invalid base length %d", ErrDecompressFailed, compLenBase) + } + + baseData := src[headerBytes : headerBytes+compLenBase] + subData := src[headerBytes+compLenBase:] + + // Calculate expected sizes + sectorSize := 2352 + subSize := 96 + totalSectorBytes := frames * sectorSize + totalSubBytes := frames * subSize + + // Decompress sector data + sectorDst := make([]byte, totalSectorBytes) + reader := flate.NewReader(bytes.NewReader(baseData)) + sectorN, err := io.ReadFull(reader, sectorDst) + _ = reader.Close() + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + return 0, fmt.Errorf("%w: cdzl sector: %w", ErrDecompressFailed, err) + } + + // Decompress subchannel data if present + var subDst []byte + if len(subData) > 0 && totalSubBytes > 0 { + subDst = make([]byte, totalSubBytes) + reader = flate.NewReader(bytes.NewReader(subData)) + _, err = io.ReadFull(reader, subDst) + _ = reader.Close() + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + // Subchannel decompression failure is not fatal - may be zero-filled + subDst = make([]byte, totalSubBytes) + } + } else { + subDst = make([]byte, totalSubBytes) + } + + // Reassemble the data with ECC reconstruction + dstOffset := 0 + for i := range frames { + // Copy sector data + srcSectorOffset := i * sectorSize + if srcSectorOffset+sectorSize <= sectorN { + copy(dst[dstOffset:], sectorDst[srcSectorOffset:srcSectorOffset+sectorSize]) + } + + // Reconstitute ECC data and sync header if bit is set + if (eccBitmap[i/8] & (1 << (i % 8))) != 0 { + // Copy sync header + copy(dst[dstOffset:], cdSyncHeader[:]) + // ECC generation would go here but we skip it for identification purposes + } + + dstOffset += sectorSize + + // Copy subchannel data + if subSize > 0 { + srcSubOffset := i * subSize + if srcSubOffset+subSize <= len(subDst) { + copy(dst[dstOffset:], subDst[srcSubOffset:srcSubOffset+subSize]) + } + dstOffset += subSize + } + } + + return dstOffset, nil +} + +// cdSyncHeader is the standard CD-ROM sync header. +var cdSyncHeader = [12]byte{0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00} diff --git a/chd/codec_zstd.go b/chd/codec_zstd.go new file mode 100644 index 0000000..5a4eb59 --- /dev/null +++ b/chd/codec_zstd.go @@ -0,0 +1,155 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import ( + "bytes" + "compress/flate" + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/klauspost/compress/zstd" +) + +func init() { + RegisterCodec(CodecZstd, func() Codec { return &zstdCodec{} }) + RegisterCodec(CodecCDZstd, func() Codec { return &cdZstdCodec{} }) +} + +// zstdCodec implements Zstandard decompression for CHD hunks. +type zstdCodec struct { + decoder *zstd.Decoder +} + +// Decompress decompresses Zstandard compressed data. +func (z *zstdCodec) Decompress(dst, src []byte) (int, error) { + if z.decoder == nil { + decoder, err := zstd.NewReader(nil) + if err != nil { + return 0, fmt.Errorf("%w: zstd init: %w", ErrDecompressFailed, err) + } + z.decoder = decoder + } + + result, err := z.decoder.DecodeAll(src, dst[:0]) + if err != nil { + return 0, fmt.Errorf("%w: zstd: %w", ErrDecompressFailed, err) + } + + // Copy result to dst if needed + if len(result) > len(dst) { + return 0, fmt.Errorf("%w: zstd: output too large", ErrDecompressFailed) + } + if &result[0] != &dst[0] { + copy(dst, result) + } + + return len(result), nil +} + +// cdZstdCodec implements CD-ROM Zstandard decompression. +// CD Zstd compresses sector data with Zstandard and subchannel data with zlib. +type cdZstdCodec struct { + decoder *zstd.Decoder +} + +// Decompress implements basic decompression. +func (c *cdZstdCodec) Decompress(dst, src []byte) (int, error) { + return c.DecompressCD(dst, src, len(dst), len(dst)/2448) +} + +// DecompressCD decompresses CD-ROM data with Zstandard for sectors and zlib for subchannel. +// CD Zstd format: +// - First 4 bytes: compressed sector data length (big-endian) +// - Next N bytes: Zstd-compressed sector data +// - Remaining bytes: zlib-compressed subchannel data +// +//nolint:gocognit,revive // CD Zstd decompression requires complex sector/subchannel interleaving +func (c *cdZstdCodec) DecompressCD(dst, src []byte, _, frames int) (int, error) { + if len(src) < 4 { + return 0, fmt.Errorf("%w: cdzs: source too small", ErrDecompressFailed) + } + + // Read compressed sector data length + sectorCompLen := binary.BigEndian.Uint32(src[0:4]) + if int(sectorCompLen) > len(src)-4 { + return 0, fmt.Errorf("%w: cdzs: invalid sector length %d", ErrDecompressFailed, sectorCompLen) + } + + sectorData := src[4 : 4+sectorCompLen] + subData := src[4+sectorCompLen:] + + // Calculate expected sizes + sectorSize := 2352 + subSize := 96 + totalSectorBytes := frames * sectorSize + totalSubBytes := frames * subSize + + // Initialize decoder if needed + if c.decoder == nil { + decoder, err := zstd.NewReader(nil) + if err != nil { + return 0, fmt.Errorf("%w: cdzs init: %w", ErrDecompressFailed, err) + } + c.decoder = decoder + } + + // Decompress sector data with Zstandard + sectorDst, err := c.decoder.DecodeAll(sectorData, make([]byte, 0, totalSectorBytes)) + if err != nil { + return 0, fmt.Errorf("%w: cdzs sector: %w", ErrDecompressFailed, err) + } + + // Decompress subchannel data with zlib if present + var subDst []byte + if len(subData) > 0 && totalSubBytes > 0 { + subDst = make([]byte, totalSubBytes) + reader := flate.NewReader(bytes.NewReader(subData)) + _, err = io.ReadFull(reader, subDst) + _ = reader.Close() + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + // Subchannel decompression failure is not fatal + subDst = make([]byte, totalSubBytes) + } + } else { + subDst = make([]byte, totalSubBytes) + } + + // Interleave sector and subchannel data + dstOffset := 0 + for i := range frames { + srcSectorOffset := i * sectorSize + if srcSectorOffset+sectorSize <= len(sectorDst) { + copy(dst[dstOffset:], sectorDst[srcSectorOffset:srcSectorOffset+sectorSize]) + } + dstOffset += sectorSize + + if subSize > 0 { + srcSubOffset := i * subSize + if srcSubOffset+subSize <= len(subDst) { + copy(dst[dstOffset:], subDst[srcSubOffset:srcSubOffset+subSize]) + } + dstOffset += subSize + } + } + + return dstOffset, nil +} diff --git a/chd/errors.go b/chd/errors.go new file mode 100644 index 0000000..eeef7cf --- /dev/null +++ b/chd/errors.go @@ -0,0 +1,69 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import "errors" + +// Allocation limits to prevent DoS from malicious CHD files. +const ( + // MaxCompMapLen is the maximum compressed map size (100MB). + MaxCompMapLen = 100 * 1024 * 1024 + + // MaxNumHunks is the maximum number of hunks (10M = ~200GB uncompressed). + MaxNumHunks = 10_000_000 + + // MaxMetadataLen is the maximum metadata entry size (16MB, matches 24-bit limit). + MaxMetadataLen = 16 * 1024 * 1024 + + // MaxNumTracks is the maximum number of tracks (200, generous for any disc). + MaxNumTracks = 200 + + // MaxMetadataEntries is the maximum metadata chain entries (prevents loops). + MaxMetadataEntries = 1000 +) + +// Common errors for CHD parsing. +var ( + // ErrInvalidMagic indicates the file does not have a valid CHD magic word. + ErrInvalidMagic = errors.New("invalid CHD magic: expected MComprHD") + + // ErrInvalidHeader indicates the header structure is invalid. + ErrInvalidHeader = errors.New("invalid CHD header") + + // ErrUnsupportedVersion indicates an unsupported CHD version. + ErrUnsupportedVersion = errors.New("unsupported CHD version") + + // ErrUnsupportedCodec indicates an unsupported compression codec. + ErrUnsupportedCodec = errors.New("unsupported compression codec") + + // ErrInvalidHunk indicates an invalid hunk index. + ErrInvalidHunk = errors.New("invalid hunk index") + + // ErrDecompressFailed indicates decompression failed. + ErrDecompressFailed = errors.New("decompression failed") + + // ErrCorruptData indicates data corruption was detected. + ErrCorruptData = errors.New("data corruption detected") + + // ErrNoTracks indicates no track metadata was found. + ErrNoTracks = errors.New("no track metadata found") + + // ErrInvalidMetadata indicates invalid metadata format. + ErrInvalidMetadata = errors.New("invalid metadata format") +) diff --git a/chd/header.go b/chd/header.go new file mode 100644 index 0000000..f84dc81 --- /dev/null +++ b/chd/header.go @@ -0,0 +1,298 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +// Package chd provides parsing for CHD (Compressed Hunks of Data) disc images. +package chd + +import ( + "encoding/binary" + "fmt" + "io" +) + +// CHD format magic word +var chdMagic = [8]byte{'M', 'C', 'o', 'm', 'p', 'r', 'H', 'D'} + +// Header sizes for different CHD versions +const ( + headerSizeV3 = 120 + headerSizeV4 = 108 + headerSizeV5 = 124 +) + +// Header represents a CHD file header. +// This struct supports V5 format (current standard) with fields for earlier versions. +type Header struct { + Magic [8]byte // "MComprHD" + HeaderSize uint32 // Header length in bytes + Version uint32 // CHD version (3, 4, or 5) + Compressors [4]uint32 // Compression codec tags (V5) + LogicalBytes uint64 // Total uncompressed size + MapOffset uint64 // Offset to hunk map + MetaOffset uint64 // Offset to metadata + HunkBytes uint32 // Bytes per hunk + UnitBytes uint32 // Bytes per unit (sector size) + RawSHA1 [20]byte // SHA1 of raw data + SHA1 [20]byte // SHA1 of raw + metadata + ParentSHA1 [20]byte // Parent SHA1 (for delta CHDs) + + // V3/V4 specific fields + Flags uint32 // V3/V4 flags + Compression uint32 // V3/V4 compression type + TotalHunks uint32 // V3/V4 total number of hunks +} + +// parseHeader reads and parses a CHD header from the given reader. +func parseHeader(reader io.Reader) (*Header, error) { + // Read magic and header size first + magicBuf := make([]byte, 12) + if _, err := io.ReadFull(reader, magicBuf); err != nil { + return nil, fmt.Errorf("read magic: %w", err) + } + + var header Header + copy(header.Magic[:], magicBuf[:8]) + + // Verify magic + if header.Magic != chdMagic { + return nil, ErrInvalidMagic + } + + header.HeaderSize = binary.BigEndian.Uint32(magicBuf[8:12]) + + // Read rest of header based on size + remaining := int(header.HeaderSize) - 12 + if remaining <= 0 { + return nil, fmt.Errorf("%w: header size %d", ErrInvalidHeader, header.HeaderSize) + } + + headerBuf := make([]byte, remaining) + if _, err := io.ReadFull(reader, headerBuf); err != nil { + return nil, fmt.Errorf("read header: %w", err) + } + + // Parse version + header.Version = binary.BigEndian.Uint32(headerBuf[0:4]) + + switch header.Version { + case 5: + if err := parseHeaderV5(&header, headerBuf); err != nil { + return nil, err + } + case 4: + if err := parseHeaderV4(&header, headerBuf); err != nil { + return nil, err + } + case 3: + if err := parseHeaderV3(&header, headerBuf); err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("%w: version %d", ErrUnsupportedVersion, header.Version) + } + + return &header, nil +} + +// parseHeaderV5 parses a V5 CHD header. +// V5 header layout (after magic + size + version, total 124 bytes): +// +// Offset 0x00: Magic (8 bytes) +// Offset 0x08: Header size (4 bytes) +// Offset 0x0C: Version (4 bytes) +// Offset 0x10: Compressor 0 (4 bytes) +// Offset 0x14: Compressor 1 (4 bytes) +// Offset 0x18: Compressor 2 (4 bytes) +// Offset 0x1C: Compressor 3 (4 bytes) +// Offset 0x20: Logical bytes (8 bytes) +// Offset 0x28: Map offset (8 bytes) +// Offset 0x30: Meta offset (8 bytes) +// Offset 0x38: Hunk bytes (4 bytes) +// Offset 0x3C: Unit bytes (4 bytes) +// Offset 0x40: Raw SHA1 (20 bytes) +// Offset 0x54: SHA1 (20 bytes) +// Offset 0x68: Parent SHA1 (20 bytes) +func parseHeaderV5(header *Header, buf []byte) error { + if len(buf) < headerSizeV5-12 { + return fmt.Errorf("%w: buffer too small for V5", ErrInvalidHeader) + } + + // Compressors (4 x 4 bytes starting at offset 4 in buf, which is offset 0x10 in file) + header.Compressors[0] = binary.BigEndian.Uint32(buf[4:8]) + header.Compressors[1] = binary.BigEndian.Uint32(buf[8:12]) + header.Compressors[2] = binary.BigEndian.Uint32(buf[12:16]) + header.Compressors[3] = binary.BigEndian.Uint32(buf[16:20]) + + // Logical bytes (8 bytes at offset 20 in buf) + header.LogicalBytes = binary.BigEndian.Uint64(buf[20:28]) + + // Map offset (8 bytes at offset 28 in buf) + header.MapOffset = binary.BigEndian.Uint64(buf[28:36]) + + // Meta offset (8 bytes at offset 36 in buf) + header.MetaOffset = binary.BigEndian.Uint64(buf[36:44]) + + // Hunk bytes (4 bytes at offset 44 in buf) + header.HunkBytes = binary.BigEndian.Uint32(buf[44:48]) + + // Unit bytes (4 bytes at offset 48 in buf) + header.UnitBytes = binary.BigEndian.Uint32(buf[48:52]) + + // Raw SHA1 (20 bytes at offset 52 in buf) + copy(header.RawSHA1[:], buf[52:72]) + + // SHA1 (20 bytes at offset 72 in buf) + copy(header.SHA1[:], buf[72:92]) + + // Parent SHA1 (20 bytes at offset 92 in buf) + copy(header.ParentSHA1[:], buf[92:112]) + + return nil +} + +// parseHeaderV4 parses a V4 CHD header. +// V4 header layout (108 bytes total): +// +// Offset 0x00: Magic (8 bytes) +// Offset 0x08: Header size (4 bytes) +// Offset 0x0C: Version (4 bytes) +// Offset 0x10: Flags (4 bytes) +// Offset 0x14: Compression (4 bytes) +// Offset 0x18: Total hunks (4 bytes) +// Offset 0x1C: Logical bytes (8 bytes) +// Offset 0x24: Meta offset (8 bytes) +// Offset 0x2C: Hunk bytes (4 bytes) +// Offset 0x30: SHA1 (20 bytes) +// Offset 0x44: Parent SHA1 (20 bytes) +// Offset 0x58: Raw SHA1 (20 bytes) +func parseHeaderV4(header *Header, buf []byte) error { + if len(buf) < headerSizeV4-12 { + return fmt.Errorf("%w: buffer too small for V4", ErrInvalidHeader) + } + + // Flags (4 bytes at offset 4 in buf) + header.Flags = binary.BigEndian.Uint32(buf[4:8]) + + // Compression (4 bytes at offset 8 in buf) + header.Compression = binary.BigEndian.Uint32(buf[8:12]) + + // Total hunks (4 bytes at offset 12 in buf) + header.TotalHunks = binary.BigEndian.Uint32(buf[12:16]) + + // Logical bytes (8 bytes at offset 16 in buf) + header.LogicalBytes = binary.BigEndian.Uint64(buf[16:24]) + + // Meta offset (8 bytes at offset 24 in buf) + header.MetaOffset = binary.BigEndian.Uint64(buf[24:32]) + + // Hunk bytes (4 bytes at offset 32 in buf) + header.HunkBytes = binary.BigEndian.Uint32(buf[32:36]) + + // SHA1 (20 bytes at offset 36 in buf) + copy(header.SHA1[:], buf[36:56]) + + // Parent SHA1 (20 bytes at offset 56 in buf) + copy(header.ParentSHA1[:], buf[56:76]) + + // Raw SHA1 (20 bytes at offset 76 in buf) + copy(header.RawSHA1[:], buf[76:96]) + + // V4 doesn't have unit bytes - calculate from typical CD sector size + header.UnitBytes = 2448 // Default for CD-ROM + + // Map offset for V4 is right after header + header.MapOffset = uint64(header.HeaderSize) + + return nil +} + +// parseHeaderV3 parses a V3 CHD header. +// V3 header layout (120 bytes total): +// +// Offset 0x00: Magic (8 bytes) +// Offset 0x08: Header size (4 bytes) +// Offset 0x0C: Version (4 bytes) +// Offset 0x10: Flags (4 bytes) +// Offset 0x14: Compression (4 bytes) +// Offset 0x18: Total hunks (4 bytes) +// Offset 0x1C: Logical bytes (8 bytes) +// Offset 0x24: Meta offset (8 bytes) +// Offset 0x2C: MD5 (16 bytes) +// Offset 0x3C: Parent MD5 (16 bytes) +// Offset 0x4C: Hunk bytes (4 bytes) +// Offset 0x50: SHA1 (20 bytes) +// Offset 0x64: Parent SHA1 (20 bytes) +func parseHeaderV3(header *Header, buf []byte) error { + if len(buf) < headerSizeV3-12 { + return fmt.Errorf("%w: buffer too small for V3", ErrInvalidHeader) + } + + // Flags (4 bytes at offset 4 in buf) + header.Flags = binary.BigEndian.Uint32(buf[4:8]) + + // Compression (4 bytes at offset 8 in buf) + header.Compression = binary.BigEndian.Uint32(buf[8:12]) + + // Total hunks (4 bytes at offset 12 in buf) + header.TotalHunks = binary.BigEndian.Uint32(buf[12:16]) + + // Logical bytes (8 bytes at offset 16 in buf) + header.LogicalBytes = binary.BigEndian.Uint64(buf[16:24]) + + // Meta offset (8 bytes at offset 24 in buf) + header.MetaOffset = binary.BigEndian.Uint64(buf[24:32]) + + // MD5 hashes skipped (16 + 16 = 32 bytes at offset 32) + + // Hunk bytes (4 bytes at offset 64 in buf) + header.HunkBytes = binary.BigEndian.Uint32(buf[64:68]) + + // SHA1 (20 bytes at offset 68 in buf) + copy(header.SHA1[:], buf[68:88]) + + // Parent SHA1 (20 bytes at offset 88 in buf) + copy(header.ParentSHA1[:], buf[88:108]) + + // V3 doesn't have unit bytes - calculate from typical CD sector size + header.UnitBytes = 2448 // Default for CD-ROM + + // Map offset for V3 is right after header + header.MapOffset = uint64(header.HeaderSize) + + return nil +} + +// NumHunks returns the total number of hunks in the CHD file. +func (h *Header) NumHunks() uint32 { + if h.TotalHunks > 0 { + return h.TotalHunks + } + if h.HunkBytes == 0 { + return 0 + } + //nolint:gosec // Safe: result bounded by file size, will not overflow for valid CHD files + return uint32((h.LogicalBytes + uint64(h.HunkBytes) - 1) / uint64(h.HunkBytes)) +} + +// IsCompressed returns true if the CHD uses compression. +func (h *Header) IsCompressed() bool { + if h.Version == 5 { + return h.Compressors[0] != 0 + } + return h.Compression != 0 +} diff --git a/chd/hunk.go b/chd/hunk.go new file mode 100644 index 0000000..8924307 --- /dev/null +++ b/chd/hunk.go @@ -0,0 +1,405 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import ( + "encoding/binary" + "fmt" + "io" + "sync" +) + +// Hunk compression types (V5 map entry types). +const ( + HunkCompTypeCodec0 = 0 // Compressed with compressor 0 + HunkCompTypeCodec1 = 1 // Compressed with compressor 1 + HunkCompTypeCodec2 = 2 // Compressed with compressor 2 + HunkCompTypeCodec3 = 3 // Compressed with compressor 3 + HunkCompTypeNone = 4 // Uncompressed + HunkCompTypeSelf = 5 // Reference to another hunk in this CHD + HunkCompTypeParent = 6 // Reference to parent CHD + HunkCompTypeRLESmall = 7 // RLE: repeat last compression type (small count) + HunkCompTypeRLELarge = 8 // RLE: repeat last compression type (large count) + HunkCompTypeSelf0 = 9 // Self reference to same hunk as last + HunkCompTypeSelf1 = 10 // Self reference to last+1 + HunkCompTypeParSelf = 11 // Parent reference to self + HunkCompTypePar0 = 12 // Parent reference same as last + HunkCompTypePar1 = 13 // Parent reference last+1 +) + +// HunkMapEntry represents a single entry in the V5 hunk map. +type HunkMapEntry struct { + Offset uint64 + CompLength uint32 + CRC16 uint16 + CompType uint8 +} + +// HunkMap manages the hunk map and caching for a CHD file. +type HunkMap struct { + reader io.ReaderAt + header *Header + cache map[uint32][]byte + entries []HunkMapEntry + codecs []Codec + cacheSize int + maxCache int + cacheMu sync.RWMutex +} + +// NewHunkMap creates a new hunk map from the CHD header and reader. +func NewHunkMap(reader io.ReaderAt, header *Header) (*HunkMap, error) { + hm := &HunkMap{ + reader: reader, + header: header, + cache: make(map[uint32][]byte), + maxCache: 16, // Cache up to 16 hunks + } + + // Initialize codecs for V5 + if header.Version == 5 { + for _, tag := range header.Compressors { + if tag == 0 { + hm.codecs = append(hm.codecs, nil) + continue + } + codec, err := GetCodec(tag) + if err != nil { + // Codec not available - continue without it. If a hunk actually + // needs this codec, decompressWithCodec will return a clear error. + hm.codecs = append(hm.codecs, nil) + continue + } + hm.codecs = append(hm.codecs, codec) + } + } + + // Parse hunk map + if err := hm.parseMap(); err != nil { + return nil, fmt.Errorf("parse hunk map: %w", err) + } + + return hm, nil +} + +// parseMap parses the hunk map from the CHD file. +func (hm *HunkMap) parseMap() error { + numHunks := hm.header.NumHunks() + if numHunks > MaxNumHunks { + return fmt.Errorf("%w: too many hunks (%d > %d)", ErrInvalidHeader, numHunks, MaxNumHunks) + } + hm.entries = make([]HunkMapEntry, numHunks) + + switch hm.header.Version { + case 5: + return hm.parseMapV5() + case 4, 3: + return hm.parseMapV4() + default: + return fmt.Errorf("%w: version %d", ErrUnsupportedVersion, hm.header.Version) + } +} + +// parseMapV5 parses a V5 compressed hunk map. +// V5 map header (16 bytes): +// +// Offset 0: Compressed map length (4 bytes) +// Offset 4: First block offset (6 bytes, 48-bit) +// Offset 10: CRC16 (2 bytes) +// Offset 12: Bits for length (1 byte) +// Offset 13: Bits for self-ref (1 byte) +// Offset 14: Bits for parent-ref (1 byte) +// Offset 15: Reserved (1 byte) +// +//nolint:gosec,gocyclo,cyclop,funlen,revive // Safe: MapOffset validated; complexity needed for CHD format +func (hm *HunkMap) parseMapV5() error { + // Read map header + mapHeader := make([]byte, 16) + if _, err := hm.reader.ReadAt(mapHeader, int64(hm.header.MapOffset)); err != nil { + return fmt.Errorf("read map header: %w", err) + } + + compMapLen := binary.BigEndian.Uint32(mapHeader[0:4]) + if compMapLen > MaxCompMapLen { + return fmt.Errorf("%w: compressed map too large (%d > %d)", ErrInvalidHeader, compMapLen, MaxCompMapLen) + } + firstOffs := uint64(mapHeader[4])<<40 | uint64(mapHeader[5])<<32 | + uint64(mapHeader[6])<<24 | uint64(mapHeader[7])<<16 | + uint64(mapHeader[8])<<8 | uint64(mapHeader[9]) + lengthBits := int(mapHeader[12]) + selfBits := int(mapHeader[13]) + parentBits := int(mapHeader[14]) + + // Read compressed map data + compMap := make([]byte, compMapLen) + if _, err := hm.reader.ReadAt(compMap, int64(hm.header.MapOffset)+16); err != nil { + return fmt.Errorf("read compressed map: %w", err) + } + + // Create bit reader and Huffman decoder + br := newBitReader(compMap) + decoder := newHuffmanDecoder(16, 8) // 16 codes, 8-bit max + + if err := decoder.importTreeRLE(br); err != nil { + return fmt.Errorf("import huffman tree: %w", err) + } + + // Phase 1: Decode compression types with RLE + numHunks := hm.header.NumHunks() + compTypes := make([]uint8, numHunks) + var lastComp uint8 + var repCount int + + for hunkNum := range numHunks { + if repCount > 0 { + compTypes[hunkNum] = lastComp + repCount-- + continue + } + + val := decoder.decode(br) + switch val { + case HunkCompTypeRLESmall: + compTypes[hunkNum] = lastComp + repCount = 2 + int(decoder.decode(br)) + case HunkCompTypeRLELarge: + compTypes[hunkNum] = lastComp + repCount = 2 + 16 + (int(decoder.decode(br)) << 4) + repCount += int(decoder.decode(br)) + default: + compTypes[hunkNum] = val + lastComp = val + } + } + + // Phase 2: Read offsets/lengths based on compression type + curOffset := firstOffs + var lastSelf uint32 + var lastParent uint64 + + for hunkNum := range numHunks { + compType := compTypes[hunkNum] + var length uint32 + var offset uint64 + + switch compType { + case HunkCompTypeCodec0, HunkCompTypeCodec1, HunkCompTypeCodec2, HunkCompTypeCodec3: + length = br.read(lengthBits) + offset = curOffset + curOffset += uint64(length) + br.read(16) // CRC16 + case HunkCompTypeNone: + length = hm.header.HunkBytes + offset = curOffset + curOffset += uint64(length) + br.read(16) // CRC16 + case HunkCompTypeSelf: + lastSelf = br.read(selfBits) + offset = uint64(lastSelf) + case HunkCompTypeParent: + lastParent = uint64(br.read(parentBits)) + offset = lastParent + case HunkCompTypeSelf0: + offset = uint64(lastSelf) + compType = HunkCompTypeSelf + case HunkCompTypeSelf1: + lastSelf++ + offset = uint64(lastSelf) + compType = HunkCompTypeSelf + case HunkCompTypeParSelf: + offset = uint64(hunkNum) * uint64(hm.header.HunkBytes) / uint64(hm.header.UnitBytes) + lastParent = offset + compType = HunkCompTypeParent + case HunkCompTypePar0: + offset = lastParent + compType = HunkCompTypeParent + case HunkCompTypePar1: + lastParent += uint64(hm.header.HunkBytes) / uint64(hm.header.UnitBytes) + offset = lastParent + compType = HunkCompTypeParent + } + + hm.entries[hunkNum] = HunkMapEntry{ + CompType: compType, + CompLength: length, + Offset: offset, + } + } + + return nil +} + +// parseMapV4 parses a V3/V4 hunk map. +// V4 map is uncompressed, 16 bytes per entry: +// +// Offset 0: Offset (8 bytes) +// Offset 8: CRC32 (4 bytes) +// Offset 12: Length (2 bytes) + Flags (2 bytes) +func (hm *HunkMap) parseMapV4() error { + numHunks := hm.header.NumHunks() + entrySize := 16 + mapData := make([]byte, int(numHunks)*entrySize) + + //nolint:gosec // Safe: MapOffset validated during header parsing, int64 conversion safe for valid CHD files + if _, err := hm.reader.ReadAt(mapData, int64(hm.header.MapOffset)); err != nil { + return fmt.Errorf("read V4 map: %w", err) + } + + for i := range numHunks { + offset := int(i) * entrySize + + entryOffset := binary.BigEndian.Uint64(mapData[offset : offset+8]) + // CRC32 at offset+8 (skipped) + length := binary.BigEndian.Uint16(mapData[offset+12 : offset+14]) + flags := binary.BigEndian.Uint16(mapData[offset+14 : offset+16]) + + compType := uint8(HunkCompTypeNone) + if flags&1 != 0 { + compType = HunkCompTypeCodec0 // Compressed + } + + hm.entries[i] = HunkMapEntry{ + CompType: compType, + CompLength: uint32(length), + Offset: entryOffset, + } + } + + return nil +} + +// ReadHunk reads and decompresses a hunk by index. +func (hm *HunkMap) ReadHunk(index uint32) ([]byte, error) { + //nolint:gosec // Safe: len(entries) bounded by NumHunks which fits in uint32 + if index >= uint32(len(hm.entries)) { + return nil, fmt.Errorf("%w: %d >= %d", ErrInvalidHunk, index, len(hm.entries)) + } + + // Check cache + hm.cacheMu.RLock() + if data, ok := hm.cache[index]; ok { + hm.cacheMu.RUnlock() + return data, nil + } + hm.cacheMu.RUnlock() + + // Read and decompress + entry := hm.entries[index] + data, err := hm.decompressHunk(entry) + if err != nil { + return nil, fmt.Errorf("decompress hunk %d: %w", index, err) + } + + // Update cache + hm.cacheMu.Lock() + if hm.cacheSize >= hm.maxCache { + // Simple cache eviction: clear all + hm.cache = make(map[uint32][]byte) + hm.cacheSize = 0 + } + hm.cache[index] = data + hm.cacheSize++ + hm.cacheMu.Unlock() + + return data, nil +} + +// decompressHunk decompresses a single hunk. +func (hm *HunkMap) decompressHunk(entry HunkMapEntry) ([]byte, error) { + hunkSize := int(hm.header.HunkBytes) + dst := make([]byte, hunkSize) + + switch entry.CompType { + case HunkCompTypeNone: + return hm.readUncompressedHunk(dst, entry) + case HunkCompTypeCodec0, HunkCompTypeCodec1, HunkCompTypeCodec2, HunkCompTypeCodec3: + return hm.decompressWithCodec(dst, entry, hunkSize) + case HunkCompTypeSelf: + return hm.readSelfRefHunk(entry) + default: + return nil, fmt.Errorf("%w: compression type %d", ErrUnsupportedCodec, entry.CompType) + } +} + +// readUncompressedHunk reads an uncompressed hunk directly. +func (hm *HunkMap) readUncompressedHunk(dst []byte, entry HunkMapEntry) ([]byte, error) { + //nolint:gosec // Safe: entry.Offset from validated hunk map + if _, err := hm.reader.ReadAt(dst, int64(entry.Offset)); err != nil { + return nil, fmt.Errorf("read uncompressed: %w", err) + } + return dst, nil +} + +// decompressWithCodec decompresses a hunk using one of the registered codecs. +func (hm *HunkMap) decompressWithCodec(dst []byte, entry HunkMapEntry, hunkSize int) ([]byte, error) { + codecIdx := int(entry.CompType) + if codecIdx >= len(hm.codecs) || hm.codecs[codecIdx] == nil { + return nil, fmt.Errorf("%w: codec %d not available", ErrUnsupportedCodec, codecIdx) + } + + compData := make([]byte, entry.CompLength) + //nolint:gosec // Safe: entry.Offset from validated hunk map + if _, err := hm.reader.ReadAt(compData, int64(entry.Offset)); err != nil { + return nil, fmt.Errorf("read compressed: %w", err) + } + + codec := hm.codecs[codecIdx] + + if cdCodec, ok := codec.(CDCodec); ok { + unitBytes := int(hm.header.UnitBytes) + if unitBytes == 0 { + unitBytes = 2448 + } + frames := hunkSize / unitBytes + + decompN, err := cdCodec.DecompressCD(dst, compData, hunkSize, frames) + if err != nil { + return nil, fmt.Errorf("decompress CD: %w", err) + } + return dst[:decompN], nil + } + + decompN, err := codec.Decompress(dst, compData) + if err != nil { + return nil, fmt.Errorf("decompress: %w", err) + } + return dst[:decompN], nil +} + +// readSelfRefHunk reads a hunk that references another hunk. +func (hm *HunkMap) readSelfRefHunk(entry HunkMapEntry) ([]byte, error) { + //nolint:gosec // Safe: entry.Offset used as hunk index, validated below + refHunk := uint32(entry.Offset) + //nolint:gosec // Safe: len(entries) bounded by NumHunks + if refHunk >= uint32(len(hm.entries)) { + return nil, fmt.Errorf("%w: self-ref %d", ErrInvalidHunk, refHunk) + } + return hm.ReadHunk(refHunk) +} + +// NumHunks returns the total number of hunks. +func (hm *HunkMap) NumHunks() uint32 { + //nolint:gosec // Safe: len(entries) bounded by NumHunks which fits in uint32 + return uint32(len(hm.entries)) +} + +// HunkBytes returns the size of each hunk in bytes. +func (hm *HunkMap) HunkBytes() uint32 { + return hm.header.HunkBytes +} diff --git a/chd/metadata.go b/chd/metadata.go new file mode 100644 index 0000000..dd8006a --- /dev/null +++ b/chd/metadata.go @@ -0,0 +1,378 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package chd + +import ( + "encoding/binary" + "fmt" + "io" + "strconv" + "strings" +) + +// Metadata tag constants (as 4-byte big-endian integers) +const ( + // MetaTagCHT2 is the CD Track v2 metadata tag ("CHT2") + MetaTagCHT2 = 0x43485432 + + // MetaTagCHCD is the CD metadata tag ("CHCD") + MetaTagCHCD = 0x43484344 + + // MetaTagCHTR is the CD Track v1 metadata tag ("CHTR") + MetaTagCHTR = 0x43485452 + + // MetaTagGDTR is the GD-ROM track metadata tag ("CHGD") + MetaTagGDTR = 0x43484744 +) + +// Track represents a CD track in the CHD file. +type Track struct { + Type string + SubType string + Number int + Frames int + Pregap int + Postgap int + DataSize int + SubSize int + StartFrame int +} + +// metadataEntry represents a raw metadata entry from the CHD file. +type metadataEntry struct { + Data []byte + Next uint64 + Tag uint32 + Flags uint8 +} + +// parseMetadata reads all metadata entries from the CHD file. +func parseMetadata(reader io.ReaderAt, offset uint64) ([]metadataEntry, error) { + entries := make([]metadataEntry, 0, 8) // Pre-allocate for typical CHD track count + visited := make(map[uint64]bool) // Track visited offsets to detect loops + + for offset != 0 { + // Detect circular references + if visited[offset] { + return entries, fmt.Errorf("%w: circular metadata chain at offset %d", ErrInvalidMetadata, offset) + } + visited[offset] = true + + // Limit total entries to prevent memory exhaustion + if len(entries) >= MaxMetadataEntries { + return entries, fmt.Errorf("%w: too many metadata entries (%d)", ErrInvalidMetadata, len(entries)) + } + + entry, err := readMetadataEntry(reader, offset) + if err != nil { + return entries, fmt.Errorf("read metadata at %d: %w", offset, err) + } + + entries = append(entries, entry) + offset = entry.Next + } + + return entries, nil +} + +// readMetadataEntry reads a single metadata entry at the given offset. +// Metadata entry format: +// +// Offset 0: Tag (4 bytes, big-endian) +// Offset 4: Flags (1 byte) +// Offset 5: Length (3 bytes, big-endian) +// Offset 8: Next offset (8 bytes, big-endian) +// Offset 16: Data (length bytes) +func readMetadataEntry(reader io.ReaderAt, offset uint64) (metadataEntry, error) { + headerBuf := make([]byte, 16) + //nolint:gosec // Safe: offset from metadata chain, validated by CHD file structure + if _, err := reader.ReadAt(headerBuf, int64(offset)); err != nil { + return metadataEntry{}, fmt.Errorf("read metadata header: %w", err) + } + + entry := metadataEntry{ + Tag: binary.BigEndian.Uint32(headerBuf[0:4]), + Flags: headerBuf[4], + } + + // Length is 3 bytes big-endian (bytes 5-7) + length := uint32(headerBuf[5])<<16 | uint32(headerBuf[6])<<8 | uint32(headerBuf[7]) + + // Next offset (8 bytes) + entry.Next = binary.BigEndian.Uint64(headerBuf[8:16]) + + // Read data + if length > MaxMetadataLen { + return metadataEntry{}, fmt.Errorf("%w: metadata entry too large (%d > %d)", + ErrInvalidMetadata, length, MaxMetadataLen) + } + if length > 0 { + entry.Data = make([]byte, length) + //nolint:gosec // Safe: offset from metadata chain, validated by CHD file structure + if _, err := reader.ReadAt(entry.Data, int64(offset)+16); err != nil { + return metadataEntry{}, fmt.Errorf("read metadata data: %w", err) + } + } + + return entry, nil +} + +// parseTracks extracts track information from metadata entries. +func parseTracks(entries []metadataEntry) ([]Track, error) { + var tracks []Track + + for _, entry := range entries { + switch entry.Tag { + case MetaTagCHT2: + track, err := parseCHT2(entry.Data) + if err != nil { + return nil, fmt.Errorf("parse CHT2: %w", err) + } + tracks = append(tracks, track) + + case MetaTagCHTR: + track, err := parseCHTR(entry.Data) + if err != nil { + return nil, fmt.Errorf("parse CHTR: %w", err) + } + tracks = append(tracks, track) + + case MetaTagCHCD: + // Binary CD metadata - contains all tracks at once + parsed, err := parseCHCD(entry.Data) + if err != nil { + return nil, fmt.Errorf("parse CHCD: %w", err) + } + tracks = append(tracks, parsed...) + } + } + + // Calculate start frames for each track + startFrame := 0 + for i := range tracks { + tracks[i].StartFrame = startFrame + startFrame += tracks[i].Pregap + tracks[i].Frames + tracks[i].Postgap + } + + return tracks, nil +} + +// parseCHT2 parses CHT2 (CD Track v2) metadata. +// Format: ASCII key:value pairs +// Example: "TRACK:1 TYPE:MODE2_RAW SUBTYPE:NONE FRAMES:1234 PREGAP:150 PGTYPE:MODE2_RAW PGSUB:RW POSTGAP:0" +// +//nolint:gocognit,revive // CHT2 parsing requires handling many metadata fields +func parseCHT2(data []byte) (Track, error) { + var track Track + + // Trim null bytes and whitespace from the metadata string + str := strings.TrimRight(string(data), "\x00 \t\r\n") + str = strings.TrimSpace(str) + fields := strings.Fields(str) + + for _, field := range fields { + parts := strings.SplitN(field, ":", 2) + if len(parts) != 2 { + continue + } + + key := strings.ToUpper(parts[0]) + value := parts[1] + + switch key { + case "TRACK": + num, err := strconv.Atoi(value) + if err != nil { + return track, fmt.Errorf("invalid track number %q: %w", value, err) + } + track.Number = num + + case "TYPE": + track.Type = value + track.DataSize = trackTypeToDataSize(value) + + case "SUBTYPE": + track.SubType = value + track.SubSize = subTypeToSize(value) + + case "FRAMES": + frames, err := strconv.Atoi(value) + if err != nil { + return track, fmt.Errorf("invalid frames %q: %w", value, err) + } + track.Frames = frames + + case "PREGAP": + pregap, err := strconv.Atoi(value) + if err != nil { + return track, fmt.Errorf("invalid pregap %q: %w", value, err) + } + track.Pregap = pregap + + case "POSTGAP": + postgap, err := strconv.Atoi(value) + if err != nil { + return track, fmt.Errorf("invalid postgap %q: %w", value, err) + } + track.Postgap = postgap + } + } + + return track, nil +} + +// parseCHTR parses CHTR (CD Track v1) metadata. +// Format: ASCII, simpler format than CHT2 +// Example: "TRACK:1 TYPE:MODE1 SUBTYPE:NONE FRAMES:1234" +func parseCHTR(data []byte) (Track, error) { + // V1 format is similar to V2, just with fewer fields + return parseCHT2(data) +} + +// parseCHCD parses CHCD (binary CD metadata). +// Format: +// +// Offset 0: Number of tracks (4 bytes, big-endian) +// Offset 4: Track entries (24 bytes each) +// +// Track entry format: +// +// Offset 0: Type (4 bytes) +// Offset 4: Subtype (4 bytes) +// Offset 8: Data size (4 bytes) +// Offset 12: Sub size (4 bytes) +// Offset 16: Frames (4 bytes) +// Offset 20: Pad frames (4 bytes) +func parseCHCD(data []byte) ([]Track, error) { + if len(data) < 4 { + return nil, ErrInvalidMetadata + } + + numTracks := binary.BigEndian.Uint32(data[0:4]) + if numTracks > MaxNumTracks { + return nil, fmt.Errorf("%w: too many tracks (%d > %d)", ErrInvalidMetadata, numTracks, MaxNumTracks) + } + if len(data) < int(4+numTracks*24) { + return nil, ErrInvalidMetadata + } + + tracks := make([]Track, numTracks) + offset := 4 + + for i := range numTracks { + trackType := binary.BigEndian.Uint32(data[offset : offset+4]) + subType := binary.BigEndian.Uint32(data[offset+4 : offset+8]) + dataSize := binary.BigEndian.Uint32(data[offset+8 : offset+12]) + subSize := binary.BigEndian.Uint32(data[offset+12 : offset+16]) + frames := binary.BigEndian.Uint32(data[offset+16 : offset+20]) + // Pad frames at offset+20 is just for alignment + + tracks[i] = Track{ + Number: int(i + 1), + Type: cdTypeToString(trackType), + SubType: cdSubTypeToString(subType), + DataSize: int(dataSize), + SubSize: int(subSize), + Frames: int(frames), + } + + offset += 24 + } + + return tracks, nil +} + +// trackTypeToDataSize returns the data size for a track type string. +func trackTypeToDataSize(trackType string) int { + switch strings.ToUpper(trackType) { + case "MODE1/2048", "MODE2_FORM1": + return 2048 + case "MODE1/2352", "MODE1_RAW": + return 2352 + case "MODE2/2336", "MODE2_FORM_MIX": + return 2336 + case "MODE2/2048": + return 2048 + case "MODE2/2352", "MODE2_RAW": + return 2352 + case "AUDIO": + return 2352 + default: + return 2352 // Default to raw + } +} + +// subTypeToSize returns the subchannel size for a subtype string. +func subTypeToSize(subType string) int { + switch strings.ToUpper(subType) { + case "NONE": + return 0 + case "RW", "RW_RAW": + return 96 + default: + return 0 + } +} + +// cdTypeToString converts a binary CD type to a string. +func cdTypeToString(cdType uint32) string { + switch cdType { + case 0: + return "MODE1/2048" + case 1: + return "MODE1/2352" + case 2: + return "MODE2/2048" + case 3: + return "MODE2/2336" + case 4: + return "MODE2/2352" + case 5: + return "AUDIO" + default: + return "UNKNOWN" + } +} + +// cdSubTypeToString converts a binary CD subtype to a string. +func cdSubTypeToString(subType uint32) string { + switch subType { + case 0: + return "RW" + case 1: + return "RW_RAW" + case 2: + return "NONE" + default: + return "NONE" + } +} + +// IsDataTrack returns true if this is a data track (not audio). +func (t *Track) IsDataTrack() bool { + return !strings.EqualFold(t.Type, "AUDIO") +} + +// SectorSize returns the total size of each sector including subchannel data. +func (t *Track) SectorSize() int { + if t.DataSize == 0 { + return 2352 + t.SubSize + } + return t.DataSize + t.SubSize +} diff --git a/console.go b/console.go index 5e76294..37d6a96 100644 --- a/console.go +++ b/console.go @@ -24,6 +24,7 @@ import ( "path/filepath" "strings" + "github.com/ZaparooProject/go-gameid/chd" "github.com/ZaparooProject/go-gameid/identifier" "github.com/ZaparooProject/go-gameid/iso9660" ) @@ -154,6 +155,11 @@ func detectConsoleFromHeader(path, ext string) (identifier.Console, error) { return detectConsoleFromCue(path) } + // Handle CHD files specially + if ext == ".chd" { + return detectConsoleFromCHD(path) + } + // Read header for analysis file, err := os.Open(path) //nolint:gosec // Path from user input is expected if err != nil { @@ -201,6 +207,44 @@ func detectConsoleFromHeader(path, ext string) (identifier.Console, error) { return "", identifier.ErrNotSupported{Format: ext} } +// detectConsoleFromCHD handles CHD disc image detection. +func detectConsoleFromCHD(path string) (identifier.Console, error) { + chdFile, err := chd.Open(path) + if err != nil { + return "", fmt.Errorf("open CHD: %w", err) + } + defer func() { _ = chdFile.Close() }() + + // Read first sectors for magic word detection + reader := chdFile.RawSectorReader() + header := make([]byte, 0x1000) + if _, readErr := reader.ReadAt(header, 0); readErr != nil { + return "", fmt.Errorf("read CHD header: %w", readErr) + } + + // Check for Sega consoles first (they have magic words in raw sector data) + if identifier.ValidateSaturn(header) { + return identifier.ConsoleSaturn, nil + } + if identifier.ValidateSegaCD(header) { + return identifier.ConsoleSegaCD, nil + } + + // Check for GameCube (non-ISO9660 proprietary format) + if identifier.ValidateGC(header) { + return identifier.ConsoleGC, nil + } + + // Try parsing as ISO9660 for PSX/PS2/PSP/NeoGeoCD + iso, err := iso9660.OpenCHD(path) + if err != nil { + return "", fmt.Errorf("open CHD as ISO: %w", err) + } + defer func() { _ = iso.Close() }() + + return detectConsoleFromISO(iso) +} + // detectConsoleFromCue handles CUE sheet detection func detectConsoleFromCue(path string) (identifier.Console, error) { cue, err := iso9660.ParseCue(path) diff --git a/console_test.go b/console_test.go index ff1cce6..06c04f3 100644 --- a/console_test.go +++ b/console_test.go @@ -437,3 +437,52 @@ func TestFileExists(t *testing.T) { t.Error("fileExists() = true for directory") } } + +// TestDetectConsoleFromCHD_SegaCD verifies CHD detection for Sega CD. +func TestDetectConsoleFromCHD_SegaCD(t *testing.T) { + t.Parallel() + + console, err := DetectConsole("testdata/SegaCD/240pSuite_USA.chd") + if err != nil { + t.Fatalf("DetectConsole() error = %v", err) + } + if console != identifier.ConsoleSegaCD { + t.Errorf("DetectConsole() = %v, want %v", console, identifier.ConsoleSegaCD) + } +} + +// TestDetectConsoleFromCHD_NeoGeoCD verifies CHD detection for Neo Geo CD. +func TestDetectConsoleFromCHD_NeoGeoCD(t *testing.T) { + t.Parallel() + + console, err := DetectConsole("testdata/NeoGeoCD/240pTestSuite.chd") + if err != nil { + t.Fatalf("DetectConsole() error = %v", err) + } + if console != identifier.ConsoleNeoGeoCD { + t.Errorf("DetectConsole() = %v, want %v", console, identifier.ConsoleNeoGeoCD) + } +} + +// TestDetectConsoleFromCHD_GameCube verifies CHD detection for GameCube. +func TestDetectConsoleFromCHD_GameCube(t *testing.T) { + t.Parallel() + + console, err := DetectConsole("testdata/GC/GameCube-240pSuite-1.17.chd") + if err != nil { + t.Fatalf("DetectConsole() error = %v", err) + } + if console != identifier.ConsoleGC { + t.Errorf("DetectConsole() = %v, want %v", console, identifier.ConsoleGC) + } +} + +// TestDetectConsoleFromCHD_NonExistent verifies error for missing CHD. +func TestDetectConsoleFromCHD_NonExistent(t *testing.T) { + t.Parallel() + + _, err := DetectConsole("/nonexistent/path/game.chd") + if err == nil { + t.Error("DetectConsole() should fail for non-existent CHD") + } +} diff --git a/gameid_test.go b/gameid_test.go index b91e38d..0438308 100644 --- a/gameid_test.go +++ b/gameid_test.go @@ -296,3 +296,131 @@ func TestIsBlockDevice(t *testing.T) { t.Error("isBlockDevice() should return false for non-existent device") } } + +// TestIdentifyFromReader verifies IdentifyFromReader with a GBA ROM. +func TestIdentifyFromReader(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + gbaPath := createTestGBAFile(t, tmpDir) + + // Open the file and use IdentifyFromReader + //nolint:gosec // G304: test file path constructed from t.TempDir + file, err := os.Open(gbaPath) + if err != nil { + t.Fatalf("Failed to open file: %v", err) + } + defer func() { _ = file.Close() }() + + stat, err := file.Stat() + if err != nil { + t.Fatalf("Failed to stat file: %v", err) + } + + result, err := IdentifyFromReader(file, stat.Size(), ConsoleGBA, nil) + if err != nil { + t.Fatalf("IdentifyFromReader() error = %v", err) + } + + if result.Console != identifier.ConsoleGBA { + t.Errorf("Console = %v, want %v", result.Console, identifier.ConsoleGBA) + } + + if result.ID != "ATST" { + t.Errorf("ID = %q, want %q", result.ID, "ATST") + } +} + +// TestIdentifyFromReader_UnsupportedConsole verifies error for unsupported console. +func TestIdentifyFromReader_UnsupportedConsole(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + gbaPath := createTestGBAFile(t, tmpDir) + + //nolint:gosec // G304: test file path constructed from t.TempDir + file, err := os.Open(gbaPath) + if err != nil { + t.Fatalf("Failed to open file: %v", err) + } + defer func() { _ = file.Close() }() + + stat, err := file.Stat() + if err != nil { + t.Fatalf("Failed to stat file: %v", err) + } + + _, err = IdentifyFromReader(file, stat.Size(), "Xbox", nil) + if err == nil { + t.Error("IdentifyFromReader() should error for unsupported console") + } +} + +// TestIdentifyWithConsole_UnsupportedConsole verifies error for unsupported console. +func TestIdentifyWithConsole_UnsupportedConsole(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + gbaPath := createTestGBAFile(t, tmpDir) + + _, err := IdentifyWithConsole(gbaPath, "Xbox", nil) + if err == nil { + t.Error("IdentifyWithConsole() should error for unsupported console") + } +} + +// TestIdentifyFromDirectory_PSP verifies mounted PSP directory identification. +func TestIdentifyFromDirectory_PSP(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + + // Create UMD_DATA.BIN marker file (identifies as PSP) + markerPath := filepath.Join(tmpDir, "UMD_DATA.BIN") + if err := os.WriteFile(markerPath, []byte("ULJM12345|0000000001|0001"), 0o600); err != nil { + t.Fatalf("Failed to create marker file: %v", err) + } + + // First detect the console + console, err := DetectConsole(tmpDir) + if err != nil { + t.Fatalf("DetectConsole() error = %v", err) + } + if console != ConsolePSP { + t.Fatalf("DetectConsole() = %v, want PSP", console) + } + + // Now identify - note this will error because we don't have a full PSP filesystem + // but it will exercise the identifyFromDirectory path + _, err = IdentifyWithConsole(tmpDir, console, nil) + // Error is expected since we don't have PARAM.SFO + if err == nil { + t.Log("IdentifyWithConsole() succeeded unexpectedly for minimal PSP dir") + } +} + +// TestIdentifyFromDirectory_CartridgeConsole verifies error when using directory with cartridge console. +func TestIdentifyFromDirectory_CartridgeConsole(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + + // Try to identify a directory as a GBA game (cartridge-based) + // This should fail since directories aren't supported for cartridge consoles + _, err := identifyFromDirectory(tmpDir, ConsoleGBA, nil) + if err == nil { + t.Error("identifyFromDirectory() should error for cartridge-based console") + } +} + +// TestIdentifyFromDirectory_UnsupportedConsole verifies error for unsupported console. +func TestIdentifyFromDirectory_UnsupportedConsole(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + + _, err := identifyFromDirectory(tmpDir, "Xbox", nil) + if err == nil { + t.Error("identifyFromDirectory() should error for unsupported console") + } +} diff --git a/go.mod b/go.mod index 03e0523..c660295 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,14 @@ module github.com/ZaparooProject/go-gameid go 1.24.11 + +require ( + github.com/klauspost/compress v1.18.0 + github.com/mewkiz/flac v1.0.12 + github.com/ulikunitz/xz v0.5.15 +) + +require ( + github.com/icza/bitio v1.1.0 // indirect + github.com/mewkiz/pkg v0.0.0-20230226050401-4010bf0fec14 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ba23629 --- /dev/null +++ b/go.sum @@ -0,0 +1,43 @@ +github.com/d4l3k/messagediff v1.2.2-0.20190829033028-7e0a312ae40b/go.mod h1:Oozbb1TVXFac9FtSIxHBMnBCq2qeH/2KkEQxENCrlLo= +github.com/icza/bitio v1.1.0 h1:ysX4vtldjdi3Ygai5m1cWy4oLkhWTAi+SyO6HC8L9T0= +github.com/icza/bitio v1.1.0/go.mod h1:0jGnlLAx8MKMr9VGnn/4YrvZiprkvBelsVIbA9Jjr9A= +github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k= +github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA= +github.com/jszwec/csvutil v1.5.1/go.mod h1:Rpu7Uu9giO9subDyMCIQfHVDuLrcaC36UA4YcJjGBkg= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/mewkiz/flac v1.0.12 h1:5Y1BRlUebfiVXPmz7hDD7h3ceV2XNrGNMejNVjDpgPY= +github.com/mewkiz/flac v1.0.12/go.mod h1:1UeXlFRJp4ft2mfZnPLRpQTd7cSjb/s17o7JQzzyrCA= +github.com/mewkiz/pkg v0.0.0-20230226050401-4010bf0fec14 h1:tnAPMExbRERsyEYkmR1YjhTgDM0iqyiBYf8ojRXxdbA= +github.com/mewkiz/pkg v0.0.0-20230226050401-4010bf0fec14/go.mod h1:QYCFBiH5q6XTHEbWhR0uhR3M9qNPoD2CSQzr0g75kE4= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY= +github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/image v0.5.0/go.mod h1:FVC7BI/5Ym8R25iw5OLsgshdUBbT1h5jZTpA+mvAdZ4= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/identifier/gc.go b/identifier/gc.go index b02ddc8..1c2384f 100644 --- a/identifier/gc.go +++ b/identifier/gc.go @@ -21,7 +21,10 @@ package identifier import ( "fmt" "io" + "path/filepath" + "strings" + "github.com/ZaparooProject/go-gameid/chd" "github.com/ZaparooProject/go-gameid/internal/binary" ) @@ -117,3 +120,30 @@ func ValidateGC(header []byte) bool { magic := header[0x1C : 0x1C+4] return binary.BytesEqual(magic, gcMagicWord) } + +// IdentifyFromPath handles path-based identification for GameCube discs. +// This is needed for CHD files which require special handling. +func (g *GCIdentifier) IdentifyFromPath(path string, db Database) (*Result, error) { + ext := strings.ToLower(filepath.Ext(path)) + if ext == ".chd" { + return g.identifyFromCHD(path, db) + } + + // For non-CHD files, fall back to standard file reading + return nil, ErrNotSupported{Format: "use standard Identify for non-CHD files"} +} + +// identifyFromCHD reads GameCube disc data from a CHD file. +func (g *GCIdentifier) identifyFromCHD(path string, db Database) (*Result, error) { + chdFile, err := chd.Open(path) + if err != nil { + return nil, fmt.Errorf("open CHD: %w", err) + } + defer func() { _ = chdFile.Close() }() + + // GameCube discs don't use ISO9660 - read raw sector data + reader := chdFile.RawSectorReader() + size := chdFile.Size() + + return g.Identify(reader, size, db) +} diff --git a/identifier/neogeocd.go b/identifier/neogeocd.go index 9afcb62..66ec56d 100644 --- a/identifier/neogeocd.go +++ b/identifier/neogeocd.go @@ -56,13 +56,20 @@ func (n *NeoGeoCDIdentifier) IdentifyFromPath(path string, database Database) (* ext := strings.ToLower(filepath.Ext(path)) - if ext == ".cue" { + switch ext { + case ".cue": isoFile, err := iso9660.OpenCue(path) if err != nil { return nil, fmt.Errorf("open CUE: %w", err) } iso = isoFile - } else { + case ".chd": + isoFile, err := iso9660.OpenCHD(path) + if err != nil { + return nil, fmt.Errorf("open CHD: %w", err) + } + iso = isoFile + default: isoFile, err := iso9660.Open(path) if err != nil { return nil, fmt.Errorf("open ISO: %w", err) diff --git a/identifier/psp.go b/identifier/psp.go index eb95356..414cf0e 100644 --- a/identifier/psp.go +++ b/identifier/psp.go @@ -48,9 +48,22 @@ func (*PSPIdentifier) Identify(_ io.ReaderAt, _ int64, _ Database) (*Result, err // IdentifyFromPath identifies a PSP game from a file path. func (*PSPIdentifier) IdentifyFromPath(path string, database Database) (*Result, error) { - iso, err := iso9660.Open(path) - if err != nil { - return nil, fmt.Errorf("open ISO: %w", err) + ext := strings.ToLower(filepath.Ext(path)) + + var iso *iso9660.ISO9660 + var err error + + switch ext { + case ".chd": + iso, err = iso9660.OpenCHD(path) + if err != nil { + return nil, fmt.Errorf("open CHD: %w", err) + } + default: + iso, err = iso9660.Open(path) + if err != nil { + return nil, fmt.Errorf("open ISO: %w", err) + } } defer func() { _ = iso.Close() }() diff --git a/identifier/psx.go b/identifier/psx.go index 944d450..4c98656 100644 --- a/identifier/psx.go +++ b/identifier/psx.go @@ -35,23 +35,32 @@ type playstationISO interface { Close() error } -// openPlayStationISO opens an ISO from a path, handling CUE files. +// openPlayStationISO opens an ISO from a path, handling CUE and CHD files. func openPlayStationISO(path string) (playstationISO, error) { ext := strings.ToLower(filepath.Ext(path)) - if ext == ".cue" { + switch ext { + case ".cue": iso, err := iso9660.OpenCue(path) if err != nil { return nil, fmt.Errorf("open CUE: %w", err) } return iso, nil - } - iso, err := iso9660.Open(path) - if err != nil { - return nil, fmt.Errorf("open ISO: %w", err) + case ".chd": + iso, err := iso9660.OpenCHD(path) + if err != nil { + return nil, fmt.Errorf("open CHD: %w", err) + } + return iso, nil + + default: + iso, err := iso9660.Open(path) + if err != nil { + return nil, fmt.Errorf("open ISO: %w", err) + } + return iso, nil } - return iso, nil } // identifyPlayStation identifies a PlayStation game from an ISO. diff --git a/identifier/saturn.go b/identifier/saturn.go index d2f4686..e632f48 100644 --- a/identifier/saturn.go +++ b/identifier/saturn.go @@ -25,6 +25,7 @@ import ( "path/filepath" "strings" + "github.com/ZaparooProject/go-gameid/chd" "github.com/ZaparooProject/go-gameid/internal/binary" "github.com/ZaparooProject/go-gameid/iso9660" ) @@ -93,13 +94,15 @@ func (s *SaturnIdentifier) Identify(reader io.ReaderAt, size int64, db Database) } // IdentifyFromPath identifies a Saturn game from a file path. +// +//nolint:gocognit,revive // CUE/CHD/ISO handling requires separate branches func (s *SaturnIdentifier) IdentifyFromPath(path string, database Database) (*Result, error) { ext := strings.ToLower(filepath.Ext(path)) var header []byte - //nolint:nestif // CUE vs ISO handling requires separate branches - if ext == ".cue" { + switch ext { + case ".cue": cue, err := iso9660.ParseCue(path) if err != nil { return nil, fmt.Errorf("parse CUE: %w", err) @@ -116,7 +119,20 @@ func (s *SaturnIdentifier) IdentifyFromPath(path string, database Database) (*Re if _, err := binFile.Read(header); err != nil { return nil, fmt.Errorf("read BIN header: %w", err) } - } else { + + case ".chd": + chdFile, err := chd.Open(path) + if err != nil { + return nil, fmt.Errorf("open CHD: %w", err) + } + defer func() { _ = chdFile.Close() }() + header = make([]byte, 0x100) + reader := chdFile.RawSectorReader() + if _, err := reader.ReadAt(header, 0); err != nil { + return nil, fmt.Errorf("read CHD header: %w", err) + } + + default: isoFile, err := os.Open(path) //nolint:gosec // Path from user input is expected if err != nil { return nil, fmt.Errorf("open ISO file: %w", err) diff --git a/identifier/segacd.go b/identifier/segacd.go index 28f35fa..01c3d26 100644 --- a/identifier/segacd.go +++ b/identifier/segacd.go @@ -25,6 +25,7 @@ import ( "path/filepath" "strings" + "github.com/ZaparooProject/go-gameid/chd" "github.com/ZaparooProject/go-gameid/internal/binary" "github.com/ZaparooProject/go-gameid/iso9660" ) @@ -66,13 +67,19 @@ func (s *SegaCDIdentifier) Identify(reader io.ReaderAt, size int64, db Database) } // IdentifyFromPath identifies a Sega CD game from a file path. +// + func (s *SegaCDIdentifier) IdentifyFromPath(path string, database Database) (*Result, error) { ext := strings.ToLower(filepath.Ext(path)) - if ext == ".cue" { + switch ext { + case ".cue": return s.identifyFromCue(path, database) + case ".chd": + return s.identifyFromCHD(path, database) + default: + return s.identifyFromISO(path, database) } - return s.identifyFromISO(path, database) } func (s *SegaCDIdentifier) identifyFromCue(path string, database Database) (*Result, error) { @@ -122,6 +129,28 @@ func (s *SegaCDIdentifier) identifyFromISO(path string, database Database) (*Res return s.identifyFromHeader(header, database, iso) } +func (s *SegaCDIdentifier) identifyFromCHD(path string, database Database) (*Result, error) { + chdFile, err := chd.Open(path) + if err != nil { + return nil, fmt.Errorf("open CHD: %w", err) + } + defer func() { _ = chdFile.Close() }() + + header := make([]byte, 0x300) + reader := chdFile.RawSectorReader() + if _, err := reader.ReadAt(header, 0); err != nil { + return nil, fmt.Errorf("read CHD header: %w", err) + } + + // ISO parsing is optional - SegaCD can be identified from raw header alone + iso, _ := iso9660.OpenCHD(path) + if iso != nil { + defer func() { _ = iso.Close() }() + } + + return s.identifyFromHeader(header, database, iso) +} + //nolint:funlen,revive // Header parsing requires many field extractions func (s *SegaCDIdentifier) identifyFromHeader(header []byte, db Database, iso *iso9660.ISO9660) (*Result, error) { // Find magic word diff --git a/iso9660/chd.go b/iso9660/chd.go new file mode 100644 index 0000000..99e1d18 --- /dev/null +++ b/iso9660/chd.go @@ -0,0 +1,50 @@ +// Copyright (c) 2025 Niema Moshiri and The Zaparoo Project. +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of go-gameid. +// +// go-gameid is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-gameid is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-gameid. If not, see . + +package iso9660 + +import ( + "fmt" + + "github.com/ZaparooProject/go-gameid/chd" +) + +// OpenCHD opens an ISO9660 filesystem from a CHD disc image file. +// The CHD file's DataTrackSectorReader provides 2048-byte logical sectors +// starting at the first data track, suitable for ISO9660 parsing. +// This handles multi-track CDs like Neo Geo CD that have audio tracks first. +func OpenCHD(path string) (*ISO9660, error) { + chdFile, err := chd.Open(path) + if err != nil { + return nil, fmt.Errorf("open CHD: %w", err) + } + + // Use the data track sector reader which provides 2048-byte logical sectors + // starting at the first data track (essential for multi-track CDs) + reader := chdFile.DataTrackSectorReader() + size := chdFile.DataTrackSize() + + // Create ISO9660 with the CHD as the underlying closer + iso, err := OpenReaderWithCloser(reader, size, chdFile) + if err != nil { + _ = chdFile.Close() + return nil, fmt.Errorf("parse ISO9660 from CHD: %w", err) + } + + return iso, nil +} diff --git a/iso9660/iso9660.go b/iso9660/iso9660.go index 93699d3..ed1da63 100644 --- a/iso9660/iso9660.go +++ b/iso9660/iso9660.go @@ -55,7 +55,8 @@ type pathTableEntry struct { // ISO9660 represents a parsed ISO9660 disc image. type ISO9660 struct { - file *os.File + reader io.ReaderAt + closer io.Closer // Optional closer for the underlying reader pvd []byte pathTable []pathTableEntry blockSize int @@ -78,8 +79,9 @@ func Open(path string) (*ISO9660, error) { } iso := &ISO9660{ - file: isoFile, - size: info.Size(), + reader: isoFile, + closer: isoFile, + size: info.Size(), } if err := iso.init(); err != nil { @@ -94,19 +96,22 @@ func Open(path string) (*ISO9660, error) { // OpenReader creates an ISO9660 from an io.ReaderAt. // The caller is responsible for closing the underlying reader if needed. func OpenReader(reader io.ReaderAt, size int64) (*ISO9660, error) { - iso := &ISO9660{ - size: size, - } + return OpenReaderWithCloser(reader, size, nil) +} - // Create a wrapper that implements the file interface we need - fileReader, ok := reader.(*os.File) - if !ok { - // For non-file readers, we need a different approach - return nil, errors.New("OpenReader currently only supports *os.File") +// OpenReaderWithCloser creates an ISO9660 from an io.ReaderAt with an optional closer. +// The closer will be called when Close() is called on the ISO9660. +func OpenReaderWithCloser(reader io.ReaderAt, size int64, closer io.Closer) (*ISO9660, error) { + iso := &ISO9660{ + reader: reader, + closer: closer, + size: size, } - iso.file = fileReader if err := iso.init(); err != nil { + if closer != nil { + _ = closer.Close() + } return nil, err } @@ -122,7 +127,9 @@ func (iso *ISO9660) init() error { case iso.size%2048 == 0: iso.blockSize = 2048 default: - return ErrInvalidBlock + // For CHD sources, block size may not divide evenly + // Default to 2048 for ISO9660 standard + iso.blockSize = 2048 } // Search for PVD in first ~1MB @@ -132,7 +139,7 @@ func (iso *ISO9660) init() error { } header := make([]byte, searchSize) - if _, err := iso.file.ReadAt(header, 0); err != nil && err != io.EOF { + if _, err := iso.reader.ReadAt(header, 0); err != nil && err != io.EOF { return fmt.Errorf("failed to read header: %w", err) } @@ -161,7 +168,7 @@ func (iso *ISO9660) init() error { // Read PVD (one block) iso.pvd = make([]byte, iso.blockSize) - if _, err := iso.file.ReadAt(iso.pvd, pvdOffset); err != nil { + if _, err := iso.reader.ReadAt(iso.pvd, pvdOffset); err != nil { return fmt.Errorf("failed to read PVD: %w", err) } @@ -183,7 +190,7 @@ func (iso *ISO9660) parsePathTable() error { // Read path table offset := iso.blockOffset + int64(pathTableLBA)*int64(iso.blockSize) pathTableRaw := make([]byte, pathTableSize) - if _, err := iso.file.ReadAt(pathTableRaw, offset); err != nil { + if _, err := iso.reader.ReadAt(pathTableRaw, offset); err != nil { return fmt.Errorf("failed to read path table: %w", err) } @@ -228,9 +235,9 @@ func (iso *ISO9660) parsePathTable() error { // Close closes the ISO9660 file. func (iso *ISO9660) Close() error { - if iso.file != nil { - if err := iso.file.Close(); err != nil { - return fmt.Errorf("close ISO file: %w", err) + if iso.closer != nil { + if err := iso.closer.Close(); err != nil { + return fmt.Errorf("close ISO: %w", err) } } return nil @@ -313,7 +320,7 @@ func (iso *ISO9660) IterFiles(onlyRootDir bool) ([]FileInfo, error) { for { // Read record length lenBuf := make([]byte, 1) - if _, err := iso.file.ReadAt(lenBuf, offset); err != nil { + if _, err := iso.reader.ReadAt(lenBuf, offset); err != nil { break } recLen := int(lenBuf[0]) @@ -323,7 +330,7 @@ func (iso *ISO9660) IterFiles(onlyRootDir bool) ([]FileInfo, error) { // Read record recBuf := make([]byte, recLen-1) - if _, err := iso.file.ReadAt(recBuf, offset+1); err != nil { + if _, err := iso.reader.ReadAt(recBuf, offset+1); err != nil { break } @@ -368,7 +375,7 @@ func (iso *ISO9660) IterFiles(onlyRootDir bool) ([]FileInfo, error) { func (iso *ISO9660) ReadFile(info FileInfo) ([]byte, error) { offset := iso.blockOffset + int64(info.LBA)*int64(iso.blockSize) data := make([]byte, info.Size) - if _, err := iso.file.ReadAt(data, offset); err != nil && err != io.EOF { + if _, err := iso.reader.ReadAt(data, offset); err != nil && err != io.EOF { return nil, fmt.Errorf("failed to read file %s: %w", info.Path, err) } return data, nil diff --git a/iso9660/iso9660_test.go b/iso9660/iso9660_test.go index 1420e45..aaf1a71 100644 --- a/iso9660/iso9660_test.go +++ b/iso9660/iso9660_test.go @@ -430,3 +430,118 @@ func TestISO9660_GetPublisherID(t *testing.T) { t.Errorf("GetPublisherID() = %q, want prefix %q", pubID, "MY PUBLISHER") } } + +// TestOpenCHD_NeoGeoCD verifies OpenCHD with real Neo Geo CD test file. +func TestOpenCHD_NeoGeoCD(t *testing.T) { + t.Parallel() + + iso, err := OpenCHD("../testdata/NeoGeoCD/240pTestSuite.chd") + if err != nil { + t.Fatalf("OpenCHD failed: %v", err) + } + defer func() { _ = iso.Close() }() + + // Verify we can read ISO9660 metadata + volumeID := iso.GetVolumeID() + if volumeID == "" { + t.Error("GetVolumeID() returned empty") + } + t.Logf("Volume ID: %q", volumeID) + + // Verify we can list files + files, err := iso.IterFiles(false) + if err != nil { + t.Fatalf("IterFiles failed: %v", err) + } + t.Logf("Found %d files", len(files)) + + // Should have IPL.TXT for Neo Geo CD + hasIPL := false + for _, f := range files { + if strings.Contains(strings.ToUpper(f.Path), "IPL.TXT") { + hasIPL = true + break + } + } + if !hasIPL { + t.Log("Note: IPL.TXT not found in root - may be in subdirectory") + } +} + +// TestOpenCHD_NonExistent verifies error handling for missing files. +func TestOpenCHD_NonExistent(t *testing.T) { + t.Parallel() + + _, err := OpenCHD("/nonexistent/path/file.chd") + if err == nil { + t.Error("OpenCHD should fail for non-existent file") + } +} + +// TestOpenCHD_InvalidCHD verifies error handling for non-CHD files. +func TestOpenCHD_InvalidCHD(t *testing.T) { + t.Parallel() + + // Try to open a non-CHD file + _, err := OpenCHD("iso9660_test.go") + if err == nil { + t.Error("OpenCHD should fail for non-CHD file") + } +} + +// TestOpenReaderWithCloser verifies OpenReaderWithCloser functionality. +func TestOpenReaderWithCloser(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + + isoData := createMinimalISO("TEST", "SYS", "PUB") + isoPath := filepath.Join(tmpDir, "test.iso") + if err := os.WriteFile(isoPath, isoData, 0o600); err != nil { + t.Fatalf("Failed to write ISO: %v", err) + } + + // Open the file and use OpenReaderWithCloser + //nolint:gosec // G304: test file path constructed from t.TempDir + file, err := os.Open(isoPath) + if err != nil { + t.Fatalf("Failed to open file: %v", err) + } + + stat, _ := file.Stat() + iso, err := OpenReaderWithCloser(file, stat.Size(), file) + if err != nil { + _ = file.Close() + t.Fatalf("OpenReaderWithCloser failed: %v", err) + } + + // Verify it works + _ = iso.GetVolumeID() + + // Close should close the underlying file + if err := iso.Close(); err != nil { + t.Errorf("Close failed: %v", err) + } +} + +// TestGetDataPreparerID verifies data preparer ID extraction. +func TestGetDataPreparerID(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + + isoData := createMinimalISO("VOL", "SYS", "PUB") + isoPath := filepath.Join(tmpDir, "test.iso") + if err := os.WriteFile(isoPath, isoData, 0o600); err != nil { + t.Fatalf("Failed to write ISO: %v", err) + } + + iso, err := Open(isoPath) + if err != nil { + t.Fatalf("Open failed: %v", err) + } + defer func() { _ = iso.Close() }() + + // Data preparer ID is at a different offset - our minimal ISO doesn't set it + _ = iso.GetDataPreparerID() +} diff --git a/testdata/GC/GameCube-240pSuite-1.17.chd b/testdata/GC/GameCube-240pSuite-1.17.chd new file mode 100644 index 0000000..58cb778 Binary files /dev/null and b/testdata/GC/GameCube-240pSuite-1.17.chd differ diff --git a/testdata/NeoGeoCD/240pTestSuite.chd b/testdata/NeoGeoCD/240pTestSuite.chd new file mode 100644 index 0000000..5f678e0 Binary files /dev/null and b/testdata/NeoGeoCD/240pTestSuite.chd differ diff --git a/testdata/SegaCD/240pSuite_EU.chd b/testdata/SegaCD/240pSuite_EU.chd new file mode 100644 index 0000000..ba2b12f Binary files /dev/null and b/testdata/SegaCD/240pSuite_EU.chd differ diff --git a/testdata/SegaCD/240pSuite_JP.chd b/testdata/SegaCD/240pSuite_JP.chd new file mode 100644 index 0000000..3e179c3 Binary files /dev/null and b/testdata/SegaCD/240pSuite_JP.chd differ diff --git a/testdata/SegaCD/240pSuite_USA.chd b/testdata/SegaCD/240pSuite_USA.chd new file mode 100644 index 0000000..a9a2d28 Binary files /dev/null and b/testdata/SegaCD/240pSuite_USA.chd differ