diff --git a/.github/actions/ci-setup/action.yml b/.github/actions/ci-setup/action.yml index f24caac4..df0373aa 100644 --- a/.github/actions/ci-setup/action.yml +++ b/.github/actions/ci-setup/action.yml @@ -16,11 +16,10 @@ runs: using: "composite" steps: - name: Install Nim - uses: iffy/install-nim@v5 + uses: jiro4989/setup-nim-action@v2 with: - version: binary:${{ inputs.nim_version }} - env: - GITHUB_TOKEN: ${{ inputs.github_token }} + nim-version: ${{ inputs.nim_version }} + repo-token: ${{ inputs.github_token }} - name: Setup CMake uses: lukka/get-cmake@latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85549fac..90b17a13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ concurrency: cancel-in-progress: true env: - nim_version: 2.2.4 + nim_version: 2.2.6 rust_version: 1.79.0 cmake_version: 3.x node_version: 22 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 67e53c7d..5fd3f7ac 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,7 +15,7 @@ on: workflow_dispatch: env: - nim_version: 2.2.4 + nim_version: 2.2.6 rust_version: 1.79.0 cmake_version: 3.x archivist_binary_base: archivist diff --git a/.gitignore b/.gitignore index fdff9a8d..125ddb60 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,10 @@ docker/hostdatadir docker/prometheus-data .DS_Store +.claude +.mcp.json + data/ nimbledeps logs/ + diff --git a/README.md b/README.md index bd250c5a..30336c08 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ ### Prerequisites The following should be installed before building the node: -- [Nim][nim] 2.2.4 +- [Nim][nim] 2.2.6 - [Cmake][cmake] 3.x - [Rust][rustup] 1.79.0 - Optional: [NodeJS][nodejs] 22.x, only required for some tests diff --git a/archivist.nimble b/archivist.nimble index cfcd0c44..5e9111b5 100644 --- a/archivist.nimble +++ b/archivist.nimble @@ -18,8 +18,8 @@ requires "https://github.com/durability-labs/nim-serde >= 2.1.0" requires "https://github.com/durability-labs/nim-leopard >= 0.2.2" requires "https://github.com/guzba/zippy >= 0.10.16" requires "https://github.com/durability-labs/nim-chronicles#version-0-12-3-pre" # TODO: update to version 0.12.3 once it is released -requires "https://github.com/durability-labs/nim-groth16 >= 0.1.0" -requires "https://github.com/durability-labs/circom-witnessgen >= 0.1.3" +requires "https://github.com/durability-labs/nim-groth16 >= 0.1.1" +requires "https://github.com/durability-labs/circom-witnessgen >= 0.1.4" import std/os diff --git a/archivist/archivisttypes.nim b/archivist/archivisttypes.nim index b5f18022..6bcd4133 100644 --- a/archivist/archivisttypes.nim +++ b/archivist/archivisttypes.nim @@ -42,6 +42,7 @@ const Pos2Bn128MrklCodec* = multiCodec("poseidon2-alt_bn_128-merkle-2kb") ManifestCodec* = multiCodec("codex-manifest") + DirectoryCodec* = MultiCodec(0xCD04) # codex-directory (not yet registered in libp2p) DatasetRootCodec* = multiCodec("codex-root") BlockCodec* = multiCodec("codex-block") SlotRootCodec* = multiCodec("codex-slot-root") @@ -51,8 +52,8 @@ const HashesCodecs* = [Sha256HashCodec, Pos2Bn128SpngCodec, Pos2Bn128MrklCodec] PrimitivesCodecs* = [ - ManifestCodec, DatasetRootCodec, BlockCodec, SlotRootCodec, SlotProvingRootCodec, - SlotCellCodec, + ManifestCodec, DirectoryCodec, DatasetRootCodec, BlockCodec, SlotRootCodec, + SlotProvingRootCodec, SlotCellCodec, ] proc initEmptyCidTable(): ?!Table[(CidVersion, MultiCodec, MultiCodec), Cid] = diff --git a/archivist/conf.nim 
b/archivist/conf.nim index 73549e84..28f9f93a 100644 --- a/archivist/conf.nim +++ b/archivist/conf.nim @@ -334,9 +334,9 @@ type "the whole slot id space and the value of " & "the --validator-group-index parameter will be ignored. " & "Powers of twos are advised for even distribution", - defaultValue: ValidationGroups.none, + defaultValue: int.none, name: "validator-groups" - .}: Option[ValidationGroups] + .}: Option[int] validatorGroupIndex* {. desc: "Slot validation group index", @@ -608,6 +608,16 @@ proc readValue*( ) {.raises: [SerializationError, IOError].} = val = EthAddress.init(r.readValue(string)).get() +proc readValue*( + r: var TomlReader, val: var Cid +) {.raises: [SerializationError, IOError].} = + let cidStr = r.readValue(string) + let cidResult = Cid.init(cidStr) + if cidResult.isOk: + val = cidResult.get() + else: + raise newException(SerializationError, "Invalid CID: " & cidStr) + proc readValue*(r: var TomlReader, val: var SignedPeerRecord) = without uri =? r.readValue(string).catch, err: error "invalid SignedPeerRecord configuration value", error = err.msg diff --git a/archivist/directorynode.nim b/archivist/directorynode.nim new file mode 100644 index 00000000..79878832 --- /dev/null +++ b/archivist/directorynode.nim @@ -0,0 +1,168 @@ +## Copyright (c) 2025 Archivist Authors +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +## Directory operations for ArchivistNode +## +## This module is kept separate from node.nim to avoid importing minprotobuf +## into the main compilation path, which triggers serialization conflicts +## with TOML config loading. 
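+##
+## Round-trip sketch of this module's API (illustrative only; `store` is
+## assumed to be an existing NetworkStore and `dir` a DirectoryManifest
+## built elsewhere):
+##
+##   let blk = (await storeDirectoryManifest(store, dir)).tryGet()
+##   let fetched = (await fetchDirectoryManifest(store, blk.cid)).tryGet()
+##   assert fetched == dir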
+ +{.push raises: [].} + +import pkg/chronos +import pkg/questionable +import pkg/questionable/results +import pkg/libp2p/cid + +# Import minprotobuf explicitly to avoid leaking symbols that conflict with serialization +from pkg/libp2p/protobuf/minprotobuf import + ProtoBuffer, initProtoBuffer, getField, getRequiredRepeatedField, + write, finish, ProtoResult + +import ./manifest/directory +import ./blocktype as bt +import ./stores +import ./archivisttypes +import ./units +import ./errors +import ./logutils + +logScope: + topics = "archivist directorynode" + +proc decodeDirectoryManifest*(blk: bt.Block): ?!DirectoryManifest = + ## Decode a directory manifest from a block + ## + if not ?blk.cid.isDirectory: + return failure "Cid not a directory codec" + + var + pbNode = initProtoBuffer(blk.data) + pbEntries: seq[seq[byte]] + totalSize: uint64 + name: string + entries: seq[DirectoryEntry] + + if pbNode.getRequiredRepeatedField(1, pbEntries).isErr: + return failure("Unable to decode `entries` from directory manifest!") + + if pbNode.getField(2, totalSize).isErr: + return failure("Unable to decode `totalSize` from directory manifest!") + + if pbNode.getField(3, name).isErr: + return failure("Unable to decode `name` from directory manifest!") + + for pbEntryData in pbEntries: + var + pbEntry = initProtoBuffer(pbEntryData) + entryName: string + cidBuf: seq[byte] + size: uint64 + isDir: uint32 + mimetype: string + + if pbEntry.getField(1, entryName).isErr: + return failure("Unable to decode entry `name` from directory manifest!") + + if pbEntry.getField(2, cidBuf).isErr: + return failure("Unable to decode entry `cid` from directory manifest!") + + if pbEntry.getField(3, size).isErr: + return failure("Unable to decode entry `size` from directory manifest!") + + if pbEntry.getField(4, isDir).isErr: + return failure("Unable to decode entry `isDirectory` from directory manifest!") + + if pbEntry.getField(5, mimetype).isErr: + return failure("Unable to decode entry `mimetype` from directory manifest!") + + let entryCid = ?Cid.init(cidBuf).mapFailure + + entries.add(DirectoryEntry( + name: entryName, + cid: entryCid, + size: size.NBytes, + isDirectory: isDir != 0, + mimetype: mimetype, + )) + + success DirectoryManifest( + entries: entries, + totalSize: totalSize.NBytes, + name: name, + ) + +proc encodeDirectoryManifest*(directory: DirectoryManifest): seq[byte] = + ## Encode a directory manifest to protobuf bytes + ## + var pbNode = initProtoBuffer() + + for entry in directory.entries: + var pbEntry = initProtoBuffer() + pbEntry.write(1, entry.name) + pbEntry.write(2, entry.cid.data.buffer) + pbEntry.write(3, entry.size.uint64) + pbEntry.write(4, entry.isDirectory.uint32) + if entry.mimetype.len > 0: + pbEntry.write(5, entry.mimetype) + pbEntry.finish() + pbNode.write(1, pbEntry) + + pbNode.write(2, directory.totalSize.uint64) + + if directory.name.len > 0: + pbNode.write(3, directory.name) + + pbNode.finish() + pbNode.buffer + +proc storeDirectoryManifest*( + networkStore: NetworkStore, directory: DirectoryManifest +): Future[?!bt.Block] {.async: (raises: [CancelledError]).} = + ## Store a directory manifest and return its block + ## + let encoded = encodeDirectoryManifest(directory) + + without blk =? bt.Block.new(data = encoded, codec = DirectoryCodec), error: + trace "Unable to create block from directory manifest" + return failure(error) + + if err =? 
(await networkStore.putBlock(blk)).errorOption: + trace "Unable to store directory manifest block", cid = blk.cid, err = err.msg + return failure(err) + + info "Stored directory manifest", + cid = blk.cid, + entries = directory.entries.len, + totalSize = directory.totalSize + + success blk + +proc fetchDirectoryManifest*( + networkStore: NetworkStore, cid: Cid +): Future[?!DirectoryManifest] {.async: (raises: [CancelledError]).} = + ## Fetch and decode a directory manifest block + ## + if err =? cid.isDirectory.errorOption: + return failure("CID has invalid content type for directory manifest: " & $cid) + + trace "Retrieving directory manifest for cid", cid + + without blk =? await networkStore.getBlock(BlockAddress.init(cid)), err: + trace "Error retrieving directory manifest block", cid, err = err.msg + return failure err + + trace "Decoding directory manifest for cid", cid + + without directory =? decodeDirectoryManifest(blk), err: + trace "Unable to decode as directory manifest", err = err.msg + return failure("Unable to decode as directory manifest") + + trace "Decoded directory manifest", cid, entries = directory.entries.len + + return directory.success diff --git a/archivist/discovery.nim b/archivist/discovery.nim index da59e688..e5da4d3e 100644 --- a/archivist/discovery.nim +++ b/archivist/discovery.nim @@ -18,6 +18,7 @@ import pkg/libp2p/[cid, multicodec, routing_record, signed_envelope] import pkg/questionable import pkg/questionable/results import pkg/contractabi/address as ca +import pkg/datastore import pkg/archivistdht/discv5/[routing_table, protocol as discv5] from pkg/nimcrypto import keccak256 @@ -219,7 +220,7 @@ proc new*( bindPort = 0.Port, announceAddrs: openArray[MultiAddress], bootstrapNodes: openArray[SignedPeerRecord] = [], - store: Datastore = SQLiteDatastore.new(Memory).expect("Should not fail!"), + store: Datastore = SQLiteDatastore.new(datastore.Memory).expect("Should not fail!"), ): Discovery = ## Create a new Discovery node instance for the given key and datastore ## diff --git a/archivist/manifest/directory.nim b/archivist/manifest/directory.nim new file mode 100644 index 00000000..0f00f758 --- /dev/null +++ b/archivist/manifest/directory.nim @@ -0,0 +1,186 @@ +## Copyright (c) 2025 Archivist Authors +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +# This module defines DirectoryManifest for folder/directory support + +{.push raises: [].} + +# Note: minprotobuf import is NOT included here - it triggers a serialization +# error when imported into a module that gets pulled into TOML config loading. +# The encode/decode procs that need protobuf are in directorycoders.nim instead.
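+
+# A minimal construction sketch (illustrative only; `fileCid` is a
+# hypothetical Cid obtained from an earlier file upload):
+#
+#   let entry = DirectoryEntry.new(
+#     name = "track1.mp3",
+#     cid = fileCid,
+#     size = 1024.NBytes,
+#     mimetype = "audio/mpeg",
+#   )
+#   let dir = DirectoryManifest.new(entries = @[entry], name = "Album")
+#   assert dir.totalSize == 1024.NBytes and dir.filesCount == 1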
+ +import pkg/libp2p/[cid, multihash, multicodec] +import pkg/questionable/results + +import ../errors +import ../units +import ../archivisttypes + +type + DirectoryEntry* = object + name*: string + cid*: Cid + size*: NBytes + isDirectory*: bool + mimetype*: string # Empty string = not set + + DirectoryManifest* = object + entries*: seq[DirectoryEntry] + totalSize*: NBytes + name*: string # Empty string = not set + +############################################################ +# Accessors +############################################################ + +func entries*(self: DirectoryManifest): seq[DirectoryEntry] = + self.entries + +func totalSize*(self: DirectoryManifest): NBytes = + self.totalSize + +func name*(self: DirectoryManifest): string = + self.name + +func filesCount*(self: DirectoryManifest): int = + var count = 0 + for entry in self.entries: + if not entry.isDirectory: + inc count + count + +func dirsCount*(self: DirectoryManifest): int = + var count = 0 + for entry in self.entries: + if entry.isDirectory: + inc count + count + +############################################################ +# Predicates +############################################################ + +func isDirectory*(cid: Cid): ?!bool = + success (DirectoryCodec == ?cid.contentType().mapFailure(ArchivistError)) + +func isDirectory*(mc: MultiCodec): ?!bool = + success mc == DirectoryCodec + +## Note: encode/decode procs are in directorycoders.nim to avoid minprotobuf import +## which triggers a serialization error when imported here. + +############################################################ +# Constructors +############################################################ + +func new*( + T: type DirectoryManifest, + entries: seq[DirectoryEntry] = @[], + name: string = "", +): DirectoryManifest = + var total: NBytes = 0.NBytes + for entry in entries: + total = NBytes(total.int + entry.size.int) + + T( + entries: entries, + totalSize: total, + name: name, + ) + +func new*( + T: type DirectoryEntry, + name: string, + cid: Cid, + size: NBytes, + isDirectory: bool = false, + mimetype: string = "", +): DirectoryEntry = + T( + name: name, + cid: cid, + size: size, + isDirectory: isDirectory, + mimetype: mimetype, + ) + +############################################################ +# String representation +############################################################ + +func `$`*(entry: DirectoryEntry): string = + result = "DirectoryEntry(name: " & entry.name + result &= ", cid: " & $entry.cid + result &= ", size: " & $entry.size + result &= ", isDirectory: " & $entry.isDirectory + if entry.mimetype.len > 0: + result &= ", mimetype: " & entry.mimetype + result &= ")" + +func `$`*(self: DirectoryManifest): string = + result = "DirectoryManifest(" + if self.name.len > 0: + result &= "name: " & self.name & ", " + result &= "totalSize: " & $self.totalSize + result &= ", entries: " & $self.entries.len & " items)" + +############################################################ +# Equality +############################################################ + +func `==`*(a, b: DirectoryEntry): bool = + a.name == b.name and + a.cid == b.cid and + a.size == b.size and + a.isDirectory == b.isDirectory and + a.mimetype == b.mimetype + +func `==`*(a, b: DirectoryManifest): bool = + a.entries == b.entries and + a.totalSize == b.totalSize and + a.name == b.name + +############################################################ +# Helpers +############################################################ + +proc findEntry*(self: DirectoryManifest, name:
string, foundEntry: var DirectoryEntry): bool = + ## Find an entry by name in the directory + ## Sets foundEntry and returns true if found, returns false otherwise + for entry in self.entries: + if entry.name == name: + foundEntry = entry + return true + return false + +proc sortByName(entries: var seq[DirectoryEntry]) = + ## Simple insertion sort to avoid importing std/algorithm which triggers + ## a serialization error during TOML config loading + for i in 1 ..< entries.len: + let key = entries[i] + var j = i - 1 + while j >= 0 and entries[j].name > key.name: + entries[j + 1] = entries[j] + dec j + entries[j + 1] = key + +proc sortedEntries*(self: DirectoryManifest): seq[DirectoryEntry] = + ## Return entries sorted: directories first, then files, alphabetically + var dirs: seq[DirectoryEntry] + var files: seq[DirectoryEntry] + + for entry in self.entries: + if entry.isDirectory: + dirs.add(entry) + else: + files.add(entry) + + sortByName(dirs) + sortByName(files) + + result = dirs & files diff --git a/archivist/manifest/directorycoders.nim b/archivist/manifest/directorycoders.nim new file mode 100644 index 00000000..2cd0e646 --- /dev/null +++ b/archivist/manifest/directorycoders.nim @@ -0,0 +1,134 @@ +## Copyright (c) 2025 Archivist Authors +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +# This module implements serialization and deserialization of DirectoryManifest +# It is kept separate from coders.nim to avoid pulling directory types into +# the main compilation path which triggers serialization errors with ref types. + +{.push raises: [].} + +# Use specific libp2p imports instead of the full package +# to avoid serialization conflicts +import pkg/libp2p/cid +import pkg/libp2p/protobuf/minprotobuf +import pkg/questionable/results + +import ./directory +import ../errors +import ../blocktype +import ../units + +proc encode*(self: DirectoryManifest): ?!seq[byte] = + ## Encode the directory manifest into a ``DirectoryCodec`` + ## multicodec container + ## + ## ```protobuf + ## Message DirectoryEntry { + ## string name = 1; + ## bytes cid = 2; + ## uint64 size = 3; + ## bool isDirectory = 4; + ## optional string mimetype = 5; + ## } + ## + ## Message DirectoryManifest { + ## repeated DirectoryEntry entries = 1; + ## uint64 totalSize = 2; + ## optional string name = 3; + ## } + ## ``` + + var pbNode = initProtoBuffer() + + for entry in self.entries: + var pbEntry = initProtoBuffer() + pbEntry.write(1, entry.name) + pbEntry.write(2, entry.cid.data.buffer) + pbEntry.write(3, entry.size.uint64) + pbEntry.write(4, entry.isDirectory.uint32) + if entry.mimetype.len > 0: + pbEntry.write(5, entry.mimetype) + pbEntry.finish() + pbNode.write(1, pbEntry) + + pbNode.write(2, self.totalSize.uint64) + + if self.name.len > 0: + pbNode.write(3, self.name) + + pbNode.finish() + return pbNode.buffer.success + +proc decode*(_: type DirectoryManifest, data: openArray[byte]): ?!DirectoryManifest = + ## Decode a directory manifest from a data blob + ## + + var + pbNode = initProtoBuffer(data) + pbEntries: seq[seq[byte]] + totalSize: uint64 + name: string + entries: seq[DirectoryEntry] + + if pbNode.getRequiredRepeatedField(1, pbEntries).isErr: + return failure("Unable to decode `entries` from directory manifest!") + + if pbNode.getField(2, totalSize).isErr: + return failure("Unable to decode 
`totalSize` from directory manifest!") + + if pbNode.getField(3, name).isErr: + return failure("Unable to decode `name` from directory manifest!") + + for pbEntryData in pbEntries: + var + pbEntry = initProtoBuffer(pbEntryData) + entryName: string + cidBuf: seq[byte] + size: uint64 + isDir: uint32 + mimetype: string + + if pbEntry.getField(1, entryName).isErr: + return failure("Unable to decode entry `name` from directory manifest!") + + if pbEntry.getField(2, cidBuf).isErr: + return failure("Unable to decode entry `cid` from directory manifest!") + + if pbEntry.getField(3, size).isErr: + return failure("Unable to decode entry `size` from directory manifest!") + + if pbEntry.getField(4, isDir).isErr: + return failure("Unable to decode entry `isDirectory` from directory manifest!") + + if pbEntry.getField(5, mimetype).isErr: + return failure("Unable to decode entry `mimetype` from directory manifest!") + + let cid = ?Cid.init(cidBuf).mapFailure + + entries.add(DirectoryEntry( + name: entryName, + cid: cid, + size: size.NBytes, + isDirectory: isDir != 0, + mimetype: mimetype, # Empty string = not set + )) + + success DirectoryManifest( + entries: entries, + totalSize: totalSize.NBytes, + name: name, # Empty string = not set + ) + +func decode*(_: type DirectoryManifest, blk: Block): ?!DirectoryManifest = + ## Decode a directory manifest from a block + ## + + if not ?blk.cid.isDirectory: + return failure "Cid not a directory codec" + + DirectoryManifest.decode(blk.data) diff --git a/archivist/node.nim b/archivist/node.nim index 6e8da603..716a7f6e 100644 --- a/archivist/node.nim +++ b/archivist/node.nim @@ -91,6 +91,9 @@ func switch*(self: ArchivistNodeRef): Switch = func blockStore*(self: ArchivistNodeRef): BlockStore = return self.networkStore +func networkStore*(self: ArchivistNodeRef): NetworkStore = + return self.networkStore + func engine*(self: ArchivistNodeRef): BlockExcEngine = return self.engine diff --git a/archivist/rest/api.nim b/archivist/rest/api.nim index 64c9be29..ba7258f2 100644 --- a/archivist/rest/api.nim +++ b/archivist/rest/api.nim @@ -11,7 +11,10 @@ import std/sequtils import std/mimetypes -import std/os +import std/tables +import std/strutils +import std/algorithm +from std/json import parseJson, JsonParsingError import pkg/questionable import pkg/questionable/results @@ -28,11 +31,15 @@ import pkg/archivistdht/discv5/spr as spr import ../logutils import ../node +import ../directorynode import ../blocktype import ../conf import ../erasure/erasure import ../manifest +import ../manifest/directory import ../streams/asyncstreamwrapper +import ../streams/rangestream +import ../streams/storestream import ../stores import ../marketplace import ../marketplace/abstractmarketplace @@ -41,6 +48,7 @@ import ../sales/reservations import ./coders import ./json +import ./directoryhtml logScope: topics = "archivist restapi" @@ -48,6 +56,62 @@ logScope: declareCounter(archivist_api_uploads, "archivist API uploads") declareCounter(archivist_api_downloads, "archivist API downloads") +type + ByteRange* = object + start*: int + finish*: Option[int] # None means "to end of file" + +proc parseRangeHeader*(header: string): Option[ByteRange] = + ## Parse an HTTP Range header value like "bytes=0-499" or "bytes=500-" + ## Returns None if the header is missing, malformed, or uses unsupported syntax. + ## Only supports single ranges (not multi-part ranges). 
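+  ## Examples (illustrative, derived from the parsing rules below):
+  ##   "bytes=0-499"  -> some(ByteRange(start: 0, finish: 499.some))
+  ##   "bytes=500-"   -> some(ByteRange(start: 500, finish: int.none))
+  ##   "bytes=-500"   -> ByteRange.none  (suffix ranges not supported)
+  ##   "0-499"        -> ByteRange.none  (missing "bytes=" prefix)
+  ## A satisfiable "bytes=500-" on a 1000-byte file is later answered by
+  ## retrieveCid with "Content-Range: bytes 500-999/1000" and status 206.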
+ ## + if header.len == 0: + return ByteRange.none + + # Must start with "bytes=" + if not header.startsWith("bytes="): + return ByteRange.none + + let rangeSpec = header[6..^1] # Skip "bytes=" + + # We don't support multi-part ranges (e.g., "bytes=0-100,200-300") + if ',' in rangeSpec: + return ByteRange.none + + let parts = rangeSpec.split('-') + if parts.len != 2: + return ByteRange.none + + # Parse start (required) + if parts[0].len == 0: + # Suffix range like "bytes=-500" (last 500 bytes) - not supported yet + return ByteRange.none + + var start: int + try: + start = parseInt(parts[0]) + except ValueError: + return ByteRange.none + + if start < 0: + return ByteRange.none + + # Parse end (optional) + var finish: Option[int] + if parts[1].len > 0: + try: + let endVal = parseInt(parts[1]) + if endVal < start: + return ByteRange.none + finish = endVal.some + except ValueError: + return ByteRange.none + else: + finish = int.none + + return ByteRange(start: start, finish: finish).some + proc validate(pattern: string, value: string): int {.gcsafe, raises: [Defect].} = 0 @@ -71,10 +135,14 @@ proc isPending(resp: HttpResponseRef): bool = return resp.getResponseState() == HttpResponseState.Empty proc retrieveCid( - node: ArchivistNodeRef, cid: Cid, local: bool = true, resp: HttpResponseRef + node: ArchivistNodeRef, + cid: Cid, + local: bool = true, + resp: HttpResponseRef, + byteRange: Option[ByteRange] = ByteRange.none ): Future[void] {.async: (raises: [CancelledError, HttpWriteError]).} = - ## Download a file from the node in a streaming - ## manner + ## Download a file from the node in a streaming manner. + ## If byteRange is provided, returns partial content (HTTP 206). ## var lpStream: LPStream @@ -93,8 +161,6 @@ proc retrieveCid( await resp.sendBody(error.msg) return - lpStream = stream - # It is ok to fetch again the manifest because it will hit the cache without manifest =? (await node.fetchManifest(cid)), err: error "Failed to fetch manifest", err = err.msg @@ -102,6 +168,14 @@ proc retrieveCid( await resp.sendBody(err.msg) return + # Total size of the content + let totalSize = + if manifest.protected: manifest.originalDatasetSize.int + else: manifest.datasetSize.int + + # Advertise that we support range requests + resp.setHeader("Accept-Ranges", "bytes") + if manifest.mimetype.isSome: resp.setHeader("Content-Type", manifest.mimetype.get()) else: @@ -115,19 +189,51 @@ proc retrieveCid( else: resp.setHeader("Content-Disposition", "attachment") - # For erasure-coded datasets, we need to return the _original_ length; i.e., - # the length of the non-erasure-coded dataset, as that's what we will be - # returning to the client. 
- let contentLength = - if manifest.protected: manifest.originalDatasetSize else: manifest.datasetSize - resp.setHeader("Content-Length", $(contentLength.int)) + # Determine what we're actually sending + var rangeStart = 0 + var rangeEnd = totalSize - 1 + var isRangeRequest = false + + if byteRange.isSome: + let br = byteRange.get() + rangeStart = br.start + + # Validate range start + if rangeStart >= totalSize: + resp.status = Http416 # Range Not Satisfiable + resp.setHeader("Content-Range", "bytes */" & $totalSize) + await resp.sendBody("Range not satisfiable") + return + + # Calculate range end + if br.finish.isSome: + rangeEnd = min(br.finish.get(), totalSize - 1) + else: + rangeEnd = totalSize - 1 + + isRangeRequest = true + + let contentLength = rangeEnd - rangeStart + 1 + + if isRangeRequest: + resp.status = Http206 + resp.setHeader("Content-Range", "bytes " & $rangeStart & "-" & $rangeEnd & "/" & $totalSize) + + resp.setHeader("Content-Length", $contentLength) + + # Wrap stream for range request if needed + let storeStream = StoreStream(stream) + if isRangeRequest: + lpStream = RangeStream.new(storeStream, rangeStart, rangeEnd) + else: + lpStream = stream await resp.prepare(HttpResponseStreamType.Plain) - while not stream.atEof: + while not lpStream.atEof: var buff = newSeqUninitialized[byte](DefaultBlockSize.int) - len = await stream.readOnce(addr buff[0], buff.len) + len = await lpStream.readOnce(addr buff[0], buff.len) buff.setLen(len) if buff.len <= 0: @@ -146,7 +252,7 @@ proc retrieveCid( if resp.isPending(): await resp.sendBody(exc.msg) finally: - info "Sent bytes", cid = cid, bytes + info "Sent bytes", cid = cid, bytes, isRange = byteRange.isSome if not lpStream.isNil: await lpStream.close() @@ -214,6 +320,19 @@ proc initDataApi(node: ArchivistNodeRef, repoStore: RepoStore, router: var RestR if mimetype.get() != "": let mimetypeVal = mimetype.get() var m = newMimetypes() + # Add formats missing from std/mimetypes + m.register("xml", "application/xml") + m.register("xml", "text/xml") + m.register("flac", "audio/flac") + m.register("mp3", "audio/mpeg") + m.register("opus", "audio/opus") + m.register("m4a", "audio/mp4") + m.register("m4a", "audio/x-m4a") + m.register("aac", "audio/aac") + m.register("wma", "audio/x-ms-wma") + m.register("mkv", "video/x-matroska") + m.register("webm", "video/webm") + m.register("ts", "video/mp2t") let extension = m.getExt(mimetypeVal, "") if extension == "": return RestApiResponse.error( @@ -226,8 +345,13 @@ proc initDataApi(node: ArchivistNodeRef, repoStore: RepoStore, router: var RestR let contentDisposition = request.headers.getString(ContentDispositionHeader) let filename = getFilenameFromContentDisposition(contentDisposition) - if filename.isSome and not isValidFilename(filename.get()): - return RestApiResponse.error(Http422, "The filename is not valid.") + # Validate filename - only block null bytes and literal "." or ".." + # Forward slashes are allowed (relative paths for directory uploads like "Album/track.mp3") + # Backslashes are normalized to forward slashes + if filename.isSome: + let fname = filename.get().replace('\\', '/') + if fname.len == 0 or fname == "." or fname == ".." 
or '\0' in fname: + return RestApiResponse.error(Http422, "The filename is not valid.") # Here we could check if the extension matches the filename if needed @@ -260,30 +384,354 @@ proc initDataApi(node: ArchivistNodeRef, repoStore: RepoStore, router: var RestR let json = await formatManifestBlocks(node) return RestApiResponse.response($json, contentType = "application/json") + router.api(MethodOptions, "/api/archivist/v1/directory") do( + resp: HttpResponseRef + ) -> RestApiResponse: + if corsOrigin =? allowedOrigin: + resp.setCorsHeaders("POST", corsOrigin) + resp.setHeader( + "Access-Control-Allow-Headers", "content-type, x-pubkey" + ) + + resp.status = Http204 + await resp.sendBody("") + + router.rawApi(MethodPost, "/api/archivist/v1/directory") do() -> RestApiResponse: + ## Finalize a directory from pre-uploaded files + ## + ## Accepts JSON with array of entries, each containing: + ## - path: string (e.g., "folder/file.mp3") + ## - cid: string (CID of already-uploaded file) + ## - size: int (file size in bytes) + ## - mimetype: string (optional) + ## + ## Returns JSON with root directory CID + ## + trace "Handling directory finalize" + + var headers = buildCorsHeaders("POST", allowedOrigin) + + # Parse JSON body + let body = await request.getBody() + var jsonBody: JsonNode + try: + jsonBody = parseJson(cast[string](body)) + except JsonParsingError: + return RestApiResponse.error( + Http400, "Invalid JSON body", headers = headers + ) + + # Validate structure + if not jsonBody.hasKey("entries"): + return RestApiResponse.error( + Http400, "Missing 'entries' array in request body", headers = headers + ) + + let entriesJson = jsonBody["entries"] + if entriesJson.kind != JArray: + return RestApiResponse.error( + Http400, "'entries' must be an array", headers = headers + ) + + if entriesJson.len == 0: + return RestApiResponse.error( + Http400, "No entries provided", headers = headers + ) + + # Parse entries + type InputEntry = object + path: string + cid: Cid + size: NBytes + mimetype: ?string + + var inputEntries: seq[InputEntry] + + for i, entry in entriesJson.elems: + if entry.kind != JObject: + return RestApiResponse.error( + Http400, "Entry " & $i & " must be an object", headers = headers + ) + + if not entry.hasKey("path") or not entry.hasKey("cid"): + return RestApiResponse.error( + Http400, "Entry " & $i & " missing required 'path' or 'cid'", headers = headers + ) + + let pathStr = entry["path"].getStr() + let cidStr = entry["cid"].getStr() + + # Validate and normalize path + var normalPath = pathStr.replace("\\", "/") + while normalPath.len > 0 and normalPath[0] == '/': + normalPath = normalPath[1..^1] + + # Check for directory traversal - ".." must be a path segment, not part of filename + # Valid: "F.R.E.S.H..mp3" (double dot in filename) + # Invalid: "../etc/passwd" or "foo/../bar" + let pathParts = normalPath.split('/') + for part in pathParts: + if part == "..": + return RestApiResponse.error( + Http400, "Invalid path (directory traversal not allowed): " & pathStr, + headers = headers, + ) + + if normalPath.len == 0: + continue + + # Parse CID + without cidVal =? 
Cid.init(cidStr).mapFailure, err: + return RestApiResponse.error( + Http400, "Entry " & $i & " has invalid CID: " & cidStr, headers = headers + ) + + let size = NBytes(entry.getOrDefault("size").getInt(0)) + let mimetypeOpt = + if entry.hasKey("mimetype") and entry["mimetype"].getStr().len > 0: + entry["mimetype"].getStr().some + else: + string.none + + inputEntries.add(InputEntry( + path: normalPath, + cid: cidVal, + size: size, + mimetype: mimetypeOpt, + )) + + trace "Parsed directory finalize request", entries = inputEntries.len + + # Build directory tree from bottom up + # Group files by their parent directory + type DirNode = ref object + name: string + files: seq[tuple[name: string, cid: Cid, size: NBytes, mimetype: ?string]] + subdirs: Table[string, DirNode] + + var root = DirNode(name: "", subdirs: initTable[string, DirNode]()) + + for entry in inputEntries: + let pathParts = entry.path.split('/') + var current = root + + # Navigate/create directory structure + for i in 0 ..< pathParts.len - 1: + let part = pathParts[i] + if part notin current.subdirs: + current.subdirs[part] = DirNode( + name: part, + subdirs: initTable[string, DirNode](), + ) + current = current.subdirs[part] + + # Add file to current directory + current.files.add(( + name: pathParts[^1], + cid: entry.cid, + size: entry.size, + mimetype: entry.mimetype, + )) + + # If root has exactly one subdir and no files, promote that subdir to root + # This makes "MyAlbum/track1.mp3" become a directory named "MyAlbum" at root + # instead of having an anonymous root containing "MyAlbum" + while root.subdirs.len == 1 and root.files.len == 0: + for name, subdir in root.subdirs.pairs: + root = subdir + break + + # Recursively create directory manifests from leaves to root + proc buildDirManifest(dirNode: DirNode): Future[?!Cid] {.async.} = + var entries: seq[DirectoryEntry] + + # First, process subdirectories (they need their CIDs computed first) + for name, subdir in dirNode.subdirs.pairs: + without subdirCid =? (await buildDirManifest(subdir)), err: + return failure(err) + + # We need the total size of the subdirectory + without subdirManifest =? ( + await fetchDirectoryManifest(node.networkStore, subdirCid) + ), err: + return failure(err) + + entries.add(DirectoryEntry.new( + name = name, + cid = subdirCid, + size = subdirManifest.totalSize, + isDirectory = true, + )) + + # Add files + for f in dirNode.files: + entries.add(DirectoryEntry.new( + name = f.name, + cid = f.cid, + size = f.size, + isDirectory = false, + mimetype = if f.mimetype.isSome: f.mimetype.unsafeGet() else: "", + )) + + # Sort entries: directories first, then files, alphabetically + entries.sort(proc(a, b: DirectoryEntry): int = + if a.isDirectory and not b.isDirectory: + return -1 + elif not a.isDirectory and b.isDirectory: + return 1 + else: + return cmp(a.name, b.name) + ) + + let dirManifest = DirectoryManifest.new( + entries = entries, + name = dirNode.name, + ) + + without blk =? (await storeDirectoryManifest(node.networkStore, dirManifest)), err: + return failure(err) + + return blk.cid.success + + without rootCid =? (await buildDirManifest(root)), err: + error "Error building directory manifest", exc = err.msg + return RestApiResponse.error(Http500, err.msg, headers = headers) + + without rootDir =? 
(await fetchDirectoryManifest(node.networkStore, rootCid)), err: + return RestApiResponse.error(Http500, err.msg, headers = headers) + + archivist_api_uploads.inc() + + # Build JSON response directly (RestDirectoryUploadResponse causes serialization issues) + var responseJson = newJObject() + responseJson["cid"] = %rootCid + responseJson["totalSize"] = %(rootDir.totalSize.int) + responseJson["filesCount"] = %rootDir.filesCount + return RestApiResponse.response( + $responseJson, contentType = "application/json", headers = headers + ) + router.api(MethodOptions, "/api/archivist/v1/data/{cid}") do( cid: Cid, resp: HttpResponseRef ) -> RestApiResponse: if corsOrigin =? allowedOrigin: - resp.setCorsHeaders("GET,DELETE", corsOrigin) + resp.setCorsHeaders("GET,HEAD,DELETE", corsOrigin) resp.status = Http204 await resp.sendBody("") + router.api(MethodHead, "/api/archivist/v1/data/{cid}") do( + cid: Cid, resp: HttpResponseRef + ) -> RestApiResponse: + ## HEAD request - returns headers without body + ## Used to check content type and size before fetching + var headers = buildCorsHeaders("HEAD", allowedOrigin) + + if cid.isErr: + return RestApiResponse.error(Http400, $cid.error(), headers = headers) + + let cidVal = cid.get() + + if corsOrigin =? allowedOrigin: + resp.setCorsHeaders("HEAD", corsOrigin) + + # Check if this is a directory manifest + without isDir =? cidVal.isDirectory, err: + return RestApiResponse.error(Http400, err.msg, headers = headers) + + if isDir: + # Directory - check if it exists + without directory =? (await fetchDirectoryManifest(node.networkStore, cidVal)), err: + return RestApiResponse.error(Http404, err.msg, headers = headers) + + # Check Accept header to determine response format + let acceptHeader = request.headers.getString("Accept") + if "application/json" in acceptHeader: + resp.setHeader("Content-Type", "application/json") + else: + resp.setHeader("Content-Type", "text/html; charset=utf-8") + resp.setHeader("Content-Length", $(directory.totalSize.int)) + resp.status = Http200 + await resp.sendBody("") + + else: + # Regular file - get manifest for headers + without manifest =? (await node.fetchManifest(cidVal)), err: + return RestApiResponse.error(Http404, err.msg, headers = headers) + + if manifest.mimetype.isSome: + resp.setHeader("Content-Type", manifest.mimetype.get()) + else: + resp.setHeader("Content-Type", "application/octet-stream") + + let contentLength = + if manifest.protected: manifest.originalDatasetSize else: manifest.datasetSize + resp.setHeader("Content-Length", $(contentLength.int)) + + if manifest.filename.isSome: + resp.setHeader("Content-Disposition", + "attachment; filename=\"" & manifest.filename.get() & "\"") + + resp.status = Http200 + await resp.sendBody("") + router.api(MethodGet, "/api/archivist/v1/data/{cid}") do( cid: Cid, resp: HttpResponseRef ) -> RestApiResponse: var headers = buildCorsHeaders("GET", allowedOrigin) - ## Download a file from the local node in a streaming - ## manner + ## Download a file from the local node in a streaming manner, + ## or browse a directory manifest (returning HTML or JSON) if cid.isErr: return RestApiResponse.error(Http400, $cid.error(), headers = headers) + let cidVal = cid.get() + if corsOrigin =? allowedOrigin: resp.setCorsHeaders("GET", corsOrigin) resp.setHeader("Access-Control-Headers", "X-Requested-With") - await node.retrieveCid(cid.get(), local = true, resp = resp) + # Check if this is a directory manifest + without isDir =? 
cidVal.isDirectory, err: + return RestApiResponse.error(Http400, err.msg, headers = headers) + + if isDir: + # This is a directory - return HTML or JSON listing + without directory =? (await fetchDirectoryManifest(node.networkStore, cidVal)), err: + return RestApiResponse.error(Http404, err.msg, headers = headers) + + # Check Accept header to determine response format + let acceptHeader = request.headers.getString("Accept") + + if "text/html" in acceptHeader or "text/*" in acceptHeader or "*/*" in acceptHeader: + # Return HTML directory listing + let html = generateDirectoryHtml(directory, cidVal) + return RestApiResponse.response(html, contentType = "text/html; charset=utf-8") + else: + # Return JSON (build directly to avoid serialization issues with RestDirectory) + var entriesJson = newJArray() + for entry in directory.entries: + var entryJson = newJObject() + entryJson["name"] = %entry.name + entryJson["cid"] = %($entry.cid) + entryJson["size"] = %(entry.size.int) + entryJson["isDirectory"] = %entry.isDirectory + if entry.mimetype.len > 0: + entryJson["mimetype"] = %entry.mimetype + entriesJson.add(entryJson) + + var json = newJObject() + json["cid"] = %($cidVal) + if directory.name.len > 0: + json["name"] = %directory.name + json["totalSize"] = %(directory.totalSize.int) + json["entries"] = entriesJson + return RestApiResponse.response($json, contentType = "application/json", headers = headers) + + # Regular file - stream it (with optional range support) + let rangeHeader = request.headers.getString("Range") + let byteRange = parseRangeHeader(rangeHeader) + await node.retrieveCid(cidVal, local = true, resp = resp, byteRange = byteRange) router.api(MethodDelete, "/api/archivist/v1/data/{cid}") do( cid: Cid, resp: HttpResponseRef @@ -344,7 +792,29 @@ proc initDataApi(node: ArchivistNodeRef, repoStore: RepoStore, router: var RestR resp.setHeader("Access-Control-Headers", "X-Requested-With") resp.setHeader("Access-Control-Expose-Headers", "Content-Disposition") - await node.retrieveCid(cid.get(), local = false, resp = resp) + let rangeHeader = request.headers.getString("Range") + let byteRange = parseRangeHeader(rangeHeader) + await node.retrieveCid(cid.get(), local = false, resp = resp, byteRange = byteRange) + + router.api(MethodHead, "/api/archivist/v1/data/{cid}/network/stream") do( + cid: Cid, resp: HttpResponseRef + ) -> RestApiResponse: + ## HEAD request for network stream - returns headers without body + ## + var headers = buildCorsHeaders("HEAD", allowedOrigin) + + if cid.isErr: + return RestApiResponse.error(Http400, $cid.error(), headers = headers) + + if corsOrigin =? 
allowedOrigin: + resp.setCorsHeaders("HEAD", corsOrigin) + + # For streaming endpoint, just return 200 with audio content type + # The actual content-length isn't known without fetching + resp.setHeader("Content-Type", "application/octet-stream") + resp.setHeader("Accept-Ranges", "bytes") + resp.status = Http200 + await resp.sendBody("") router.api(MethodGet, "/api/archivist/v1/data/{cid}/network/manifest") do( cid: Cid, resp: HttpResponseRef @@ -374,6 +844,93 @@ proc initDataApi(node: ArchivistNodeRef, repoStore: RepoStore, router: var RestR ) return RestApiResponse.response($json, contentType = "application/json") + # Path resolution within directories + router.api(MethodGet, "/api/archivist/v1/data/{cid}/path") do( + cid: Cid, p: Option[string], resp: HttpResponseRef + ) -> RestApiResponse: + ## Access a file or subdirectory within a directory by path + ## Use query parameter ?p=images/logo.png + ## + var headers = buildCorsHeaders("GET", allowedOrigin) + + if cid.isErr: + return RestApiResponse.error(Http400, $cid.error(), headers = headers) + + let cidVal = cid.get() + + # Get path from query parameter (Option[Result[string, cstring]]) + var pathStr = "" + if pOpt =? p: + if pRes =? pOpt: + pathStr = pRes + let pathParts = if pathStr.len > 0: pathStr.split('/') else: @[] + + if pathParts.len == 0: + # Redirect to directory listing + return RestApiResponse.redirect( + Http307, "/api/archivist/v1/data/" & $cidVal + ) + + # Check if this is a directory manifest + without isDir =? cidVal.isDirectory, err: + return RestApiResponse.error(Http400, err.msg, headers = headers) + + if not isDir: + return RestApiResponse.error( + Http400, "CID is not a directory manifest", headers = headers + ) + + without directory =? (await fetchDirectoryManifest(node.networkStore, cidVal)), err: + return RestApiResponse.error(Http404, err.msg, headers = headers) + + # Resolve the path + var currentDir = directory + var currentCid = cidVal + + for i, part in pathParts: + if part == "": + continue + + var foundEntry: DirectoryEntry + if not currentDir.findEntry(part, foundEntry): + return RestApiResponse.error( + Http404, "Path not found: " & part, headers = headers + ) + + if i == pathParts.high: + # This is the last path component + if foundEntry.isDirectory: + # Redirect to directory listing + return RestApiResponse.redirect( + Http307, "/api/archivist/v1/data/" & $foundEntry.cid + ) + else: + # Serve the file (with optional range support) + if corsOrigin =? allowedOrigin: + resp.setCorsHeaders("GET", corsOrigin) + resp.setHeader("Access-Control-Headers", "X-Requested-With") + + resp.setHeader("Access-Control-Expose-Headers", "Content-Disposition") + let rangeHeader = request.headers.getString("Range") + let byteRange = parseRangeHeader(rangeHeader) + await node.retrieveCid(foundEntry.cid, local = true, resp = resp, byteRange = byteRange) + return RestApiResponse.response("") + else: + # Navigate into subdirectory + if not foundEntry.isDirectory: + return RestApiResponse.error( + Http400, "Path component is not a directory: " & part, headers = headers + ) + + without subDir =? 
(await fetchDirectoryManifest(node.networkStore, foundEntry.cid)), err: + return RestApiResponse.error(Http404, err.msg, headers = headers) + + currentDir = subDir + currentCid = foundEntry.cid + + # Should not reach here + return RestApiResponse.error(Http500, "Unexpected error", headers = headers) + proc initSalesApi(node: ArchivistNodeRef, router: var RestRouter) = let allowedOrigin = router.allowedOrigin diff --git a/archivist/rest/directoryhtml.nim b/archivist/rest/directoryhtml.nim new file mode 100644 index 00000000..bba5fdb3 --- /dev/null +++ b/archivist/rest/directoryhtml.nim @@ -0,0 +1,489 @@ +## Copyright (c) 2025 Archivist Authors +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +# HTML template for directory listing with Archivist branding + +{.push raises: [].} + +import std/strutils +import std/strformat +import std/options + +import pkg/libp2p/cid + +import ../manifest/directory +import ../units + +const ArchivistLogoSvg* = """ + + + + + + +""" + +const DirectoryListingCss* = """ +:root { + --bg-primary: #0d1117; + --bg-secondary: #161b22; + --bg-tertiary: #21262d; + --text-primary: #c9d1d9; + --text-secondary: #8b949e; + --accent: #00ff41; + --accent-dim: #00cc33; + --link: #58a6ff; + --link-hover: #79c0ff; + --border: #30363d; +} + +* { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Noto Sans', Helvetica, Arial, sans-serif; + background-color: var(--bg-primary); + color: var(--text-primary); + line-height: 1.5; + min-height: 100vh; +} + +.container { + max-width: 1200px; + margin: 0 auto; + padding: 0 16px; +} + +header { + background-color: var(--bg-secondary); + border-bottom: 1px solid var(--border); + padding: 16px 0; + position: sticky; + top: 0; + z-index: 100; +} + +.header-content { + display: flex; + align-items: center; + gap: 16px; +} + +.logo { + display: flex; + align-items: center; + gap: 8px; + text-decoration: none; + color: var(--accent); + font-weight: 600; + font-size: 18px; +} + +.logo svg { + flex-shrink: 0; +} + +.breadcrumb { + display: flex; + align-items: center; + gap: 4px; + font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; + font-size: 14px; + color: var(--text-secondary); + flex-wrap: wrap; +} + +.breadcrumb a { + color: var(--link); + text-decoration: none; +} + +.breadcrumb a:hover { + text-decoration: underline; +} + +.breadcrumb .separator { + color: var(--text-secondary); +} + +main { + padding: 24px 0; +} + +.dir-info { + background-color: var(--bg-secondary); + border: 1px solid var(--border); + border-radius: 6px; + padding: 16px; + margin-bottom: 16px; +} + +.dir-info h1 { + font-size: 20px; + font-weight: 600; + margin-bottom: 8px; + display: flex; + align-items: center; + gap: 8px; +} + +.dir-meta { + font-size: 13px; + color: var(--text-secondary); + font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; +} + +.dir-meta .cid { + word-break: break-all; +} + +.dir-meta a { + color: var(--link); + text-decoration: none; +} + +.dir-meta a:hover { + text-decoration: underline; +} + +.file-list { + background-color: var(--bg-secondary); + border: 1px solid var(--border); + border-radius: 6px; + overflow: hidden; +} + +.file-list-header { + display: grid; + grid-template-columns: 1fr 200px 100px; + 
gap: 16px; + padding: 12px 16px; + background-color: var(--bg-tertiary); + border-bottom: 1px solid var(--border); + font-size: 12px; + font-weight: 600; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.5px; +} + +.file-row { + display: grid; + grid-template-columns: 1fr 200px 100px; + gap: 16px; + padding: 10px 16px; + border-bottom: 1px solid var(--border); + font-size: 14px; + transition: background-color 0.1s; +} + +.file-row:last-child { + border-bottom: none; +} + +.file-row:hover { + background-color: var(--bg-tertiary); +} + +.file-name { + display: flex; + align-items: center; + gap: 8px; + min-width: 0; +} + +.file-name a { + color: var(--link); + text-decoration: none; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.file-name a:hover { + text-decoration: underline; +} + +.file-icon { + flex-shrink: 0; + font-size: 16px; +} + +.file-cid { + font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; + font-size: 12px; + color: var(--text-secondary); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.file-cid a { + color: var(--text-secondary); + text-decoration: none; +} + +.file-cid a:hover { + color: var(--link); +} + +.file-size { + text-align: right; + color: var(--text-secondary); + font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; + font-size: 13px; +} + +.parent-link { + color: var(--text-secondary) !important; +} + +.empty-dir { + padding: 48px 16px; + text-align: center; + color: var(--text-secondary); +} + +footer { + padding: 24px 0; + text-align: center; + color: var(--text-secondary); + font-size: 12px; + border-top: 1px solid var(--border); + margin-top: 48px; +} + +footer a { + color: var(--accent); + text-decoration: none; +} + +footer a:hover { + text-decoration: underline; +} + +@media (max-width: 768px) { + .file-list-header, + .file-row { + grid-template-columns: 1fr 80px; + } + + .file-cid { + display: none; + } + + .header-content { + flex-direction: column; + align-items: flex-start; + } +} +""" + +proc formatSize*(bytes: NBytes): string = + ## Format bytes into human-readable size + let b = bytes.int64 + if b < 1024: + return $b & " B" + elif b < 1024 * 1024: + return fmt"{b.float / 1024.0:.1f} KB" + elif b < 1024 * 1024 * 1024: + return fmt"{b.float / (1024.0 * 1024.0):.1f} MB" + else: + return fmt"{b.float / (1024.0 * 1024.0 * 1024.0):.2f} GB" + +proc getFileIcon*(entry: DirectoryEntry): string = + ## Get appropriate icon for file type + if entry.isDirectory: + return "📁" # folder icon + + if entry.mimetype.len == 0: + return "📄" # generic file icon + + let mime = entry.mimetype + + if mime.startsWith("image/"): + return "📷" # camera/image icon + elif mime.startsWith("video/"): + return "🎥" # video icon + elif mime.startsWith("audio/"): + return "🎵" # music icon + elif mime.startsWith("text/"): + return "📝" # text icon + elif mime == "application/pdf": + return "📕" # book icon + elif mime == "application/zip" or mime == "application/x-tar" or + mime == "application/gzip" or mime == "application/x-7z-compressed": + return "📦" # archive icon + elif mime == "application/json" or mime == "application/xml": + return "📄" # code/file icon + else: + return "📄" # generic file icon + +proc escapeHtml*(s: string): string = + ## Escape HTML special characters + result = s + result = result.replace("&", "&amp;") + result = result.replace("<", "&lt;") + result = result.replace(">", "&gt;") + result = result.replace("\"", "&quot;") + result = result.replace("'", "&#39;") + +proc truncateCid*(cidStr: string, maxLen: int = 16): string = + ## Truncate CID for display + if cidStr.len <= maxLen: + return cidStr + let halfLen = (maxLen - 3) div 2 + return cidStr[0 ..< halfLen] & "..." & cidStr[^halfLen .. ^1] + +proc generateDirectoryHtml*( + directory: DirectoryManifest, + dirCid: Cid, + basePath: string = "", + parentCid: Option[Cid] = none(Cid), +): string = + ## Generate HTML page for directory listing + ## + ## basePath: the path within the directory (e.g., "/subdir/nested") + ## parentCid: CID of parent directory for ".." link + + let + cidStr = $dirCid + dirName = if directory.name.len > 0: directory.name else: cidStr[0 ..< 12] & "..." + escapedDirName = escapeHtml(dirName) + pathParts = if basePath.len > 0: basePath.strip(chars = {'/'}).split('/') else: @[] + + var html = fmt"""<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>Index of {escapedDirName} - Archivist</title>
+  <style>{DirectoryListingCss}</style>
+</head>
+<body>
+<header>
+  <div class="container">
+    <div class="header-content">
+      <a class="logo" href="/">{ArchivistLogoSvg}</a>
+    </div>
+  </div>
+</header>
+<main>
+  <div class="container">
+    <div class="dir-info">
+      <h1>
+        📁 Index of {escapedDirName}
+      </h1>
+      <div class="dir-meta">
+        <span class="cid">CID: {cidStr}</span>
+        •
+        {directory.entries.len} items
+        •
+        {formatSize(directory.totalSize)}
+      </div>
+    </div>
+    <div class="file-list">
+      <div class="file-list-header">
+        <div>Name</div>
+        <div>CID</div>
+        <div>Size</div>
+      </div>
+"""
+
+  # Add parent directory link if we have a parent
+  if parentCid.isSome:
+    let parentCidStr = $parentCid.unsafeGet()
+    html &= fmt"""      <div class="file-row">
+        <div class="file-name">
+          <span class="file-icon">📁</span>
+          <a class="parent-link" href="/api/archivist/v1/data/{parentCidStr}">..</a>
+        </div>
+        <div class="file-cid" title="{parentCidStr}"></div>
+        <div class="file-size">-</div>
+      </div>
+"""
+  elif basePath.len > 0:
+    # Parent is same directory, just go up one path level
+    let parentPath = if pathParts.len > 1:
+      "/" & pathParts[0 ..< ^1].join("/")
+    else:
+      ""
+    html &= fmt"""      <div class="file-row">
+        <div class="file-name">
+          <span class="file-icon">📁</span>
+          <a class="parent-link" href="/api/archivist/v1/data/{cidStr}/path?p={parentPath}">..</a>
+        </div>
+        <div class="file-cid">-</div>
+        <div class="file-size">-</div>
+      </div>
+"""
+
+  # Add sorted entries (directories first, then files)
+  let sortedEntries = directory.sortedEntries()
+
+  if sortedEntries.len == 0:
+    html &= """      <div class="empty-dir">
+        This directory is empty
+      </div>
+"""
+  else:
+    for entry in sortedEntries:
+      let
+        entryCidStr = $entry.cid
+        entryIcon = getFileIcon(entry)
+        entryName = escapeHtml(entry.name)
+        entrySize = formatSize(entry.size)
+        entryLink = fmt"/api/archivist/v1/data/{entryCidStr}"
+        displayName = if entry.isDirectory: entryName & "/" else: entryName
+
+      html &= fmt"""      <div class="file-row">
+        <div class="file-name">
+          <span class="file-icon">{entryIcon}</span>
+          <a href="{entryLink}">{displayName}</a>
+        </div>
+        <div class="file-cid" title="{entryCidStr}"></div>
+        <div class="file-size">{entrySize}</div>
+      </div>
+"""
+
+  html &= fmt"""    </div>
+  </div>
+</main>
+<footer>
+  <div class="container"></div>
+</footer>
+</body>
+</html>
+"""
+
+  return html
diff --git a/archivist/sales/reservations.nim b/archivist/sales/reservations.nim index 545876f0..0f3dbe12 100644 --- a/archivist/sales/reservations.nim +++ b/archivist/sales/reservations.nim @@ -707,14 +707,29 @@ proc findAvailability*( return none Availability for item in storables.items: - if bytesResult =? (await item) and bytes =? bytesResult and - availability =? Availability.fromJson(bytes): - if availability.enabled and size <= availability.freeSize and - duration <= availability.duration and - collateralPerByte <= availability.maxCollateralPerByte and - pricePerBytePerSecond >= availability.minPricePerBytePerSecond and - (availability.until == 0 or availability.until >= validUntil): - trace "availability matched", + if bytesResult =? (await item): + if bytes =? bytesResult and availability =? Availability.fromJson(bytes): + if availability.enabled and size <= availability.freeSize and + duration <= availability.duration and + collateralPerByte <= availability.maxCollateralPerByte and + pricePerBytePerSecond >= availability.minPricePerBytePerSecond and + (availability.until == 0 or availability.until >= validUntil): + trace "availability matched", + id = availability.id, + enabled = availability.enabled, + size, + availFreeSize = availability.freeSize, + duration, + availDuration = availability.duration, + pricePerBytePerSecond, + availMinPricePerBytePerSecond = availability.minPricePerBytePerSecond, + collateralPerByte, + availMaxCollateralPerByte = availability.maxCollateralPerByte, + until = availability.until + + return some availability + + trace "availability did not match", id = availability.id, enabled = availability.enabled, size, @@ -726,18 +741,3 @@ availFreeSize = availability.freeSize, duration, availDuration = availability.duration, pricePerBytePerSecond, availMinPricePerBytePerSecond = availability.minPricePerBytePerSecond, collateralPerByte, availMaxCollateralPerByte = availability.maxCollateralPerByte, until = availability.until - - return some availability - - trace "availability did not match", - id = availability.id, - enabled = availability.enabled, - size, - availFreeSize = availability.freeSize, - duration, - availDuration = availability.duration, - pricePerBytePerSecond, - availMinPricePerBytePerSecond = availability.minPricePerBytePerSecond, - collateralPerByte, - availMaxCollateralPerByte = availability.maxCollateralPerByte, - until = availability.until diff --git a/archivist/slots/builder/builder.nim b/archivist/slots/builder/builder.nim index a3784594..19616dac 100644 --- a/archivist/slots/builder/builder.nim +++ b/archivist/slots/builder/builder.nim @@ -208,8 +208,12 @@ proc getCellHashes*[SomeTree, SomeHash]( pos = i trace "Getting block CID for tree at index" - without (_, tree) =? (await self.buildBlockTree(blkIdx, i)) and digest =? tree.root, - e: + + without (_, tree) =? (await self.buildBlockTree(blkIdx, i)), e: + error "Failed to get block CID for tree at index", e = e.msg + return failure(e) + + without digest =? tree.root, e: + error "Failed to get block CID for tree at index", e = e.msg return failure(e) diff --git a/archivist/stores/repostore/store.nim b/archivist/stores/repostore/store.nim index c4a93591..27fc23bd 100644 --- a/archivist/stores/repostore/store.nim +++ b/archivist/stores/repostore/store.nim @@ -313,12 +313,12 @@ method listBlocks*( proc next(): Future[?!Cid] {.async: (raises: [CancelledError]).} = await idleAsync() - if pair =? (await queryIter.next()) and cid =? pair.key: - doAssert pair.data.len == 0 - trace "Retrieved record from repo", cid - return Cid.init(cid.value).mapFailure - else: - return Cid.failure("No or invalid Cid") + if pair =?
(await queryIter.next()): + if cid =? pair.key: + doAssert pair.data.len == 0 + trace "Retrieved record from repo", cid + return Cid.init(cid.value).mapFailure + return Cid.failure("No or invalid Cid") proc isFinished(): bool = queryIter.finished diff --git a/archivist/streams/rangestream.nim b/archivist/streams/rangestream.nim new file mode 100644 index 00000000..c1d026ad --- /dev/null +++ b/archivist/streams/rangestream.nim @@ -0,0 +1,84 @@ +## Copyright (c) 2025 Archivist Authors +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +## RangeStream - A stream wrapper that provides access to a byte range +## of an underlying SeekableStream. Used for HTTP Range request support. + +{.push raises: [].} + +import pkg/chronos +import pkg/libp2p/stream/lpstream + +import ./seekablestream +import ../logutils + +export seekablestream + +logScope: + topics = "archivist rangestream" + +const RangeStreamTrackerName* = "RangeStream" + +type + RangeStream* = ref object of LPStream + source*: SeekableStream + rangeStart*: int # First byte of the range (inclusive) + rangeEnd*: int # Last byte of the range (inclusive) + bytesRemaining*: int # Bytes left to read in this range + +method initStream*(s: RangeStream) = + if s.objName.len == 0: + s.objName = RangeStreamTrackerName + procCall LPStream(s).initStream() + +proc new*( + T: type RangeStream, + source: SeekableStream, + rangeStart: int, + rangeEnd: int +): RangeStream = + ## Create a RangeStream that reads bytes [rangeStart, rangeEnd] from source. + ## Both bounds are inclusive (per HTTP Range semantics). + ## + let rangeLen = rangeEnd - rangeStart + 1 + result = RangeStream( + source: source, + rangeStart: rangeStart, + rangeEnd: rangeEnd, + bytesRemaining: rangeLen + ) + # Seek the underlying stream to the start of our range + source.setPos(rangeStart) + result.initStream() + +proc rangeLength*(self: RangeStream): int = + ## Total length of this range + self.rangeEnd - self.rangeStart + 1 + +method atEof*(self: RangeStream): bool = + self.bytesRemaining <= 0 or self.source.atEof + +method readOnce*( + self: RangeStream, pbytes: pointer, nbytes: int +): Future[int] {.async: (raises: [CancelledError, LPStreamError]).} = + ## Read up to nbytes, but never more than bytesRemaining in our range. + ## + if self.atEof: + raise newLPStreamEOFError() + + # Don't read past our range boundary + let toRead = min(nbytes, self.bytesRemaining) + let bytesRead = await self.source.readOnce(pbytes, toRead) + + self.bytesRemaining -= bytesRead + return bytesRead + +method closeImpl*(self: RangeStream) {.async: (raises: []).} = + trace "Closing RangeStream" + await self.source.close() + await procCall LPStream(self).closeImpl() diff --git a/archivist/utils.nim b/archivist/utils.nim index dddecaeb..02d7ce9f 100644 --- a/archivist/utils.nim +++ b/archivist/utils.nim @@ -41,24 +41,6 @@ func roundUp*[T](a, b: T): T = proc orElse*[A](a, b: Option[A]): Option[A] = if (a.isSome()): a else: b -template findIt*(s, pred: untyped): untyped = - ## Returns the index of the first object matching a predicate, or -1 if no - ## object matches it. 
- runnableExamples: - type MyType = object - att: int - - var s = @[MyType(att: 1), MyType(att: 2), MyType(att: 3)] - doAssert s.findIt(it.att == 2) == 1 - doAssert s.findIt(it.att == 4) == -1 - - var index = -1 - for i, it {.inject.} in enumerate(items(s)): - if pred: - index = i - break - index - when not declared(parseDuration): # Odd code formatting to minimize diff v. mainLine const Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'} diff --git a/archivist/validationconfig.nim b/archivist/validationconfig.nim index 3e21c4fa..11042eaf 100644 --- a/archivist/validationconfig.nim +++ b/archivist/validationconfig.nim @@ -3,7 +3,7 @@ import pkg/questionable import pkg/questionable/results type - ValidationGroups* = range[2 .. 65535] + ValidationGroups* = int MaxSlots* = int ValidationConfig* = object maxSlots: MaxSlots @@ -19,9 +19,13 @@ func init*( if maxSlots < 0: return failure "The value of maxSlots must be greater than " & fmt"or equal to 0! (got: {maxSlots})" - if validationGroups =? groups and groupIndex >= uint16(validationGroups): - return failure "The value of the group index must be less than " & - fmt"validation groups! (got: {groupIndex = }, " & fmt"groups = {validationGroups})" + if validationGroups =? groups: + if validationGroups < 2: + return failure "number of validation groups should be at least 2" + if groupIndex >= uint16(validationGroups): + return failure "The value of the group index must be less than " & + fmt"validation groups! (got: {groupIndex = }, " & + fmt"groups = {validationGroups})" success ValidationConfig(maxSlots: maxSlots, groups: groups, groupIndex: groupIndex) diff --git a/docker/archivist.Dockerfile b/docker/archivist.Dockerfile index 24be399f..f178eb81 100644 --- a/docker/archivist.Dockerfile +++ b/docker/archivist.Dockerfile @@ -1,5 +1,5 @@ # Variables -ARG BUILDER=nimlang/nim:2.2.4-ubuntu-regular +ARG BUILDER=nimlang/nim:2.2.6-ubuntu-regular ARG IMAGE=ubuntu:24.04 ARG RUST_VERSION=${RUST_VERSION:-1.79.0} ARG BUILD_HOME=/src diff --git a/nimble.lock b/nimble.lock index 868268c4..65f50fc0 100644 --- a/nimble.lock +++ b/nimble.lock @@ -373,7 +373,7 @@ "questionable": { "version": "0.10.15", "vcsRevision": "82d90b67bcfb7f2e918b61dace2ff1a4ced60935", - "url": "https://github.com/logos-storage/questionable", + "url": "https://github.com/codex-storage/questionable", "downloadMethod": "git", "dependencies": [], "checksums": { @@ -419,8 +419,8 @@ } }, "archivistdht": { - "version": "0.7.1", - "vcsRevision": "a2669b0351613da6c55684f897c119db7cbd30ad", + "version": "0.7.2", + "vcsRevision": "d44be375ed8417d7bf0b25499f3ad209c9db5c56", "url": "https://github.com/durability-labs/archivist-dht", "downloadMethod": "git", "dependencies": [ @@ -438,19 +438,19 @@ "questionable" ], "checksums": { - "sha1": "87f688ecbb700c29f4775b356d2f9a9b983d2236" + "sha1": "de73c9d1b1ebc59223be7be905bdc2397522d437" } }, "circom_witnessgen": { - "version": "0.1.3", - "vcsRevision": "53066696877e238c50ac85dfe8741f43f8d39f08", + "version": "0.1.4", + "vcsRevision": "15289d5b896cea5df7cc91e2c9bec054238bc74a", "url": "https://github.com/durability-labs/circom-witnessgen", "downloadMethod": "git", "dependencies": [ "constantine" ], "checksums": { - "sha1": "260bbffa9db87b53816d1b9b6a45fe9bf753757d" + "sha1": "34537ae8d63ba9c08593e7482e362d5e30001108" } }, "circomcompat": { @@ -465,7 +465,7 @@ }, "confutils": { "version": "0.1.0", - "vcsRevision": "eec84f4b5e8ea322f42b9c4008e549825c310a8d", + "vcsRevision": "20ae94b46852e99d9d548ca1af9e7cc1777b1272", "url": 
"https://github.com/status-im/nim-confutils", "downloadMethod": "git", "dependencies": [ @@ -473,7 +473,7 @@ "serialization" ], "checksums": { - "sha1": "a26438274bd33da733aa06ef50090c61e51403f9" + "sha1": "2057604a2a27ca44ea2d0bf7a26bd1b7a616af97" } }, "contractabi": { @@ -614,8 +614,8 @@ } }, "groth16": { - "version": "0.1.0", - "vcsRevision": "0adcf64b7b76556227fec09297e3beef4b9c43df", + "version": "0.1.1", + "vcsRevision": "e86837330525b60c9198d7cee1878feae9110080", "url": "https://github.com/durability-labs/nim-groth16", "downloadMethod": "git", "dependencies": [ @@ -623,7 +623,7 @@ "constantine" ], "checksums": { - "sha1": "8350e6e28bf12dc20ff69e76a127f625260113c1" + "sha1": "a9cadf8dfd6a23d0f3c3871e93c427089c65aa06" } }, "leopard": { @@ -667,8 +667,8 @@ } }, "presto": { - "version": "0.1.0", - "vcsRevision": "92b1c7ff141e6920e1f8a98a14c35c1fa098e3be", + "version": "0.1.1", + "vcsRevision": "d66043dd7ede146442e6c39720c76a20bde5225f", "url": "https://github.com/status-im/nim-presto", "downloadMethod": "git", "dependencies": [ @@ -679,7 +679,7 @@ "stew" ], "checksums": { - "sha1": "ea53609c6b79e4c63e36aba5fe78b291ad0df4da" + "sha1": "8df97c45683abe2337bdff43b844c4fbcc124ca2" } }, "toml_serialization": { diff --git a/tests/archivist/testvalidation.nim b/tests/archivist/testvalidation.nim index 5e6583cd..fc679705 100644 --- a/tests/archivist/testvalidation.nim +++ b/tests/archivist/testvalidation.nim @@ -69,6 +69,12 @@ asyncchecksuite "validation": test "the list of slots that it's monitoring is empty initially": check validation.slots.len == 0 + test "initializing ValidationConfig fails when groups < 2": + for groups in [int.low, -1, 0, 1]: + let config = ValidationConfig.init(maxSlots, groups.some, groupIndex) + check config.isFailure + check config.error.msg == "number of validation groups should be at least 2" + for (validationGroups, groupIndex) in [(100, 100'u16), (100, 101'u16)]: test "initializing ValidationConfig fails when groupIndex is " & "greater than or equal to validationGroups " & diff --git a/tests/archivist/utils/testutils.nim b/tests/archivist/utils/testutils.nim index c6875632..5b689472 100644 --- a/tests/archivist/utils/testutils.nim +++ b/tests/archivist/utils/testutils.nim @@ -2,25 +2,6 @@ import pkg/unittest2 import pkg/archivist/utils -suite "findIt": - setup: - type AnObject = object - attribute1*: int - - var objList = - @[ - AnObject(attribute1: 1), - AnObject(attribute1: 3), - AnObject(attribute1: 5), - AnObject(attribute1: 3), - ] - - test "should retur index of first object matching predicate": - assert objList.findIt(it.attribute1 == 3) == 1 - - test "should return -1 when no object matches predicate": - assert objList.findIt(it.attribute1 == 15) == -1 - suite "parseDuration": test "should parse durations": var res: Duration # caller must still know if 'b' refers to bytes|bits