From 50d06c9bd9791b32f9e7b40d423278f1144e348c Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 13:14:08 +0000 Subject: [PATCH 01/15] Move `Range` header implementation from `cache` to `shared` We'll need this for R2 too --- packages/tre/src/plugins/cache/gateway.ts | 2 +- packages/tre/src/plugins/cache/index.ts | 1 - packages/tre/src/plugins/shared/index.ts | 1 + packages/tre/src/plugins/{cache => shared}/range.ts | 0 packages/tre/test/plugins/{cache => shared}/range.spec.ts | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename packages/tre/src/plugins/{cache => shared}/range.ts (100%) rename packages/tre/test/plugins/{cache => shared}/range.spec.ts (100%) diff --git a/packages/tre/src/plugins/cache/gateway.ts b/packages/tre/src/plugins/cache/gateway.ts index 54c1bfa33..3e8004ac8 100644 --- a/packages/tre/src/plugins/cache/gateway.ts +++ b/packages/tre/src/plugins/cache/gateway.ts @@ -7,8 +7,8 @@ import { Headers, HeadersInit, Request, Response, fetch } from "../../http"; import { Clock, Log, millisToSeconds } from "../../shared"; import { Storage } from "../../storage"; import { isSitesRequest } from "../kv"; +import { _getRangeResponse } from "../shared"; import { CacheMiss, PurgeFailure, StorageFailure } from "./errors"; -import { _getRangeResponse } from "./range"; interface CacheMetadata { headers: string[][]; diff --git a/packages/tre/src/plugins/cache/index.ts b/packages/tre/src/plugins/cache/index.ts index d786a6bff..40ea0e5fd 100644 --- a/packages/tre/src/plugins/cache/index.ts +++ b/packages/tre/src/plugins/cache/index.ts @@ -93,4 +93,3 @@ export const CACHE_PLUGIN: Plugin< }; export * from "./gateway"; -export * from "./range"; diff --git a/packages/tre/src/plugins/shared/index.ts b/packages/tre/src/plugins/shared/index.ts index 796e67a98..f281a33a1 100644 --- a/packages/tre/src/plugins/shared/index.ts +++ b/packages/tre/src/plugins/shared/index.ts @@ -64,5 +64,6 @@ export function namespaceEntries( export * from "./constants"; export * from "./gateway"; +export * from "./range"; export * from "./router"; export * from "./routing"; diff --git a/packages/tre/src/plugins/cache/range.ts b/packages/tre/src/plugins/shared/range.ts similarity index 100% rename from packages/tre/src/plugins/cache/range.ts rename to packages/tre/src/plugins/shared/range.ts diff --git a/packages/tre/test/plugins/cache/range.spec.ts b/packages/tre/test/plugins/shared/range.spec.ts similarity index 100% rename from packages/tre/test/plugins/cache/range.spec.ts rename to packages/tre/test/plugins/shared/range.spec.ts From 062ca4867199600a69f9dbb28d26492a80188e49 Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 13:15:19 +0000 Subject: [PATCH 02/15] Refactor out common range parsing code in storages We made a similar change in Miniflare 2 recently --- packages/tre/src/storage/file.ts | 21 ++------------------- packages/tre/src/storage/memory.ts | 28 +--------------------------- packages/tre/src/storage/sqlite.ts | 2 +- packages/tre/src/storage/storage.ts | 29 +++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 47 deletions(-) diff --git a/packages/tre/src/storage/file.ts b/packages/tre/src/storage/file.ts index ccb4ba9e3..85b686da3 100644 --- a/packages/tre/src/storage/file.ts +++ b/packages/tre/src/storage/file.ts @@ -15,6 +15,7 @@ import { StoredKeyMeta, StoredMeta, StoredValueMeta, + parseRange, } from "./storage"; export interface FileRange { @@ -57,25 +58,7 @@ export async function readFileRange( filePath = await fs.realpath(filePath); const { size } = await fs.lstat(filePath); // build offset and length as necessary - if (suffix !== undefined) { - if (suffix <= 0) { - throw new Error("Suffix must be > 0"); - } - if (suffix > size) suffix = size; - offset = size - suffix; - length = size - offset; - } - if (offset === undefined) offset = 0; - if (length === undefined) { - // get length of file - length = size - offset; - } - - // check offset and length are valid - if (offset < 0) throw new Error("Offset must be >= 0"); - if (offset >= size) throw new Error("Offset must be < size"); - if (length <= 0) throw new Error("Length must be > 0"); - if (offset + length > size) length = size - offset; + ({ offset, length } = parseRange({ offset, length, suffix }, size)); // read file fd = await fs.open(filePath, "r"); diff --git a/packages/tre/src/storage/memory.ts b/packages/tre/src/storage/memory.ts index 9f5db324f..b1a61d03e 100644 --- a/packages/tre/src/storage/memory.ts +++ b/packages/tre/src/storage/memory.ts @@ -8,34 +8,9 @@ import { StoredMeta, StoredValueMeta, cloneMetadata, + parseRange, } from "./storage"; -export function parseRange( - { offset, length, suffix }: Range, - size: number -): Required> { - // build proper offset and length - if (suffix !== undefined) { - if (suffix <= 0) { - throw new Error("Suffix must be > 0"); - } - if (suffix > size) suffix = size; - offset = size - suffix; - length = size - offset; - } - if (offset === undefined) offset = 0; - if (length === undefined) length = size - offset; - - // if offset is negative or greater than size, throw an error - if (offset < 0) throw new Error("Offset must be >= 0"); - if (offset >= size) throw new Error("Offset must be < size"); - // if length is less than or equal to 0, throw an error - if (length <= 0) throw new Error("Length must be > 0"); - // if length goes beyond actual length, adjust length to the end of the value - if (offset + length > size) length = size - offset; - - return { offset, length }; -} export class MemoryStorage extends LocalStorage { #sqliteDatabase?: DatabaseType; @@ -87,7 +62,6 @@ export class MemoryStorage extends LocalStorage { if (stored === undefined) return; const { value } = stored; const size = value.byteLength; - const { offset, length } = parseRange(range, size); return { diff --git a/packages/tre/src/storage/sqlite.ts b/packages/tre/src/storage/sqlite.ts index 2bfb11f1d..584767d13 100644 --- a/packages/tre/src/storage/sqlite.ts +++ b/packages/tre/src/storage/sqlite.ts @@ -2,7 +2,6 @@ import crypto from "crypto"; import Database, { Database as DatabaseType } from "better-sqlite3"; import { defaultClock } from "../shared"; import { LocalStorage } from "./local"; -import { parseRange } from "./memory"; import { Range, RangeStoredValueMeta, @@ -10,6 +9,7 @@ import { StoredKeyMeta, StoredMeta, StoredValueMeta, + parseRange, } from "./storage"; // Don't use this! diff --git a/packages/tre/src/storage/storage.ts b/packages/tre/src/storage/storage.ts index c638c1a22..51795ea2b 100644 --- a/packages/tre/src/storage/storage.ts +++ b/packages/tre/src/storage/storage.ts @@ -45,6 +45,35 @@ export interface Range { length?: number; suffix?: number; } +export interface ParsedRange { + offset: number; + length: number; +} +export function parseRange( + { offset, length, suffix }: Range, + size: number +): ParsedRange { + if (suffix !== undefined) { + if (suffix <= 0) { + throw new Error("Suffix must be > 0"); + } + if (suffix > size) suffix = size; + offset = size - suffix; + length = size - offset; + } + if (offset === undefined) offset = 0; + if (length === undefined) length = size - offset; + + // If offset is negative or greater than size, throw an error + if (offset < 0) throw new Error("Offset must be >= 0"); + if (offset > size) throw new Error("Offset must be < size"); + // If length is less than or equal to 0, throw an error + if (length <= 0) throw new Error("Length must be > 0"); + // If length goes beyond actual length, adjust length to the end of the value + if (offset + length > size) length = size - offset; + + return { offset, length }; +} export interface StorageListOptions { // Stage 1: filtering From 767b27fe90c05f9318c0e433587cb60345c3791a Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 13:29:55 +0000 Subject: [PATCH 03/15] Add validation for internal R2 requests Validate incoming R2 requests from `workerd` with `zod`, increasing type safety. This removes some implicit `any`s from `JSON.parse`, and allows us to use `method` as a type-level union discriminator. Validation of types has also been removed from the `Validator` class, as we don't need to duplicate this. `workerd` will handle validating user input here. --- package-lock.json | 13 +- packages/tre/package.json | 2 +- packages/tre/src/plugins/r2/errors.ts | 17 +- packages/tre/src/plugins/r2/gateway.ts | 114 +++-------- packages/tre/src/plugins/r2/index.ts | 1 + packages/tre/src/plugins/r2/r2Object.ts | 74 +++---- packages/tre/src/plugins/r2/router.ts | 186 +++++------------ packages/tre/src/plugins/r2/schemas.ts | 243 +++++++++++++++++++++++ packages/tre/src/plugins/r2/validator.ts | 189 ++---------------- 9 files changed, 375 insertions(+), 464 deletions(-) create mode 100644 packages/tre/src/plugins/r2/schemas.ts diff --git a/package-lock.json b/package-lock.json index 5282c5478..aee5e5dbd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5024,8 +5024,9 @@ } }, "node_modules/zod": { - "version": "3.19.1", - "license": "MIT", + "version": "3.20.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.20.6.tgz", + "integrity": "sha512-oyu0m54SGCtzh6EClBVqDDlAYRz4jrVtKwQ7ZnsEmMI9HnzuZFj8QFwAY1M5uniIYACdGvv0PBWPF2kO0aNofA==", "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -5050,7 +5051,7 @@ "workerd": "^1.20230221.0", "ws": "^8.11.0", "youch": "^3.2.2", - "zod": "^3.18.0" + "zod": "^3.20.6" }, "devDependencies": { "@cloudflare/workers-types": "^4.20221111.1", @@ -5229,7 +5230,7 @@ "workerd": "^1.20230221.0", "ws": "^8.11.0", "youch": "^3.2.2", - "zod": "^3.18.0" + "zod": "^3.20.6" } }, "@nodelib/fs.scandir": { @@ -8066,7 +8067,9 @@ } }, "zod": { - "version": "3.19.1" + "version": "3.20.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.20.6.tgz", + "integrity": "sha512-oyu0m54SGCtzh6EClBVqDDlAYRz4jrVtKwQ7ZnsEmMI9HnzuZFj8QFwAY1M5uniIYACdGvv0PBWPF2kO0aNofA==" } } } diff --git a/packages/tre/package.json b/packages/tre/package.json index a4324d776..5cf3386c7 100644 --- a/packages/tre/package.json +++ b/packages/tre/package.json @@ -42,7 +42,7 @@ "workerd": "^1.20230221.0", "ws": "^8.11.0", "youch": "^3.2.2", - "zod": "^3.18.0" + "zod": "^3.20.6" }, "devDependencies": { "@cloudflare/workers-types": "^4.20221111.1", diff --git a/packages/tre/src/plugins/r2/errors.ts b/packages/tre/src/plugins/r2/errors.ts index 43a485913..56119f25e 100644 --- a/packages/tre/src/plugins/r2/errors.ts +++ b/packages/tre/src/plugins/r2/errors.ts @@ -1,4 +1,5 @@ import { Response } from "../../http"; +import { HttpError } from "../../shared"; import { CfHeader } from "../shared/constants"; import { R2Object } from "./r2Object"; @@ -22,22 +23,18 @@ enum CfCode { InvalidRange = 10039, } -export class R2Error extends Error { - status: number; - v4Code: number; +export class R2Error extends HttpError { object?: R2Object; - constructor(status: number, message: string, v4Code: number) { - super(message); - this.name = "R2Error"; - this.status = status; - this.v4Code = v4Code; + + constructor(code: number, message: string, readonly v4Code: number) { + super(code, message); } toResponse() { if (this.object !== undefined) { const { metadataSize, value } = this.object.encode(); return new Response(value, { - status: this.status, + status: this.code, headers: { [CfHeader.MetadataSize]: `${metadataSize}`, "Content-Type": "application/json", @@ -51,7 +48,7 @@ export class R2Error extends Error { }); } return new Response(null, { - status: this.status, + status: this.code, headers: { [CfHeader.Error]: JSON.stringify({ message: this.message, diff --git a/packages/tre/src/plugins/r2/gateway.ts b/packages/tre/src/plugins/r2/gateway.ts index 573cf2e3b..d8fa74a34 100644 --- a/packages/tre/src/plugins/r2/gateway.ts +++ b/packages/tre/src/plugins/r2/gateway.ts @@ -1,93 +1,43 @@ +import { z } from "zod"; import { Log } from "../../shared"; import { RangeStoredValueMeta, Storage } from "../../storage"; import { InvalidRange, NoSuchKey } from "./errors"; import { - R2HTTPMetadata, R2Object, R2ObjectBody, R2ObjectMetadata, createVersion, } from "./r2Object"; -import { Validator } from "./validator"; - -// For more information, refer to https://datatracker.ietf.org/doc/html/rfc7232 -export interface R2Conditional { - // Performs the operation if the object’s etag matches the given string. - etagMatches?: string; - // Performs the operation if the object’s etag does not match the given string. - etagDoesNotMatch?: string; - // Performs the operation if the object was uploaded before the given date. - uploadedBefore?: number; - // Performs the operation if the object was uploaded after the given date. - uploadedAfter?: number; -} - -export interface R2Range { - offset?: number; - length?: number; - suffix?: number; -} - -export interface R2GetOptions { - // Specifies that the object should only be returned given satisfaction of - // certain conditions in the R2Conditional. Refer to R2Conditional above. - onlyIf?: R2Conditional; - // Specifies that only a specific length (from an optional offset) or suffix - // of bytes from the object should be returned. Refer to - // https://developers.cloudflare.com/r2/runtime-apis/#ranged-reads. - range?: R2Range; -} - -export interface R2PutOptions { - // Various HTTP headers associated with the object. Refer to - // https://developers.cloudflare.com/r2/runtime-apis/#http-metadata. - httpMetadata: R2HTTPMetadata; - // A map of custom, user-defined metadata that will be stored with the object. - customMetadata: Record; - // A md5 hash to use to check the recieved object’s integrity. - md5?: string; -} - -export type R2ListOptionsInclude = ("httpMetadata" | "customMetadata")[]; +import { + R2GetRequestSchema, + R2ListRequestSchema, + R2PutRequestSchema, +} from "./schemas"; +import { MAX_LIST_KEYS, Validator } from "./validator"; -export interface R2ListOptions { - // The number of results to return. Defaults to 1000, with a maximum of 1000. - limit?: number; - // The prefix to match keys against. Keys will only be returned if they start with given prefix. - prefix?: string; - // An opaque token that indicates where to continue listing objects from. - // A cursor can be retrieved from a previous list operation. - cursor?: string; - // The character to use when grouping keys. - delimiter?: string; - // Can include httpFields and/or customFields. If included, items returned by - // the list will include the specified metadata. Note that there is a limit on the - // total amount of data that a single list operation can return. - // If you request data, you may recieve fewer than limit results in your response - // to accomodate metadata. - // Use the truncated property to determine if the list request has more data to be returned. - include?: R2ListOptionsInclude; -} +export type OmitRequest = Omit; +export type R2GetOptions = OmitRequest>; +export type R2PutOptions = OmitRequest>; +export type R2ListOptions = OmitRequest>; export interface R2Objects { // An array of objects matching the list request. objects: R2Object[]; - // If true, indicates there are more results to be retrieved for the current list request. + // If true, indicates there are more results to be retrieved for the current + // list request. truncated: boolean; - // A token that can be passed to future list calls to resume listing from that point. + // A token that can be passed to future list calls to resume listing from that + // point. // Only present if truncated is true. cursor?: string; - // If a delimiter has been specified, contains all prefixes between the specified - // prefix and the next occurence of the delimiter. - // For example, if no prefix is provided and the delimiter is ‘/’, foo/bar/baz - // would return foo as a delimited prefix. If foo/ was passed as a prefix - // with the same structure and delimiter, foo/bar would be returned as a delimited prefix. + // If a delimiter has been specified, contains all prefixes between the + // specified prefix and the next occurrence of the delimiter. For example, if + // no prefix is provided and the delimiter is "/", "foo/bar/baz" would return + // "foo" as a delimited prefix. If "foo/" was passed as a prefix with the same + // structure and delimiter, "foo/bar" would be returned as a delimited prefix. delimitedPrefixes: string[]; } -const MAX_LIST_KEYS = 1_000; -// https://developers.cloudflare.com/r2/platform/limits/ (5GB - 5MB) - const validate = new Validator(); export class R2Gateway { @@ -110,10 +60,7 @@ export class R2Gateway { options: R2GetOptions = {} ): Promise { const { range = {}, onlyIf } = options; - validate - .key(key) - .getOptions(options) - .condition(await this.head(key), onlyIf); + validate.key(key).condition(await this.head(key), onlyIf); let stored: RangeStoredValueMeta | undefined; @@ -140,22 +87,18 @@ export class R2Gateway { ): Promise { const { customMetadata, md5, httpMetadata } = options; - const hash = validate - .key(key) - .putOptions(options) - .size(value) - .md5(value, md5); + const hash = validate.key(key).size(value).md5(value, md5); // build metadata const metadata: R2ObjectMetadata = { key, size: value.byteLength, - etag: hash, + etag: hash.toString("hex"), version: createVersion(), httpEtag: `"${hash}"`, uploaded: Date.now(), - httpMetadata, - customMetadata, + httpMetadata: httpMetadata ?? {}, + customMetadata: customMetadata ?? {}, }; // Store value with expiration and metadata @@ -173,9 +116,7 @@ export class R2Gateway { } async list(listOptions: R2ListOptions = {}): Promise { - const delimitedPrefixes = new Set(); - - validate.listOptions(listOptions); + validate.limit(listOptions.limit); const { prefix = "", include = [], cursor = "" } = listOptions; let { delimiter, limit = MAX_LIST_KEYS } = listOptions; @@ -190,8 +131,7 @@ export class R2Gateway { cursor, delimiter, }); - // add delimited prefixes should they exist - for (const dP of res.delimitedPrefixes ?? []) delimitedPrefixes.add(dP); + const delimitedPrefixes = new Set(res.delimitedPrefixes ?? []); const objects = res.keys // grab metadata diff --git a/packages/tre/src/plugins/r2/index.ts b/packages/tre/src/plugins/r2/index.ts index cdb0a4746..dab218d66 100644 --- a/packages/tre/src/plugins/r2/index.ts +++ b/packages/tre/src/plugins/r2/index.ts @@ -45,3 +45,4 @@ export const R2_PLUGIN: Plugin< export * from "./r2Object"; export * from "./gateway"; +export * from "./schemas"; diff --git a/packages/tre/src/plugins/r2/r2Object.ts b/packages/tre/src/plugins/r2/r2Object.ts index 76b48f968..ffaea9491 100644 --- a/packages/tre/src/plugins/r2/r2Object.ts +++ b/packages/tre/src/plugins/r2/r2Object.ts @@ -1,16 +1,17 @@ +import assert from "assert"; import crypto from "crypto"; import { TextEncoder } from "util"; -import { R2Objects, R2Range } from "./gateway"; +import type { R2StringChecksums } from "@cloudflare/workers-types/experimental"; +import { R2Objects } from "./gateway"; +import { + BASE64_REGEXP, + HEX_REGEXP, + R2HeadResponse, + R2HttpFields, + R2Range, +} from "./schemas"; const encoder = new TextEncoder(); -export interface R2HTTPMetadata { - contentType?: string; - contentLanguage?: string; - contentDisposition?: string; - contentEncoding?: string; - cacheControl?: string; - cacheExpiry?: Date; -} export interface R2ObjectMetadata { // The object’s key. @@ -21,29 +22,19 @@ export interface R2ObjectMetadata { size: number; // The etag associated with the object upload. etag: string; - // The object’s etag, in quotes so as to be returned as a header. + // The object's etag, in quotes to be returned as a header. httpEtag: string; // The time the object was uploaded. uploaded: number; - // Various HTTP headers associated with the object. Refer to HTTP Metadata. - httpMetadata: R2HTTPMetadata; + // Various HTTP headers associated with the object. Refer to HTTP Metadata: + // https://developers.cloudflare.com/r2/runtime-apis/#http-metadata. + httpMetadata: R2HttpFields; // A map of custom, user-defined metadata associated with the object. customMetadata: Record; // If a GET request was made with a range option, this will be added range?: R2Range; } -// R2ObjectMetadata in the format the Workers Runtime expects to be returned -export interface RawR2ObjectMetadata - extends Omit { - // The object’s name. - name: string; - // Various HTTP headers associated with the object. Refer to HTTP Metadata. - httpFields: R2HTTPMetadata; - // A map of custom, user-defined metadata associated with the object. - customFields: { k: string; v: string }[]; -} - export interface EncodedMetadata { metadataSize: number; value: Uint8Array; @@ -60,25 +51,16 @@ export function createVersion(): string { * will have an R2Object created. */ export class R2Object implements R2ObjectMetadata { - // The object’s key. - key: string; - // Random unique string associated with a specific upload of a key. - version: string; - // Size of the object in bytes. - size: number; - // The etag associated with the object upload. - etag: string; - // The object’s etag, in quotes so as to be returned as a header. - httpEtag: string; - // The time the object was uploaded. - uploaded: number; - // Various HTTP headers associated with the object. Refer to - // https://developers.cloudflare.com/r2/runtime-apis/#http-metadata. - httpMetadata: R2HTTPMetadata; - // A map of custom, user-defined metadata associated with the object. - customMetadata: Record; - // If a GET request was made with a range option, this will be added - range?: R2Range; + readonly key: string; + readonly version: string; + readonly size: number; + readonly etag: string; + readonly httpEtag: string; + readonly uploaded: number; + readonly httpMetadata: R2HttpFields; + readonly customMetadata: Record; + readonly range?: R2Range; + constructor(metadata: R2ObjectMetadata) { this.key = metadata.key; this.version = metadata.version; @@ -92,7 +74,7 @@ export class R2Object implements R2ObjectMetadata { } // Format for return to the Workers Runtime - rawProperties(): RawR2ObjectMetadata { + #rawProperties(): R2HeadResponse { return { ...this, name: this.key, @@ -105,7 +87,7 @@ export class R2Object implements R2ObjectMetadata { } encode(): EncodedMetadata { - const json = JSON.stringify(this.rawProperties()); + const json = JSON.stringify(this.#rawProperties()); const bytes = encoder.encode(json); return { metadataSize: bytes.length, value: bytes }; } @@ -113,7 +95,7 @@ export class R2Object implements R2ObjectMetadata { static encodeMultiple(objects: R2Objects): EncodedMetadata { const json = JSON.stringify({ ...objects, - objects: objects.objects.map((o) => o.rawProperties()), + objects: objects.objects.map((o) => o.#rawProperties()), }); const bytes = encoder.encode(json); return { metadataSize: bytes.length, value: bytes }; @@ -121,7 +103,7 @@ export class R2Object implements R2ObjectMetadata { } export class R2ObjectBody extends R2Object { - body: Uint8Array; + readonly body: Uint8Array; constructor(metadata: R2ObjectMetadata, body: Uint8Array) { super(metadata); diff --git a/packages/tre/src/plugins/r2/router.ts b/packages/tre/src/plugins/r2/router.ts index 9bbbc0c1f..5ea416766 100644 --- a/packages/tre/src/plugins/r2/router.ts +++ b/packages/tre/src/plugins/r2/router.ts @@ -1,3 +1,4 @@ +import assert from "assert"; import { TextDecoder } from "util"; import { Request, Response } from "../../http"; import { @@ -8,18 +9,11 @@ import { Router, decodePersist, } from "../shared"; -import { InternalError, InvalidMetadata, R2Error } from "./errors"; -import { - R2Gateway, - R2GetOptions, - R2ListOptions, - R2PutOptions, -} from "./gateway"; -import { R2HTTPMetadata, R2Object } from "./r2Object"; +import { InternalError, InvalidMetadata } from "./errors"; +import { R2Gateway, R2Objects } from "./gateway"; +import { EncodedMetadata, R2Object, R2ObjectBody } from "./r2Object"; +import { R2BindingRequestSchema } from "./schemas"; -export interface R2Params { - bucket: string; -} const decoder = new TextDecoder(); async function decodeMetadata(req: Request) { @@ -34,137 +28,55 @@ async function decodeMetadata(req: Request) { bytes.slice(0, metadataSize), bytes.slice(metadataSize), ]; - const metadata = JSON.parse(decoder.decode(metadataBytes)); + const metadataText = decoder.decode(metadataBytes); + const metadata = R2BindingRequestSchema.parse(JSON.parse(metadataText)); return { metadata, value: new Uint8Array(value) }; } function decodeHeaderMetadata(req: Request) { - if (req.headers.get(CfHeader.Request) === null) { - throw new InvalidMetadata(); - } - return JSON.parse(req.headers.get(CfHeader.Request) as string); + const header = req.headers.get(CfHeader.Request); + if (header === null) throw new InvalidMetadata(); + return R2BindingRequestSchema.parse(JSON.parse(header)); } -export interface RawR2GetOptions { - range?: { - offset?: string; - length?: string; - suffix?: string; - }; - onlyIf: { - etagMatches?: string; - etagDoesNotMatch?: string; - uploadedBefore?: string; - uploadedAfter?: string; - }; -} -export interface RawR2PutOptions { - // Various HTTP headers associated with the object. Refer to - // https://developers.cloudflare.com/r2/runtime-apis/#http-metadata. - httpFields?: R2HTTPMetadata; - // A map of custom, user-defined metadata that will be stored with the object. - customFields?: { k: string; v: string }[]; - // A md5 hash to use to check the recieved object’s integrity. - md5?: string; -} +function encodeResult(result: R2Object | R2ObjectBody | R2Objects) { + let encoded: EncodedMetadata; + if (result instanceof R2Object) { + encoded = result.encode(); + } else { + encoded = R2Object.encodeMultiple(result); + } -export interface RawR2ListOptions { - // The number of results to return. Defaults to 1000, with a maximum of 1000. - limit?: number; - // The prefix to match keys against. Keys will only be returned if they start with given prefix. - prefix?: string; - // An opaque token that indicates where to continue listing objects from. - // A cursor can be retrieved from a previous list operation. - cursor?: string; - // The character to use when grouping keys. - delimiter?: string; - // Can include httpFields and/or customFields. If included, items returned by - // the list will include the specified metadata. Note that there is a limit on the - // total amount of data that a single list operation can return. - // If you request data, you may recieve fewer than limit results in your response - // to accomodate metadata. - // Use the truncated property to determine if the list request has more data to be returned. - include?: (0 | 1)[]; -} -function parseGetOptions({ - range = {}, - onlyIf = {}, -}: RawR2GetOptions): R2GetOptions { - return { - range: { - offset: range?.offset ? Number(range?.offset) : undefined, - length: range?.length ? Number(range?.length) : undefined, - suffix: range?.suffix ? Number(range?.suffix) : undefined, - }, - onlyIf: { - etagMatches: onlyIf.etagMatches, - etagDoesNotMatch: onlyIf.etagDoesNotMatch, - uploadedAfter: onlyIf?.uploadedAfter - ? Number(onlyIf?.uploadedAfter) - : undefined, - uploadedBefore: onlyIf?.uploadedBefore - ? Number(onlyIf?.uploadedBefore) - : undefined, + return new Response(encoded.value, { + headers: { + [CfHeader.MetadataSize]: `${encoded.metadataSize}`, + "Content-Type": "application/json", }, - }; -} - -function parsePutOptions(options: RawR2PutOptions): R2PutOptions { - return { - ...options, - httpMetadata: options.httpFields ?? {}, - customMetadata: options.customFields - ? Object.fromEntries(options.customFields.map(({ k, v }) => [k, v])) - : {}, - }; + }); } -function parseListOptions(options: RawR2ListOptions): R2ListOptions { - return { - ...options, - include: options.include - ?.filter((i) => i === 1 || i === 0) - .map((i) => (i === 0 ? "httpMetadata" : "customMetadata")), - }; +export interface R2Params { + bucket: string; } export class R2Router extends Router { @GET("/:bucket") get: RouteHandler = async (req, params) => { - const { method, object, ...options } = decodeHeaderMetadata(req); - + const metadata = decodeHeaderMetadata(req); const persist = decodePersist(req.headers); const gateway = this.gatewayFactory.get(params.bucket, persist); - try { - let val; - if (method === "head") { - val = await gateway.head(object); - } else if (method === "get") { - val = await gateway.get(object, parseGetOptions(options)); - } else if (method === "list") { - val = await gateway.list(parseListOptions(options)); - } - if (!val) { - throw new InternalError(); - } - - if (val instanceof R2Object) { - val = val.encode(); - } else { - val = R2Object.encodeMultiple(val); - } - return new Response(val.value, { - headers: { - [CfHeader.MetadataSize]: `${val.metadataSize}`, - "Content-Type": "application/json", - }, - }); - } catch (e) { - if (e instanceof R2Error) { - return e.toResponse(); - } - throw e; + let result: R2Object | R2ObjectBody | R2Objects; + if (metadata.method === "head") { + result = await gateway.head(metadata.object); + } else if (metadata.method === "get") { + result = await gateway.get(metadata.object, metadata); + } else if (metadata.method === "list") { + result = await gateway.list(metadata); + } else { + throw new InternalError(); } + + return encodeResult(result); }; @PUT("/:bucket") @@ -173,22 +85,16 @@ export class R2Router extends Router { const persist = decodePersist(req.headers); const gateway = this.gatewayFactory.get(params.bucket, persist); - try { - if (metadata.method === "delete") { - await gateway.delete(metadata.object); - return new Response(); - } else if (metadata.method === "put") { - return Response.json( - await gateway.put(metadata.object, value, parsePutOptions(metadata)) - ); - } - // Unknown method: should never be reached - throw new InternalError(); - } catch (e) { - if (e instanceof R2Error) { - return e.toResponse(); - } - throw e; + if (metadata.method === "delete") { + assert("object" in metadata); + // TODO: support multiple delete + await gateway.delete(metadata.object); + return new Response(); + } else if (metadata.method === "put") { + const result = await gateway.put(metadata.object, value, metadata); + return encodeResult(result); + } else { + throw new InternalError(); // Unknown method: should never be reached } }; } diff --git a/packages/tre/src/plugins/r2/schemas.ts b/packages/tre/src/plugins/r2/schemas.ts new file mode 100644 index 000000000..6feda178c --- /dev/null +++ b/packages/tre/src/plugins/r2/schemas.ts @@ -0,0 +1,243 @@ +import { z } from "zod"; + +// https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-api.capnp + +export const HEX_REGEXP = /^[0-9a-f]*$/i; +// https://github.com/capnproto/capnproto/blob/6b5bcc2c6e954bc6e167ac581eb628e5a462a469/c%2B%2B/src/kj/encoding.c%2B%2B#L719-L720 +export const BASE64_REGEXP = /^[0-9a-z+/=]*$/i; + +export const DateSchema = z.coerce + .number() + .transform((value) => new Date(value)); +export const HexDataSchema = z + .string() + .regex(HEX_REGEXP) + .transform((hex) => Buffer.from(hex, "hex")); +export const Base64DataSchema = z + .string() + .regex(BASE64_REGEXP) + .transform((base64) => Buffer.from(base64, "base64")); + +export const RecordSchema = z + .object({ + k: z.string(), + v: z.string(), + }) + .array() + .transform((entries) => + Object.fromEntries(entries.map(({ k, v }) => [k, v])) + ); +export type RawRecord = z.input; + +export const R2RangeSchema = z.object({ + offset: z.coerce.number().optional(), + length: z.coerce.number().optional(), + suffix: z.coerce.number().optional(), +}); +export type R2Range = z.infer; + +// For more information, refer to https://datatracker.ietf.org/doc/html/rfc7232 +export const R2ConditionalSchema = z.object({ + // Performs the operation if the object's ETag matches the given string + etagMatches: z.ostring(), // "If-Match" + // Performs the operation if the object's ETag does NOT match the given string + etagDoesNotMatch: z.ostring(), // "If-None-Match" + // Performs the operation if the object was uploaded BEFORE the given date + uploadedBefore: DateSchema.optional(), // "If-Unmodified-Since" + // Performs the operation if the object was uploaded AFTER the given date + uploadedAfter: DateSchema.optional(), // "If-Modified-Since" + // Truncates dates to seconds before performing comparisons + secondsGranularity: z.oboolean(), +}); +export type R2Conditional = z.infer; + +export const R2ChecksumsSchema = z + .object({ + 0: HexDataSchema.optional(), + 1: HexDataSchema.optional(), + 2: HexDataSchema.optional(), + 3: HexDataSchema.optional(), + 4: HexDataSchema.optional(), + }) + .transform((checksums) => ({ + md5: checksums["0"], + sha1: checksums["1"], + sha256: checksums["2"], + sha384: checksums["3"], + sha512: checksums["4"], + })); +export type RawR2Checksums = z.input; +export type R2Checksums = z.infer; + +export const R2PublishedPartSchema = z.object({ + etag: z.string(), + part: z.number(), +}); +export type R2PublishedPart = z.infer; + +export const R2HttpFieldsSchema = z.object({ + contentType: z.ostring(), + contentLanguage: z.ostring(), + contentDisposition: z.ostring(), + contentEncoding: z.ostring(), + cacheControl: z.ostring(), + cacheExpiry: z.coerce.number().optional(), +}); +export type R2HttpFields = z.infer; + +export const R2HeadRequestSchema = z.object({ + method: z.literal("head"), + object: z.string(), +}); + +export const R2GetRequestSchema = z.object({ + method: z.literal("get"), + object: z.string(), + // Specifies that only a specific length (from an optional offset) or suffix + // of bytes from the object should be returned. Refer to + // https://developers.cloudflare.com/r2/runtime-apis/#ranged-reads. + range: R2RangeSchema.optional(), + rangeHeader: z.ostring(), + // Specifies that the object should only be returned given satisfaction of + // certain conditions in the R2Conditional. Refer to R2Conditional above. + onlyIf: R2ConditionalSchema.optional(), +}); + +export const R2PutRequestSchema = z + .object({ + method: z.literal("put"), + object: z.string(), + customFields: RecordSchema.optional(), // (renamed in transform) + httpFields: R2HttpFieldsSchema.optional(), // (renamed in transform) + onlyIf: R2ConditionalSchema.optional(), + md5: Base64DataSchema.optional(), // (intentionally base64, not hex) // TODO: make sure we're testing this is base64 + sha1: HexDataSchema.optional(), // TODO: support + sha256: HexDataSchema.optional(), // TODO: support + sha384: HexDataSchema.optional(), // TODO: support + sha512: HexDataSchema.optional(), // TODO: support + }) + .transform((value) => ({ + method: value.method, + object: value.object, + customMetadata: value.customFields, + httpMetadata: value.httpFields, + onlyIf: value.onlyIf, + md5: value.md5, + sha1: value.sha1, + sha256: value.sha256, + sha384: value.sha384, + sha512: value.sha512, + })); + +// TODO: support multipart +export const R2CreateMultipartUploadRequestSchema = z.object({ + method: z.literal("createMultipartUpload"), + object: z.string(), + customFields: RecordSchema.optional(), + httpFields: R2HttpFieldsSchema.optional(), +}); + +export const R2UploadPartRequestSchema = z.object({ + method: z.literal("uploadPart"), + object: z.string(), + uploadId: z.string(), + partNumber: z.number(), +}); + +export const R2CompleteMultipartUploadRequestSchema = z.object({ + method: z.literal("completeMultipartUpload"), + object: z.string(), + uploadId: z.string(), + parts: R2PublishedPartSchema.array(), +}); + +export const R2AbortMultipartUploadRequestSchema = z.object({ + method: z.literal("abortMultipartUpload"), + object: z.string(), + uploadId: z.string(), +}); + +export const R2ListRequestSchema = z.object({ + method: z.literal("list"), + limit: z.onumber(), + prefix: z.ostring(), + cursor: z.ostring(), + delimiter: z.ostring(), + startAfter: z.ostring(), + include: z + .union([z.literal(0), z.literal(1)]) + .transform((value) => (value === 0 ? "httpMetadata" : "customMetadata")) + .array() + .optional(), +}); + +export const R2DeleteRequestSchema = z.intersection( + z.object({ method: z.literal("delete") }), + z.union([ + z.object({ object: z.string() }), + z.object({ objects: z.string().array() }), // TODO: support + ]) +); + +// Not using `z.discriminatedUnion()` here, as that doesn't work with +// intersection/transformed types. +export const R2BindingRequestSchema = z.union([ + R2HeadRequestSchema, + R2GetRequestSchema, + R2PutRequestSchema, + R2CreateMultipartUploadRequestSchema, + R2UploadPartRequestSchema, + R2CompleteMultipartUploadRequestSchema, + R2AbortMultipartUploadRequestSchema, + R2ListRequestSchema, + R2DeleteRequestSchema, +]); + +export interface R2ErrorResponse { + version: number; + v4code: number; + message: string; +} + +export interface R2HeadResponse { + name: string; + version: string; + size: number; + etag: string; + uploaded: number; + // Optional: https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L81 + httpFields?: R2HttpFields; + // Optional: https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L113 + customFields?: RawRecord; + // Optional: https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L130 + range?: R2Range; + // Optional: https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L140 + checksums?: RawR2Checksums; +} + +export type R2GetResponse = R2HeadResponse; + +export type R2PutResponse = R2HeadResponse; + +export interface R2CreateMultipartUploadResponse { + uploadId: string; +} + +export interface R2UploadPartResponse { + etag: string; +} + +export type R2CompleteMultipartUploadResponse = R2PutResponse; + +// eslint-disable-next-line @typescript-eslint/no-empty-interface +export interface R2AbortMultipartUploadResponse {} + +export interface R2ListResponse { + objects: R2HeadResponse[]; + truncated: boolean; + cursor?: string; + delimitedPrefixes: string[]; +} + +// eslint-disable-next-line @typescript-eslint/no-empty-interface +export interface R2DeleteResponse {} diff --git a/packages/tre/src/plugins/r2/validator.ts b/packages/tre/src/plugins/r2/validator.ts index 523b84951..8ab262670 100644 --- a/packages/tre/src/plugins/r2/validator.ts +++ b/packages/tre/src/plugins/r2/validator.ts @@ -2,22 +2,14 @@ import crypto from "crypto"; import { BadDigest, EntityTooLarge, - InternalError, - InvalidDigest, InvalidMaxKeys, InvalidObjectName, PreconditionFailed, } from "./errors"; -import { - R2Conditional, - R2GetOptions, - R2ListOptions, - R2PutOptions, -} from "./gateway"; - -import { R2HTTPMetadata, R2Object, R2ObjectMetadata } from "./r2Object"; +import { R2Object, R2ObjectMetadata } from "./r2Object"; +import { R2Conditional } from "./schemas"; -const MAX_LIST_KEYS = 1_000; +export const MAX_LIST_KEYS = 1_000; const MAX_KEY_SIZE = 1024; const UNPAIRED_SURROGATE_PAIR_REGEX = @@ -54,7 +46,7 @@ function testR2Conditional( if ( ifMatch !== true && // if "ifMatch" is true, we ignore date checking uploadedBefore !== undefined && - uploaded > uploadedBefore + uploaded > uploadedBefore.getTime() ) { return false; } @@ -63,7 +55,7 @@ function testR2Conditional( if ( ifNoneMatch !== true && // if "ifNoneMatch" is true, we ignore date checking uploadedAfter !== undefined && - uploaded < uploadedAfter + uploaded < uploadedAfter.getTime() ) { return false; } @@ -71,9 +63,9 @@ function testR2Conditional( return true; } export class Validator { - md5(value: Uint8Array, md5?: string): string { - const md5Hash = crypto.createHash("md5").update(value).digest("base64"); - if (md5 !== undefined && md5 !== md5Hash) { + md5(value: Uint8Array, md5?: Buffer): Buffer { + const md5Hash = crypto.createHash("md5").update(value).digest(); + if (md5 !== undefined && !md5.equals(md5Hash)) { throw new BadDigest(); } return md5Hash; @@ -85,12 +77,15 @@ export class Validator { } return this; } + size(value: Uint8Array): Validator { + // TODO: should we be validating httpMetadata/customMetadata size too if (value.byteLength > MAX_VALUE_SIZE) { throw new EntityTooLarge(); } return this; } + key(key: string): Validator { // Check key isn't too long and exists outside regex const keyLength = Buffer.byteLength(key); @@ -103,165 +98,9 @@ export class Validator { return this; } - onlyIf(onlyIf: R2Conditional): Validator { - if (typeof onlyIf !== "object") { - throw new InternalError().context( - "onlyIf must be an object, a Headers instance, or undefined." - ); - } - - // Check onlyIf variables - const { etagMatches, etagDoesNotMatch, uploadedBefore, uploadedAfter } = - onlyIf; - if ( - etagMatches !== undefined && - !(typeof etagMatches === "string" || Array.isArray(etagMatches)) - ) { - throw new InternalError().context("etagMatches must be a string."); - } - if ( - etagDoesNotMatch !== undefined && - !(typeof etagDoesNotMatch === "string" || Array.isArray(etagDoesNotMatch)) - ) { - throw new InternalError().context("etagDoesNotMatch must be a string."); - } - if (uploadedBefore !== undefined && !!Number.isNaN(uploadedBefore)) { - throw new InternalError().context("uploadedBefore must be a number."); - } - if (uploadedAfter !== undefined && !!Number.isNaN(uploadedBefore)) { - throw new InternalError().context("uploadedAfter must be a number."); - } - return this; - } - - getOptions(options: R2GetOptions): Validator { - const { onlyIf = {}, range = {} } = options; - - this.onlyIf(onlyIf); - - if (typeof range !== "object") { - throw new InternalError().context( - "range must either be an object or undefined." - ); - } - const { offset, length, suffix } = range; - - if (offset !== undefined) { - if (typeof offset !== "number" || Number.isNaN(offset)) { - throw new InternalError().context( - "offset must either be a number or undefined." - ); - } - if (offset < 0) { - throw new InternalError().context( - "Invalid range. Starting offset must be greater than or equal to 0." - ); - } - } - if ( - (length !== undefined && typeof length !== "number") || - Number.isNaN(length) - ) { - throw new InternalError().context( - "length must either be a number or undefined." - ); - } - if ( - (suffix !== undefined && typeof suffix !== "number") || - Number.isNaN(suffix) - ) { - throw new InternalError().context( - "suffix must either be a number or undefined." - ); - } - return this; - } - - httpMetadata(httpMetadata?: R2HTTPMetadata): Validator { - if (httpMetadata === undefined) return this; - if (typeof httpMetadata !== "object") { - throw new InternalError().context( - "httpMetadata must be an object or undefined." - ); - } - for (const [key, value] of Object.entries(httpMetadata)) { - if (typeof value !== "string" && value !== undefined) { - throw new InvalidObjectName().context( - `${key}'s value must be a string or undefined.` - ); - } - } - return this; - } - - putOptions(options: R2PutOptions): Validator { - const { httpMetadata, customMetadata, md5 } = options; - - this.httpMetadata(httpMetadata); - - if (customMetadata !== undefined) { - if (typeof customMetadata !== "object") { - throw new InternalError().context( - "customMetadata must be an object or undefined." - ); - } - for (const v of Object.values(customMetadata)) { - if (typeof v !== "string") { - throw new InternalError().context( - "customMetadata values must be strings." - ); - } - } - } - - if (md5 !== undefined && typeof md5 !== "string") { - throw new InvalidDigest().context("md5 must be a string or undefined."); - } - return this; - } - - listOptions(options: R2ListOptions): Validator { - const { limit, prefix, cursor, delimiter, include } = options; - - if (limit !== undefined) { - if (typeof limit !== "number") { - throw new InternalError().context( - "limit must be a number or undefined." - ); - } - if (limit < 1 || limit > MAX_LIST_KEYS) { - throw new InvalidMaxKeys(); - } - } - if (prefix !== undefined && typeof prefix !== "string") { - throw new InternalError().context( - "prefix must be a string or undefined." - ); - } - if (cursor !== undefined && typeof cursor !== "string") { - throw new InternalError().context( - "cursor must be a string or undefined." - ); - } - if (delimiter !== undefined && typeof delimiter !== "string") { - throw new InternalError().context( - "delimiter must be a string or undefined." - ); - } - - if (include !== undefined) { - if (!Array.isArray(include)) { - throw new InternalError().context( - "include must be an array or undefined." - ); - } - for (const value of include) { - if (value !== "httpMetadata" && value !== "customMetadata") { - throw new InternalError().context( - "include values must be httpMetadata and/or customMetadata strings." - ); - } - } + limit(limit?: number): Validator { + if (limit !== undefined && (limit < 1 || limit > MAX_LIST_KEYS)) { + throw new InvalidMaxKeys(); } return this; } From cb705182b40b185c79e3db99fb27f2150bd78194 Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 13:43:21 +0000 Subject: [PATCH 04/15] Support batch `R2Bucket#delete()`ing keys Ref: https://community.cloudflare.com/t/2022-9-16-workers-runtime-release-notes/420496 --- packages/tre/src/plugins/r2/gateway.ts | 11 ++++++++--- packages/tre/src/plugins/r2/router.ts | 7 +++---- packages/tre/src/plugins/r2/schemas.ts | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/tre/src/plugins/r2/gateway.ts b/packages/tre/src/plugins/r2/gateway.ts index d8fa74a34..ee2065064 100644 --- a/packages/tre/src/plugins/r2/gateway.ts +++ b/packages/tre/src/plugins/r2/gateway.ts @@ -110,9 +110,14 @@ export class R2Gateway { return new R2Object(metadata); } - async delete(key: string) { - validate.key(key); - await this.storage.delete(key); + async delete(keys: string | string[]) { + if (Array.isArray(keys)) { + for (const key of keys) validate.key(key); + await this.storage.deleteMany(keys); + } else { + validate.key(keys); + await this.storage.delete(keys); + } } async list(listOptions: R2ListOptions = {}): Promise { diff --git a/packages/tre/src/plugins/r2/router.ts b/packages/tre/src/plugins/r2/router.ts index 5ea416766..d9e44bf98 100644 --- a/packages/tre/src/plugins/r2/router.ts +++ b/packages/tre/src/plugins/r2/router.ts @@ -1,4 +1,3 @@ -import assert from "assert"; import { TextDecoder } from "util"; import { Request, Response } from "../../http"; import { @@ -86,9 +85,9 @@ export class R2Router extends Router { const gateway = this.gatewayFactory.get(params.bucket, persist); if (metadata.method === "delete") { - assert("object" in metadata); - // TODO: support multiple delete - await gateway.delete(metadata.object); + await gateway.delete( + "object" in metadata ? metadata.object : metadata.objects + ); return new Response(); } else if (metadata.method === "put") { const result = await gateway.put(metadata.object, value, metadata); diff --git a/packages/tre/src/plugins/r2/schemas.ts b/packages/tre/src/plugins/r2/schemas.ts index 6feda178c..d5faa7dd3 100644 --- a/packages/tre/src/plugins/r2/schemas.ts +++ b/packages/tre/src/plugins/r2/schemas.ts @@ -175,7 +175,7 @@ export const R2DeleteRequestSchema = z.intersection( z.object({ method: z.literal("delete") }), z.union([ z.object({ object: z.string() }), - z.object({ objects: z.string().array() }), // TODO: support + z.object({ objects: z.string().array() }), ]) ); From 3383f592f14a05bc66c3f251709b01ae33679f59 Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 13:45:00 +0000 Subject: [PATCH 05/15] Support ranged-`R2Bucket#get()` with `Range` header Ref: https://community.cloudflare.com/t/2022-8-12-workers-runtime-release-notes/410873 --- packages/tre/src/plugins/r2/gateway.ts | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/packages/tre/src/plugins/r2/gateway.ts b/packages/tre/src/plugins/r2/gateway.ts index ee2065064..666bb544c 100644 --- a/packages/tre/src/plugins/r2/gateway.ts +++ b/packages/tre/src/plugins/r2/gateway.ts @@ -1,6 +1,7 @@ import { z } from "zod"; import { Log } from "../../shared"; import { RangeStoredValueMeta, Storage } from "../../storage"; +import { _parseRanges } from "../shared"; import { InvalidRange, NoSuchKey } from "./errors"; import { R2Object, @@ -59,12 +60,26 @@ export class R2Gateway { key: string, options: R2GetOptions = {} ): Promise { - const { range = {}, onlyIf } = options; - validate.key(key).condition(await this.head(key), onlyIf); + const meta = await this.head(key); + validate.key(key).condition(meta, options.onlyIf); + + let range = options.range ?? {}; + if (options.rangeHeader !== undefined) { + const ranges = _parseRanges(options.rangeHeader, meta.size); + if (ranges?.length === 1) { + // If the header contained a single range, convert it to an R2Range. + // Note `start` and `end` are inclusive. + const [start, end] = ranges[0]; + range = { offset: start, length: end - start + 1 }; + } else { + // If the header was invalid, or contained multiple ranges, just return + // the full response + range = {}; + } + } let stored: RangeStoredValueMeta | undefined; - // get data dependent upon whether suffix or range exists try { stored = await this.storage.getRange(key, range); } catch { From cf5fef802a3c22ef0d3b16f1292023aa4d0651af Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 13:45:26 +0000 Subject: [PATCH 06/15] Return `range` from `R2Bucket#head()` Calling `head()` should return the full-range here --- packages/tre/src/plugins/r2/gateway.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/tre/src/plugins/r2/gateway.ts b/packages/tre/src/plugins/r2/gateway.ts index 666bb544c..261727602 100644 --- a/packages/tre/src/plugins/r2/gateway.ts +++ b/packages/tre/src/plugins/r2/gateway.ts @@ -52,6 +52,7 @@ export class R2Gateway { if (stored?.metadata === undefined) throw new NoSuchKey(); const { metadata } = stored; + metadata.range = { offset: 0, length: metadata.size }; return new R2Object(metadata); } From 67849eda6ecdccc5ca7215e0c7c9b53b3968fcf5 Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 13:46:12 +0000 Subject: [PATCH 07/15] Support `R2Bucket#list()` `startAfter` option Note this is returning slightly different, but still correct, results to the real implementation. This is due a restriction of Miniflare's storage abstraction, see the comment in `list()` for more details. We're planning to replace this very soon though, so this shouldn't be a problem for long. --- packages/tre/src/plugins/r2/gateway.ts | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/tre/src/plugins/r2/gateway.ts b/packages/tre/src/plugins/r2/gateway.ts index 261727602..790366259 100644 --- a/packages/tre/src/plugins/r2/gateway.ts +++ b/packages/tre/src/plugins/r2/gateway.ts @@ -139,18 +139,26 @@ export class R2Gateway { async list(listOptions: R2ListOptions = {}): Promise { validate.limit(listOptions.limit); - const { prefix = "", include = [], cursor = "" } = listOptions; + const { prefix = "", include = [], cursor = "", startAfter } = listOptions; let { delimiter, limit = MAX_LIST_KEYS } = listOptions; if (delimiter === "") delimiter = undefined; - // if include contains inputs, we reduce the limit to max 100 + // If include contains inputs, we reduce the limit to max 100 if (include.length > 0) limit = Math.min(limit, 100); + // If startAfter is set, we should increment the limit here, as `startAfter` + // is exclusive, but we're using inclusive `start` to implement it. + // Ideally we want to return `limit` number of keys here. However, doing + // this would be incompatible with the returned opaque `cursor`. Luckily, + // the R2 list contract allows us to return less than `limit` keys, as + // long as we set `truncated: true`. We'll fix this behaviour when we switch + // to the new storage system. const res = await this.storage.list({ prefix, limit, cursor, delimiter, + start: startAfter, }); const delimitedPrefixes = new Set(res.delimitedPrefixes ?? []); @@ -169,6 +177,11 @@ export class R2Gateway { return new R2Object(metadata); }); + if (startAfter !== undefined && objects[0]?.key === startAfter) { + // If the first key matched `startAfter`, remove it as this is exclusive + objects.splice(0, 1); + } + const cursorLength = res.cursor.length > 0; return { objects, From 6e2ca409e01849eb404d6ba690a441010fd39c12 Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 13:59:21 +0000 Subject: [PATCH 08/15] Support `SHA-*` checksums and return from `R2Bucket#{get,head}` Ref: https://community.cloudflare.com/t/2022-9-16-workers-runtime-release-notes/420496 Ref: cloudflare/workerd#103 --- packages/tre/src/plugins/r2/errors.ts | 25 +++++++++-------- packages/tre/src/plugins/r2/gateway.ts | 21 +++++++-------- packages/tre/src/plugins/r2/r2Object.ts | 27 +++++++++++++++++++ packages/tre/src/plugins/r2/schemas.ts | 8 +++--- packages/tre/src/plugins/r2/validator.ts | 34 ++++++++++++++++++++---- 5 files changed, 81 insertions(+), 34 deletions(-) diff --git a/packages/tre/src/plugins/r2/errors.ts b/packages/tre/src/plugins/r2/errors.ts index 56119f25e..a43422795 100644 --- a/packages/tre/src/plugins/r2/errors.ts +++ b/packages/tre/src/plugins/r2/errors.ts @@ -14,7 +14,6 @@ enum CfCode { InternalError = 10001, NoSuchObjectKey = 10007, EntityTooLarge = 100100, - InvalidDigest = 10014, InvalidObjectName = 10020, InvalidMaxKeys = 10022, InvalidArgument = 10029, @@ -110,21 +109,21 @@ export class EntityTooLarge extends R2Error { } } -export class InvalidDigest extends R2Error { - constructor() { - super( - Status.BadRequest, - "The Content-MD5 you specified is not valid.", - CfCode.InvalidDigest - ); - } -} - export class BadDigest extends R2Error { - constructor() { + constructor( + algorithm: "MD5" | "SHA-1" | "SHA-256" | "SHA-384" | "SHA-512", + provided: Buffer, + calculated: Buffer + ) { super( Status.BadRequest, - "The Content-MD5 you specified did not match what we received.", + [ + `The ${algorithm} checksum you specified did not match what we received.`, + `You provided a ${algorithm} checksum with value: ${provided.toString( + "hex" + )}`, + `Actual ${algorithm} was: ${calculated.toString("hex")}`, + ].join("\n"), CfCode.BadDigest ); } diff --git a/packages/tre/src/plugins/r2/gateway.ts b/packages/tre/src/plugins/r2/gateway.ts index 790366259..51861f3de 100644 --- a/packages/tre/src/plugins/r2/gateway.ts +++ b/packages/tre/src/plugins/r2/gateway.ts @@ -1,3 +1,4 @@ +import crypto from "crypto"; import { z } from "zod"; import { Log } from "../../shared"; import { RangeStoredValueMeta, Storage } from "../../storage"; @@ -101,28 +102,24 @@ export class R2Gateway { value: Uint8Array, options: R2PutOptions ): Promise { - const { customMetadata, md5, httpMetadata } = options; - - const hash = validate.key(key).size(value).md5(value, md5); + const checksums = validate.key(key).size(value).hash(value, options); // build metadata + const md5Hash = crypto.createHash("md5").update(value).digest("hex"); const metadata: R2ObjectMetadata = { key, size: value.byteLength, - etag: hash.toString("hex"), + etag: md5Hash, version: createVersion(), - httpEtag: `"${hash}"`, + httpEtag: `"${md5Hash}"`, uploaded: Date.now(), - httpMetadata: httpMetadata ?? {}, - customMetadata: customMetadata ?? {}, + httpMetadata: options.httpMetadata ?? {}, + customMetadata: options.customMetadata ?? {}, + checksums, }; // Store value with expiration and metadata - await this.storage.put(key, { - value, - metadata, - }); - + await this.storage.put(key, { value, metadata }); return new R2Object(metadata); } diff --git a/packages/tre/src/plugins/r2/r2Object.ts b/packages/tre/src/plugins/r2/r2Object.ts index ffaea9491..0962c3061 100644 --- a/packages/tre/src/plugins/r2/r2Object.ts +++ b/packages/tre/src/plugins/r2/r2Object.ts @@ -33,6 +33,9 @@ export interface R2ObjectMetadata { customMetadata: Record; // If a GET request was made with a range option, this will be added range?: R2Range; + // Hashes used to check the received object’s integrity. At most one can be + // specified. + checksums?: R2StringChecksums; } export interface EncodedMetadata { @@ -60,6 +63,7 @@ export class R2Object implements R2ObjectMetadata { readonly httpMetadata: R2HttpFields; readonly customMetadata: Record; readonly range?: R2Range; + readonly checksums: R2StringChecksums; constructor(metadata: R2ObjectMetadata) { this.key = metadata.key; @@ -71,6 +75,22 @@ export class R2Object implements R2ObjectMetadata { this.httpMetadata = metadata.httpMetadata; this.customMetadata = metadata.customMetadata; this.range = metadata.range; + + // For non-multipart uploads, we always need to store an MD5 hash in + // `checksums`, but never explicitly stored one. Luckily, `R2Bucket#put()` + // always makes `etag` an MD5 hash. + const checksums: R2StringChecksums = { ...metadata.checksums }; + const etag = metadata.etag; + if (etag.length === 32 && HEX_REGEXP.test(etag)) { + checksums.md5 = metadata.etag; + } else if (etag.length === 24 && BASE64_REGEXP.test(etag)) { + // TODO: remove this when we switch underlying storage mechanisms + // Previous versions of Miniflare 3 base64 encoded `etag` instead + checksums.md5 = Buffer.from(etag, "base64").toString("hex"); + } else { + assert.fail("Expected `etag` to be an MD5 hash"); + } + this.checksums = checksums; } // Format for return to the Workers Runtime @@ -83,6 +103,13 @@ export class R2Object implements R2ObjectMetadata { k, v, })), + checksums: { + 0: this.checksums.md5, + 1: this.checksums.sha1, + 2: this.checksums.sha256, + 3: this.checksums.sha384, + 4: this.checksums.sha512, + }, }; } diff --git a/packages/tre/src/plugins/r2/schemas.ts b/packages/tre/src/plugins/r2/schemas.ts index d5faa7dd3..6b0aed103 100644 --- a/packages/tre/src/plugins/r2/schemas.ts +++ b/packages/tre/src/plugins/r2/schemas.ts @@ -111,10 +111,10 @@ export const R2PutRequestSchema = z httpFields: R2HttpFieldsSchema.optional(), // (renamed in transform) onlyIf: R2ConditionalSchema.optional(), md5: Base64DataSchema.optional(), // (intentionally base64, not hex) // TODO: make sure we're testing this is base64 - sha1: HexDataSchema.optional(), // TODO: support - sha256: HexDataSchema.optional(), // TODO: support - sha384: HexDataSchema.optional(), // TODO: support - sha512: HexDataSchema.optional(), // TODO: support + sha1: HexDataSchema.optional(), + sha256: HexDataSchema.optional(), + sha384: HexDataSchema.optional(), + sha512: HexDataSchema.optional(), }) .transform((value) => ({ method: value.method, diff --git a/packages/tre/src/plugins/r2/validator.ts b/packages/tre/src/plugins/r2/validator.ts index 8ab262670..2d0797bc1 100644 --- a/packages/tre/src/plugins/r2/validator.ts +++ b/packages/tre/src/plugins/r2/validator.ts @@ -1,4 +1,5 @@ import crypto from "crypto"; +import { R2StringChecksums } from "@cloudflare/workers-types/experimental"; import { BadDigest, EntityTooLarge, @@ -62,14 +63,37 @@ function testR2Conditional( return true; } + +export const R2_HASH_ALGORITHMS = [ + { name: "MD5", field: "md5" }, + { name: "SHA-1", field: "sha1" }, + { name: "SHA-256", field: "sha256" }, + { name: "SHA-384", field: "sha384" }, + { name: "SHA-512", field: "sha512" }, +] as const; +export type R2Hashes = Record< + typeof R2_HASH_ALGORITHMS[number]["field"], + Buffer | undefined +>; + export class Validator { - md5(value: Uint8Array, md5?: Buffer): Buffer { - const md5Hash = crypto.createHash("md5").update(value).digest(); - if (md5 !== undefined && !md5.equals(md5Hash)) { - throw new BadDigest(); + hash(value: Uint8Array, hashes: R2Hashes): R2StringChecksums { + const checksums: R2StringChecksums = {}; + for (const { name, field } of R2_HASH_ALGORITHMS) { + const providedHash = hashes[field]; + if (providedHash !== undefined) { + const computedHash = crypto.createHash(field).update(value).digest(); + if (!providedHash.equals(computedHash)) { + throw new BadDigest(name, providedHash, computedHash); + } + // Store computed hash to ensure consistent casing in returned checksums + // from `R2Object` + checksums[field] = computedHash.toString("hex"); + } } - return md5Hash; + return checksums; } + condition(meta: R2Object, onlyIf?: R2Conditional): Validator { // test conditional should it exist if (!testR2Conditional(onlyIf, meta) || meta?.size === 0) { From fea05feb1a9f9a6661e8677915876477ba4084dc Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 14:01:22 +0000 Subject: [PATCH 09/15] Support conditional `R2Bucket#put()` --- packages/tre/src/plugins/r2/gateway.ts | 12 +++++++++++- packages/tre/src/plugins/r2/validator.ts | 9 +++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/packages/tre/src/plugins/r2/gateway.ts b/packages/tre/src/plugins/r2/gateway.ts index 51861f3de..88ac2a559 100644 --- a/packages/tre/src/plugins/r2/gateway.ts +++ b/packages/tre/src/plugins/r2/gateway.ts @@ -102,7 +102,17 @@ export class R2Gateway { value: Uint8Array, options: R2PutOptions ): Promise { - const checksums = validate.key(key).size(value).hash(value, options); + let meta: R2Object | undefined; + try { + meta = await this.head(key); + } catch (e) { + if (!(e instanceof NoSuchKey)) throw e; + } + const checksums = validate + .key(key) + .size(value) + .condition(meta, options.onlyIf) + .hash(value, options); // build metadata const md5Hash = crypto.createHash("md5").update(value).digest("hex"); diff --git a/packages/tre/src/plugins/r2/validator.ts b/packages/tre/src/plugins/r2/validator.ts index 2d0797bc1..a60792d4f 100644 --- a/packages/tre/src/plugins/r2/validator.ts +++ b/packages/tre/src/plugins/r2/validator.ts @@ -94,10 +94,11 @@ export class Validator { return checksums; } - condition(meta: R2Object, onlyIf?: R2Conditional): Validator { - // test conditional should it exist - if (!testR2Conditional(onlyIf, meta) || meta?.size === 0) { - throw new PreconditionFailed().attach(meta); + condition(meta?: R2Object, onlyIf?: R2Conditional): Validator { + if (onlyIf !== undefined && !testR2Conditional(onlyIf, meta)) { + let error = new PreconditionFailed(); + if (meta !== undefined) error = error.attach(meta); + throw error; } return this; } From adcd453307fbd50ff776091e35b524d82241d54c Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 14:04:40 +0000 Subject: [PATCH 10/15] Support `secondsGranularity` in `R2Conditional` and add tests --- packages/tre/src/plugins/r2/index.ts | 1 + packages/tre/src/plugins/r2/validator.ts | 76 ++++++------ .../tre/test/plugins/r2/validator.spec.ts | 114 ++++++++++++++++++ 3 files changed, 150 insertions(+), 41 deletions(-) create mode 100644 packages/tre/test/plugins/r2/validator.spec.ts diff --git a/packages/tre/src/plugins/r2/index.ts b/packages/tre/src/plugins/r2/index.ts index dab218d66..4a653e8be 100644 --- a/packages/tre/src/plugins/r2/index.ts +++ b/packages/tre/src/plugins/r2/index.ts @@ -46,3 +46,4 @@ export const R2_PLUGIN: Plugin< export * from "./r2Object"; export * from "./gateway"; export * from "./schemas"; +export { _testR2Conditional } from "./validator"; diff --git a/packages/tre/src/plugins/r2/validator.ts b/packages/tre/src/plugins/r2/validator.ts index a60792d4f..a7a786a3e 100644 --- a/packages/tre/src/plugins/r2/validator.ts +++ b/packages/tre/src/plugins/r2/validator.ts @@ -17,51 +17,45 @@ const UNPAIRED_SURROGATE_PAIR_REGEX = /^(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])$/; const MAX_VALUE_SIZE = 5 * 1_000 * 1_000 * 1_000 - 5 * 1_000 * 1_000; -// false -> the condition testing "failed" -function testR2Conditional( - conditional?: R2Conditional, - metadata?: R2ObjectMetadata +function identity(ms: number) { + return ms; +} +function truncateToSeconds(ms: number) { + return Math.floor(ms / 1000) * 1000; +} + +// Returns `true` iff the condition passed +/** @internal */ +export function _testR2Conditional( + cond: R2Conditional, + metadata?: Pick ): boolean { - const { etagMatches, etagDoesNotMatch, uploadedBefore, uploadedAfter } = - conditional ?? {}; + // Adapted from internal R2 gateway implementation. + // See also https://datatracker.ietf.org/doc/html/rfc7232#section-6. - // If the object doesn't exist if (metadata === undefined) { - // the etagDoesNotMatch and uploadedBefore automatically pass - // etagMatches and uploadedAfter automatically fail if they exist - return etagMatches === undefined && uploadedAfter === undefined; - } - - const { etag, uploaded } = metadata; - - // ifMatch check - const ifMatch = etagMatches ? etagMatches === etag : null; - if (ifMatch === false) return false; - - // ifNoMatch check - const ifNoneMatch = etagDoesNotMatch ? etagDoesNotMatch !== etag : null; - - if (ifNoneMatch === false) return false; - - // ifUnmodifiedSince check - if ( - ifMatch !== true && // if "ifMatch" is true, we ignore date checking - uploadedBefore !== undefined && - uploaded > uploadedBefore.getTime() - ) { - return false; - } - - // ifModifiedSince check - if ( - ifNoneMatch !== true && // if "ifNoneMatch" is true, we ignore date checking - uploadedAfter !== undefined && - uploaded < uploadedAfter.getTime() - ) { - return false; + const ifMatch = cond.etagMatches === undefined; + const ifModifiedSince = cond.uploadedAfter === undefined; + return ifMatch && ifModifiedSince; } - return true; + const { etag, uploaded: lastModifiedRaw } = metadata; + const ifMatch = cond.etagMatches === undefined || cond.etagMatches === etag; + const ifNoneMatch = + cond.etagDoesNotMatch === undefined || cond.etagDoesNotMatch !== etag; + + const maybeTruncate = cond.secondsGranularity ? truncateToSeconds : identity; + const lastModified = maybeTruncate(lastModifiedRaw); + const ifModifiedSince = + cond.uploadedAfter === undefined || + maybeTruncate(cond.uploadedAfter.getTime()) < lastModified || + (cond.etagDoesNotMatch !== undefined && ifNoneMatch); + const ifUnmodifiedSince = + cond.uploadedBefore === undefined || + lastModified < maybeTruncate(cond.uploadedBefore.getTime()) || + (cond.etagMatches !== undefined && ifMatch); + + return ifMatch && ifNoneMatch && ifModifiedSince && ifUnmodifiedSince; } export const R2_HASH_ALGORITHMS = [ @@ -95,7 +89,7 @@ export class Validator { } condition(meta?: R2Object, onlyIf?: R2Conditional): Validator { - if (onlyIf !== undefined && !testR2Conditional(onlyIf, meta)) { + if (onlyIf !== undefined && !_testR2Conditional(onlyIf, meta)) { let error = new PreconditionFailed(); if (meta !== undefined) error = error.attach(meta); throw error; diff --git a/packages/tre/test/plugins/r2/validator.spec.ts b/packages/tre/test/plugins/r2/validator.spec.ts new file mode 100644 index 000000000..45d367bed --- /dev/null +++ b/packages/tre/test/plugins/r2/validator.spec.ts @@ -0,0 +1,114 @@ +import { R2Conditional } from "@cloudflare/workers-types/experimental"; +import { R2ObjectMetadata, _testR2Conditional } from "@miniflare/tre"; +import test from "ava"; + +test("testR2Conditional: matches various conditions", (t) => { + // Adapted from internal R2 gateway tests + const etag = "test"; + const badEtag = "not-test"; + + const uploadedDate = new Date("2023-02-24T00:09:00.500Z"); + const pastDate = new Date(uploadedDate.getTime() - 30_000); + const futureDate = new Date(uploadedDate.getTime() + 30_000); + + const metadata: Pick = { + etag, + uploaded: uploadedDate.getTime(), + }; + + const using = (cond: R2Conditional) => _testR2Conditional(cond, metadata); + const usingMissing = (cond: R2Conditional) => _testR2Conditional(cond); + + // Check single conditions + t.true(using({ etagMatches: etag })); + t.false(using({ etagMatches: badEtag })); + + t.true(using({ etagDoesNotMatch: badEtag })); + t.false(using({ etagDoesNotMatch: etag })); + + t.false(using({ uploadedBefore: pastDate })); + t.true(using({ uploadedBefore: futureDate })); + + t.true(using({ uploadedAfter: pastDate })); + t.false(using({ uploadedBefore: pastDate })); + + // Check multiple conditions that evaluate to false + t.false(using({ etagMatches: etag, etagDoesNotMatch: etag })); + t.false(using({ etagMatches: etag, uploadedAfter: futureDate })); + t.false( + using({ + // `etagMatches` pass makes `uploadedBefore` pass, but `uploadedAfter` fails + etagMatches: etag, + uploadedAfter: futureDate, + uploadedBefore: pastDate, + }) + ); + t.false(using({ etagDoesNotMatch: badEtag, uploadedBefore: pastDate })); + t.false( + using({ + // `etagDoesNotMatch` pass makes `uploadedAfter` pass, but `uploadedBefore` fails + etagDoesNotMatch: badEtag, + uploadedAfter: futureDate, + uploadedBefore: pastDate, + }) + ); + t.false( + using({ + etagMatches: badEtag, + etagDoesNotMatch: badEtag, + uploadedAfter: pastDate, + uploadedBefore: futureDate, + }) + ); + + // Check multiple conditions that evaluate to true + t.true(using({ etagMatches: etag, etagDoesNotMatch: badEtag })); + // `etagMatches` pass makes `uploadedBefore` pass + t.true(using({ etagMatches: etag, uploadedBefore: pastDate })); + // `etagDoesNotMatch` pass makes `uploadedAfter` pass + t.true(using({ etagDoesNotMatch: badEtag, uploadedAfter: futureDate })); + t.true( + using({ + // `etagMatches` pass makes `uploadedBefore` pass + etagMatches: etag, + uploadedBefore: pastDate, + // `etagDoesNotMatch` pass makes `uploadedAfter` pass + etagDoesNotMatch: badEtag, + uploadedAfter: futureDate, + }) + ); + t.true( + using({ + uploadedBefore: futureDate, + // `etagDoesNotMatch` pass makes `uploadedAfter` pass + etagDoesNotMatch: badEtag, + uploadedAfter: futureDate, + }) + ); + t.true( + using({ + uploadedAfter: pastDate, + // `etagMatches` pass makes `uploadedBefore` pass + etagMatches: etag, + uploadedBefore: pastDate, + }) + ); + + // Check missing metadata fails with either `etagMatches` and `uploadedAfter` + t.false(usingMissing({ etagMatches: etag })); + t.false(usingMissing({ uploadedAfter: pastDate })); + t.false(usingMissing({ etagMatches: etag, uploadedAfter: pastDate })); + t.true(usingMissing({ etagDoesNotMatch: etag })); + t.true(usingMissing({ uploadedBefore: pastDate })); + t.true(usingMissing({ etagDoesNotMatch: etag, uploadedBefore: pastDate })); + t.false(usingMissing({ etagMatches: etag, uploadedBefore: pastDate })); + t.false(usingMissing({ etagDoesNotMatch: etag, uploadedAfter: pastDate })); + + // Check with second granularity + const justPastDate = new Date(uploadedDate.getTime() - 250); + const justFutureDate = new Date(uploadedDate.getTime() + 250); + t.true(using({ uploadedAfter: justPastDate })); + t.false(using({ uploadedAfter: justPastDate, secondsGranularity: true })); + t.true(using({ uploadedBefore: justFutureDate })); + t.false(using({ uploadedBefore: justFutureDate, secondsGranularity: true })); +}); From e3a9527acd15667ba80691f1fa824ca7ca12745c Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 14:05:09 +0000 Subject: [PATCH 11/15] Remove unpaired surrogate pair regexp R2 key validation This restriction no longer seems to apply --- packages/tre/src/plugins/r2/validator.ts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/packages/tre/src/plugins/r2/validator.ts b/packages/tre/src/plugins/r2/validator.ts index a7a786a3e..acebe5498 100644 --- a/packages/tre/src/plugins/r2/validator.ts +++ b/packages/tre/src/plugins/r2/validator.ts @@ -12,9 +12,6 @@ import { R2Conditional } from "./schemas"; export const MAX_LIST_KEYS = 1_000; const MAX_KEY_SIZE = 1024; - -const UNPAIRED_SURROGATE_PAIR_REGEX = - /^(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])$/; const MAX_VALUE_SIZE = 5 * 1_000 * 1_000 * 1_000 - 5 * 1_000 * 1_000; function identity(ms: number) { @@ -106,11 +103,7 @@ export class Validator { } key(key: string): Validator { - // Check key isn't too long and exists outside regex const keyLength = Buffer.byteLength(key); - if (UNPAIRED_SURROGATE_PAIR_REGEX.test(key)) { - throw new InvalidObjectName(); - } if (keyLength >= MAX_KEY_SIZE) { throw new InvalidObjectName(); } From 06e54332503ddc1462585419e44cd0f3bc326d0f Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 14:05:28 +0000 Subject: [PATCH 12/15] Make `R2Object#version` a hex string --- packages/tre/src/plugins/r2/r2Object.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tre/src/plugins/r2/r2Object.ts b/packages/tre/src/plugins/r2/r2Object.ts index 0962c3061..525eb97ec 100644 --- a/packages/tre/src/plugins/r2/r2Object.ts +++ b/packages/tre/src/plugins/r2/r2Object.ts @@ -44,7 +44,7 @@ export interface EncodedMetadata { } export function createVersion(): string { - return crypto.randomBytes(24).toString("base64"); + return crypto.randomBytes(16).toString("hex"); } /** From 887afbdda1c6bf0febbc369ca34ca8002ca9d420 Mon Sep 17 00:00:00 2001 From: bcoll Date: Sat, 25 Feb 2023 14:06:21 +0000 Subject: [PATCH 13/15] Port Miniflare 2's R2 tests --- ava.config.mjs | 1 - package.json | 2 +- packages/tre/src/plugins/r2/schemas.ts | 2 +- packages/tre/test/plugins/r2/index.spec.ts | 1181 ++++++++++++++++++++ packages/tre/test/test-shared/asserts.ts | 18 + packages/tre/test/test-shared/log.ts | 12 +- packages/tre/test/test-shared/miniflare.ts | 4 +- 7 files changed, 1213 insertions(+), 7 deletions(-) create mode 100644 packages/tre/test/plugins/r2/index.spec.ts create mode 100644 packages/tre/test/test-shared/asserts.ts diff --git a/ava.config.mjs b/ava.config.mjs index 15314df2c..92dee03a2 100644 --- a/ava.config.mjs +++ b/ava.config.mjs @@ -9,7 +9,6 @@ const rewritePaths = Object.fromEntries( export default { files: ["packages/*/test/**/*.spec.ts"], - timeout: "5m", nodeArguments: ["--no-warnings", "--experimental-vm-modules"], typescript: { compile: false, diff --git a/package.json b/package.json index 0e418ec67..5f41c3c84 100644 --- a/package.json +++ b/package.json @@ -60,6 +60,6 @@ "node": ">=16.13" }, "volta": { - "node": "18.2.0" + "node": "16.13.0" } } diff --git a/packages/tre/src/plugins/r2/schemas.ts b/packages/tre/src/plugins/r2/schemas.ts index 6b0aed103..a67fc3b61 100644 --- a/packages/tre/src/plugins/r2/schemas.ts +++ b/packages/tre/src/plugins/r2/schemas.ts @@ -110,7 +110,7 @@ export const R2PutRequestSchema = z customFields: RecordSchema.optional(), // (renamed in transform) httpFields: R2HttpFieldsSchema.optional(), // (renamed in transform) onlyIf: R2ConditionalSchema.optional(), - md5: Base64DataSchema.optional(), // (intentionally base64, not hex) // TODO: make sure we're testing this is base64 + md5: Base64DataSchema.optional(), // (intentionally base64, not hex) sha1: HexDataSchema.optional(), sha256: HexDataSchema.optional(), sha384: HexDataSchema.optional(), diff --git a/packages/tre/test/plugins/r2/index.spec.ts b/packages/tre/test/plugins/r2/index.spec.ts new file mode 100644 index 000000000..91a26638a --- /dev/null +++ b/packages/tre/test/plugins/r2/index.spec.ts @@ -0,0 +1,1181 @@ +import assert from "assert"; +import { Blob } from "buffer"; +import crypto from "crypto"; +import path from "path"; +import { blob } from "stream/consumers"; +import { ReadableStream } from "stream/web"; +import type { + R2Bucket, + R2Checksums, + R2Conditional, + R2GetOptions, + R2HTTPMetadata, + R2ListOptions, + R2MultipartOptions, + R2MultipartUpload, + R2Object, + R2ObjectBody, + R2Objects, + R2PutOptions, + R2Range, + R2StringChecksums, + Blob as WorkerBlob, + Headers as WorkerHeaders, + Response as WorkerResponse, +} from "@cloudflare/workers-types/experimental"; +import { + File, + FileStorage, + FormData, + Headers, + Miniflare, + MiniflareOptions, + Response, + viewToArray, + viewToBuffer, +} from "@miniflare/tre"; +import { Macro, ThrowsExpectation } from "ava"; +import { z } from "zod"; +import { + MiniflareTestContext, + miniflareTest, + useTmp, + utf8Decode, +} from "../../test-shared"; +import { isWithin } from "../../test-shared/asserts"; + +function hash(value: string, algorithm = "md5") { + return crypto.createHash(algorithm).update(value).digest("hex"); +} + +// R2-like API for sending requests to the test worker. These tests were +// ported from Miniflare 2, which provided this API natively. + +type ReducedR2Object = Omit< + R2Object, + "checksums" | "uploaded" | "writeHttpMetadata" +> & { checksums: R2StringChecksums; uploaded: string }; +type ReducedR2ObjectBody = ReducedR2Object & { body: number }; + +async function deconstructResponse(res: Response): Promise { + const formData = await res.formData(); + const payload = formData.get("payload"); + assert(typeof payload === "string"); + return JSON.parse(payload, (key, value) => { + if (typeof value === "object" && value !== null && "$type" in value) { + if (value.$type === "R2Object") { + const object = value as ReducedR2Object; + return new TestR2Object(object); + } else if (value.$type === "R2ObjectBody") { + const objectBody = value as ReducedR2ObjectBody; + const body = formData.get(objectBody.body.toString()); + // noinspection SuspiciousTypeOfGuard + assert(body instanceof File); + return new TestR2ObjectBody(objectBody, body); + } else if (value.$type === "Date") { + return new Date(value.value); + } + // TODO: multipart upload + } + return value; + }); +} + +function maybeJsonStringify(value: unknown): string { + if (value == null) return ""; + return JSON.stringify(value, (key, value) => { + const dateResult = z.string().datetime().safeParse(value); + if (dateResult.success) { + return { $type: "Date", value: new Date(dateResult.data).getTime() }; + } + if (value instanceof Headers) { + return { $type: "Headers", entries: [...value] }; + } + return value; + }); +} + +class TestR2Bucket implements R2Bucket { + constructor(private readonly mf: Miniflare, private readonly ns = "") {} + + async head(key: string): Promise { + const url = new URL(this.ns + key, "http://localhost"); + const res = await this.mf.dispatchFetch(url, { + method: "GET", + headers: { + Accept: "multipart/form-data", + "Test-Method": "HEAD", + }, + }); + return deconstructResponse(res); + } + + get( + key: string, + options: R2GetOptions & { + onlyIf: R2Conditional | Headers; + } + ): Promise; + get(key: string, options?: R2GetOptions): Promise; + async get( + key: string, + options?: R2GetOptions + ): Promise { + const url = `http://localhost/${encodeURIComponent(this.ns + key)}`; + const res = await this.mf.dispatchFetch(url, { + method: "GET", + headers: { + Accept: "multipart/form-data", + "Test-Options": maybeJsonStringify(options), + }, + }); + return deconstructResponse(res); + } + + // @ts-expect-error `@cloudflare/workers-type`'s `ReadableStream` type is + // incompatible with Node's + async put( + key: string, + value: + | ReadableStream + | ArrayBuffer + | ArrayBufferView + | string + | null + | Blob, + options?: R2PutOptions + ): Promise { + const url = `http://localhost/${encodeURIComponent(this.ns + key)}`; + const res = await this.mf.dispatchFetch(url, { + method: "PUT", + headers: { + Accept: "multipart/form-data", + "Test-Options": maybeJsonStringify(options), + }, + body: ArrayBuffer.isView(value) ? viewToArray(value) : value, + }); + return deconstructResponse(res); + } + + async delete(keys: string | string[]): Promise { + if (Array.isArray(keys)) keys = keys.map((key) => this.ns + key); + else keys = this.ns + keys; + await this.mf.dispatchFetch("http://localhost", { + method: "DELETE", + body: JSON.stringify(keys), + headers: { Accept: "multipart/form-data" }, + }); + } + + async list(options?: R2ListOptions): Promise { + const res = await this.mf.dispatchFetch("http://localhost", { + method: "GET", + headers: { + Accept: "multipart/form-data", + "Test-Method": "LIST", + "Test-Options": maybeJsonStringify(options), + }, + }); + return deconstructResponse(res); + } + + createMultipartUpload( + _key: string, + _options?: R2MultipartOptions + ): Promise { + assert.fail("TestR2Bucket#createMultipartUpload() not yet implemented"); + } + + resumeMultipartUpload(_key: string, _uploadId: string): R2MultipartUpload { + assert.fail("TestR2Bucket#resumeMultipartUpload() not yet implemented"); + } +} + +class TestR2Checksums implements R2Checksums { + readonly md5?: ArrayBuffer; + readonly sha1?: ArrayBuffer; + readonly sha256?: ArrayBuffer; + readonly sha384?: ArrayBuffer; + readonly sha512?: ArrayBuffer; + + constructor(private readonly checksums: R2StringChecksums) { + this.md5 = this.#decode(checksums.md5); + this.sha1 = this.#decode(checksums.sha1); + this.sha256 = this.#decode(checksums.sha256); + this.sha384 = this.#decode(checksums.sha384); + this.sha512 = this.#decode(checksums.sha512); + } + + #decode(checksum?: string) { + return checksum === undefined + ? undefined + : viewToBuffer(Buffer.from(checksum, "hex")); + } + + toJSON(): R2StringChecksums { + return this.checksums; + } +} + +class TestR2Object implements R2Object { + readonly key: string; + readonly version: string; + readonly size: number; + readonly etag: string; + readonly httpEtag: string; + readonly checksums: R2Checksums; + readonly uploaded: Date; + readonly httpMetadata?: R2HTTPMetadata; + readonly customMetadata?: Record; + readonly range?: R2Range; + + constructor(object: ReducedR2Object) { + this.key = object.key; + this.version = object.version; + this.size = object.size; + this.etag = object.etag; + this.httpEtag = object.httpEtag; + this.checksums = new TestR2Checksums(object.checksums); + this.uploaded = new Date(object.uploaded); + this.httpMetadata = object.httpMetadata; + this.customMetadata = object.customMetadata; + this.range = object.range; + } + + writeHttpMetadata(_headers: Headers): void { + // Fully-implemented by `workerd` + assert.fail("TestR2Object#writeHttpMetadata() not implemented"); + } +} + +class TestR2ObjectBody extends TestR2Object implements R2ObjectBody { + constructor(object: ReducedR2Object, readonly body: Blob) { + super(object); + } + + get bodyUsed(): boolean { + // Fully-implemented by `workerd` + assert.fail("TestR2Object#writeHttpMetadata() not implemented"); + return false; // TypeScript requires `get` accessors return + } + + arrayBuffer(): Promise { + return this.body.arrayBuffer(); + } + text(): Promise { + return this.body.text(); + } + async json(): Promise { + return JSON.parse(await this.body.text()); + } + // @ts-expect-error `@cloudflare/workers-type`'s `Blob` type is incompatible + // with Node's + blob(): Promise { + return Promise.resolve(this.body); + } +} + +interface Context extends MiniflareTestContext { + ns: string; + r2: TestR2Bucket; +} + +const opts: Partial = { + r2Buckets: { BUCKET: "bucket" }, + compatibilityFlags: ["r2_list_honor_include"], +}; +const test = miniflareTest<{ BUCKET: R2Bucket }, Context>( + opts, + async (global, request, env) => { + function maybeJsonParse(value: string | null): any { + if (value === null || value === "") return; + return JSON.parse(value, (key, value) => { + if (typeof value === "object" && value !== null && "$type" in value) { + if (value.$type === "Date") { + return new Date(value.value); + } + if (value.$type === "Headers") { + return new global.Headers(value.entries); + } + } + return value; + }); + } + + function reduceR2Object( + value: R2Object + ): ReducedR2Object & { $type: "R2Object" } { + return { + $type: "R2Object", + key: value.key, + version: value.version, + size: value.size, + etag: value.etag, + httpEtag: value.httpEtag, + checksums: value.checksums.toJSON(), + uploaded: value.uploaded.toISOString(), + httpMetadata: value.httpMetadata, + customMetadata: value.customMetadata, + range: value.range, + }; + } + async function constructResponse(thing: any): Promise { + // Stringify `thing` as JSON, replacing `R2Object(Body)`s with a + // plain-object representation. Reading bodies is asynchronous, but + // `JSON.stringify`-replacers must be synchronous, so record body + // reading `Promise`s, and attach the bodies in `FormData`. + const bodyPromises: Promise[] = []; + const payload = JSON.stringify(thing, (key, value) => { + if (typeof value === "object" && value !== null) { + // https://github.com/cloudflare/workerd/blob/c336d404a5fbe2c779b28a6ca54c338f89e2fea1/src/workerd/api/r2-bucket.h#L202 + if (value.constructor?.name === "HeadResult" /* R2Object */) { + const object = value as R2Object; + return reduceR2Object(object); + } + // https://github.com/cloudflare/workerd/blob/c336d404a5fbe2c779b28a6ca54c338f89e2fea1/src/workerd/api/r2-bucket.h#L255 + if (value.constructor?.name === "GetResult" /* R2ObjectBody */) { + const objectBody = value as R2ObjectBody; + const object = reduceR2Object(objectBody); + const bodyId = bodyPromises.length; + // Test bodies shouldn't be too big, so buffering them is fine + bodyPromises.push(objectBody.blob()); + return { ...object, $type: "R2ObjectBody", body: bodyId }; + } + // TODO: R2MultipartUpload + } + + if ( + typeof value === "string" && + // https://github.com/colinhacks/zod/blob/981af6503ee1be530fe525ac77ba95e1904ce24a/src/types.ts#L562 + /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$/.test(value) + ) { + return { $type: "Date", value: new Date(value).getTime() }; + } + + return value; + }); + + // Construct `FormData` containing JSON-payload and all bodies + const formData = new global.FormData(); + formData.set("payload", payload); + const bodies = await Promise.all(bodyPromises); + bodies.forEach((body, i) => formData.set(i.toString(), body)); + + return new global.Response(formData); + } + + // Actual `HEAD` requests can't return bodies, but we'd like them to. + // Also, `LIST` is not a valid HTTP method. + const method = request.headers.get("Test-Method") ?? request.method; + const { pathname } = new URL(request.url); + const key = decodeURIComponent(pathname.substring(1)); + if (method === "HEAD") { + return constructResponse(await env.BUCKET.head(key)); + } else if (method === "GET") { + const optionsHeader = request.headers.get("Test-Options"); + const options = maybeJsonParse(optionsHeader); + return constructResponse(await env.BUCKET.get(key, options)); + } else if (method === "PUT") { + const optionsHeader = request.headers.get("Test-Options"); + const options = maybeJsonParse(optionsHeader); + return constructResponse( + await env.BUCKET.put(key, await request.arrayBuffer(), options) + ); + } else if (method === "DELETE") { + const keys = await request.json(); + await env.BUCKET.delete(keys); + return new global.Response(null, { status: 204 }); + } else if (method === "LIST") { + const optionsHeader = request.headers.get("Test-Options"); + const options = maybeJsonParse(optionsHeader); + return constructResponse(await env.BUCKET.list(options)); + } + + return new global.Response(null, { status: 405 }); + } +); +test.beforeEach((t) => { + // Namespace keys so tests which are accessing the same Miniflare instance + // and bucket don't have races from key collisions + const ns = `${Date.now()}_${Math.floor( + Math.random() * Number.MAX_SAFE_INTEGER + )}`; + t.context.ns = ns; + t.context.r2 = new TestR2Bucket(t.context.mf, ns); +}); + +const validatesKeyMacro: Macro< + [ + { + method: string; + f: (r2: TestR2Bucket, key?: any) => Promise; + } + ], + Context +> = { + title(providedTitle, { method }) { + return providedTitle ?? `${method}: validates key`; + }, + async exec(t, { method, f }) { + const { r2, ns } = t.context; + await t.throwsAsync(f(r2, "x".repeat(1025 - ns.length)), { + instanceOf: Error, + message: `${method}: The specified object name is not valid. (10020)`, + }); + }, +}; + +test("head: returns null for non-existent keys", async (t) => { + const { r2 } = t.context; + t.is(await r2.head("key"), null); +}); +test("head: returns metadata for existing keys", async (t) => { + const { r2, ns } = t.context; + const start = Date.now(); + await r2.put("key", "value", { + httpMetadata: { + contentType: "text/plain", + contentLanguage: "en-GB", + contentDisposition: 'attachment; filename="value.txt"', + contentEncoding: "gzip", + cacheControl: "max-age=3600", + cacheExpiry: new Date("Fri, 24 Feb 2023 00:00:00 GMT"), + }, + customMetadata: { key: "value" }, + }); + const object = await r2.head("key"); + assert(object !== null); + t.is(object.key, `${ns}key`); + t.regex(object.version, /^[0-9a-f]{32}$/); + t.is(object.size, "value".length); + t.is(object.etag, "2063c1608d6e0baf80249c42e2be5804"); + t.is(object.httpEtag, `"2063c1608d6e0baf80249c42e2be5804"`); + t.deepEqual(object.checksums.toJSON(), { + md5: "2063c1608d6e0baf80249c42e2be5804", + }); + t.deepEqual(object.httpMetadata, { + contentType: "text/plain", + contentLanguage: "en-GB", + contentDisposition: 'attachment; filename="value.txt"', + contentEncoding: "gzip", + cacheControl: "max-age=3600", + cacheExpiry: new Date("Fri, 24 Feb 2023 00:00:00 GMT"), + }); + t.deepEqual(object.customMetadata, { key: "value" }); + t.deepEqual(object.range, { offset: 0, length: 5 }); + isWithin(t, 3000, object.uploaded.getTime(), start); +}); +test(validatesKeyMacro, { method: "head", f: (r2, key) => r2.head(key) }); + +test("get: returns null for non-existent keys", async (t) => { + const { r2 } = t.context; + t.is(await r2.get("key"), null); +}); +test("get: returns metadata and body for existing keys", async (t) => { + const { r2, ns } = t.context; + const start = Date.now(); + await r2.put("key", "value", { + httpMetadata: { + contentType: "text/plain", + contentLanguage: "en-GB", + contentDisposition: 'attachment; filename="value.txt"', + contentEncoding: "gzip", + cacheControl: "max-age=3600", + cacheExpiry: new Date("Fri, 24 Feb 2023 00:00:00 GMT"), + }, + customMetadata: { key: "value" }, + }); + const body = await r2.get("key"); + assert(body !== null); + t.is(body.key, `${ns}key`); + t.regex(body.version, /^[0-9a-f]{32}$/); + t.is(body.size, "value".length); + t.is(body.etag, "2063c1608d6e0baf80249c42e2be5804"); + t.is(body.httpEtag, `"2063c1608d6e0baf80249c42e2be5804"`); + t.deepEqual(body.checksums.toJSON(), { + md5: "2063c1608d6e0baf80249c42e2be5804", + }); + t.deepEqual(body.httpMetadata, { + contentType: "text/plain", + contentLanguage: "en-GB", + contentDisposition: 'attachment; filename="value.txt"', + contentEncoding: "gzip", + cacheControl: "max-age=3600", + cacheExpiry: new Date("Fri, 24 Feb 2023 00:00:00 GMT"), + }); + t.deepEqual(body.customMetadata, { key: "value" }); + t.deepEqual(body.range, { offset: 0, length: 5 }); + isWithin(t, 3000, body.uploaded.getTime(), start); +}); +test(validatesKeyMacro, { method: "get", f: (r2, key) => r2.get(key) }); +test("get: range using object", async (t) => { + const { r2 } = t.context; + await r2.put("key", "value"); + + // Check with offset + let body = await r2.get("key", { range: { offset: 3 } }); + assert(body !== null); + t.deepEqual(body.range, { offset: 3, length: 2 }); + t.is(await body.text(), "ue"); + + // Check with length + body = await r2.get("key", { range: { length: 3 } }); + assert(body !== null); + t.deepEqual(body.range, { offset: 0, length: 3 }); + t.is(await body.text(), "val"); + // Check with overflowing length + body = await r2.get("key", { range: { length: 42 } }); + assert(body !== null); + t.deepEqual(body.range, { offset: 0, length: 5 }); + t.is(await body.text(), "value"); + + // Check with offset and length + body = await r2.get("key", { range: { offset: 1, length: 3 } }); + assert(body !== null); + t.deepEqual(body.range, { offset: 1, length: 3 }); + t.is(await body.text(), "alu"); + + // Check with suffix + body = await r2.get("key", { range: { suffix: 3 } }); + assert(body !== null); + t.deepEqual(body.range, { offset: 2, length: 3 }); + t.is(await body.text(), "lue"); + // Check with underflowing suffix + body = await r2.get("key", { range: { suffix: 42 } }); + assert(body !== null); + t.deepEqual(body.range, { offset: 0, length: 5 }); + t.is(await body.text(), "value"); + + // Check unsatisfiable ranges + const expectations: ThrowsExpectation = { + instanceOf: Error, + message: "get: The requested range is not satisfiable (10039)", + }; + await t.throwsAsync(r2.get("key", { range: { offset: 42 } }), expectations); + await t.throwsAsync(r2.get("key", { range: { length: 0 } }), expectations); + await t.throwsAsync(r2.get("key", { range: { suffix: 0 } }), expectations); + // `workerd` will validate all numbers are positive, and suffix not mixed with + // offset or length: + // https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L239-L265 +}); +test('get: range using "Range" header', async (t) => { + const { r2 } = t.context; + const value = "abcdefghijklmnopqrstuvwxyz"; + await r2.put("key", value); + const range = new Headers() as WorkerHeaders; + + // Check missing "Range" header returns full response + let body = await r2.get("key", { range }); + assert(body !== null); + t.is(await body.text(), value); + t.deepEqual(body.range, { offset: 0, length: 26 }); + + // Check "Range" with start and end returns partial response + range.set("Range", "bytes=3-6"); + body = await r2.get("key", { range }); + assert(body !== null); + t.is(await body.text(), "defg"); + t.deepEqual(body.range, { offset: 3, length: 4 }); + + // Check "Range" with just start returns partial response + range.set("Range", "bytes=10-"); + body = await r2.get("key", { range }); + assert(body !== null); + t.is(await body.text(), "klmnopqrstuvwxyz"); + t.deepEqual(body.range, { offset: 10, length: 16 }); + + // Check "Range" with just end returns partial response + range.set("Range", "bytes=-5"); + body = await r2.get("key", { range }); + assert(body !== null); + t.is(await body.text(), "vwxyz"); + t.deepEqual(body.range, { offset: 21, length: 5 }); + + // Check "Range" with multiple ranges returns full response + range.set("Range", "bytes=5-6,10-11"); + body = await r2.get("key", { range }); + assert(body !== null); + t.is(await body.text(), value); + t.deepEqual(body.range, { offset: 0, length: 26 }); +}); +test("get: returns body only if passes onlyIf", async (t) => { + const { r2 } = t.context; + const pastDate = new Date(Date.now() - 60_000); + await r2.put("key", "value"); + const futureDate = new Date(Date.now() + 60_000); + const etag = hash("value"); + const badEtag = hash("👻"); + + // `workerd` will handle extracting `onlyIf`s from `Header`s: + // https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L195-L201 + // Only doing basic tests here, more complex tests are in validator.spec.ts + + const pass = async (cond: R2Conditional) => { + const object = await r2.get("key", { onlyIf: cond }); + t.not(object, null); + t.true(object instanceof TestR2ObjectBody); + }; + const fail = async (cond: R2Conditional) => { + const object = await r2.get("key", { onlyIf: cond }); + t.not(object, null); + // Can't test if `object instanceof TestR2Object` as + // `TestR2ObjectBody extends TestR2Object` + t.false(object instanceof TestR2ObjectBody); + }; + + await pass({ etagMatches: etag }); + await fail({ etagMatches: badEtag }); + + await fail({ etagDoesNotMatch: etag }); + await pass({ etagDoesNotMatch: badEtag }); + + await pass({ uploadedBefore: futureDate }); + await fail({ uploadedBefore: pastDate }); + + await fail({ uploadedAfter: futureDate }); + await pass({ uploadedAfter: pastDate }); +}); + +test("put: returns metadata for created object", async (t) => { + const { r2, ns } = t.context; + const start = Date.now(); + // `workerd` will handle extracting `httpMetadata`s from `Header`s: + // https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L410-L420 + const object = await r2.put("key", "value", { + httpMetadata: { + contentType: "text/plain", + contentLanguage: "en-GB", + contentDisposition: 'attachment; filename="value.txt"', + contentEncoding: "gzip", + cacheControl: "max-age=3600", + cacheExpiry: new Date("Fri, 24 Feb 2023 00:00:00 GMT"), + }, + customMetadata: { key: "value" }, + }); + t.is(object.key, `${ns}key`); + t.regex(object.version, /^[0-9a-f]{32}$/); + t.is(object.size, "value".length); + t.is(object.etag, "2063c1608d6e0baf80249c42e2be5804"); + t.is(object.httpEtag, `"2063c1608d6e0baf80249c42e2be5804"`); + t.deepEqual(object.checksums.toJSON(), { + md5: "2063c1608d6e0baf80249c42e2be5804", + }); + t.deepEqual(object.httpMetadata, { + contentType: "text/plain", + contentLanguage: "en-GB", + contentDisposition: 'attachment; filename="value.txt"', + contentEncoding: "gzip", + cacheControl: "max-age=3600", + cacheExpiry: new Date("Fri, 24 Feb 2023 00:00:00 GMT"), + }); + t.deepEqual(object.customMetadata, { key: "value" }); + t.is(object.range, undefined); + isWithin(t, 3000, object.uploaded.getTime(), start); +}); +test("put: overrides existing keys", async (t) => { + const { r2 } = t.context; + await r2.put("key", "value1"); + await r2.put("key", "value2"); + const body = await r2.get("key"); + assert(body !== null); + t.is(await body.text(), "value2"); +}); +test(validatesKeyMacro, { method: "put", f: (r2, key) => r2.put(key, "v") }); +test("put: validates checksums", async (t) => { + const { r2 } = t.context; + const expectations = ( + name: string, + provided: string, + expected: string + ): ThrowsExpectation => ({ + instanceOf: Error, + message: [ + `put: The ${name} checksum you specified did not match what we received.`, + `You provided a ${name} checksum with value: ${provided}`, + `Actual ${name} was: ${expected} (10037)`, + ].join("\n"), + }); + + // `workerd` validates types, hex strings, hash lengths and that we're only + // specifying one hash: + // https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L441-L520 + + // Check only stores is computed hash matches + const md5 = hash("value", "md5"); + await r2.put("key", "value", { md5 }); + const badMd5 = md5.replace("0", "1"); + await t.throwsAsync( + r2.put("key", "value", { md5: badMd5 }), + expectations("MD5", badMd5, md5) + ); + let checksums = (await r2.head("key"))?.checksums.toJSON(); + t.deepEqual(checksums, { md5 }); + + const sha1 = hash("value", "sha1"); + await r2.put("key", "value", { sha1 }); + const badSha1 = sha1.replace("0", "1"); + await t.throwsAsync( + r2.put("key", "value", { sha1: badSha1 }), + expectations("SHA-1", badSha1, sha1) + ); + // Check `get()` returns checksums + checksums = (await r2.get("key"))?.checksums.toJSON(); + t.deepEqual(checksums, { md5, sha1 }); + + const sha256 = hash("value", "sha256"); + // Check always stores lowercase hash + await r2.put("key", "value", { sha256: sha256.toUpperCase() }); + const badSha256 = sha256.replace("0", "1"); + await t.throwsAsync( + r2.put("key", "value", { sha256: badSha256 }), + expectations("SHA-256", badSha256, sha256) + ); + checksums = (await r2.head("key"))?.checksums.toJSON(); + t.deepEqual(checksums, { md5, sha256 }); + + const sha384 = hash("value", "sha384"); + await r2.put("key", "value", { sha384 }); + const badSha384 = sha384.replace("0", "1"); + await t.throwsAsync( + r2.put("key", "value", { sha384: badSha384 }), + expectations("SHA-384", badSha384, sha384) + ); + checksums = (await r2.head("key"))?.checksums.toJSON(); + t.deepEqual(checksums, { md5, sha384 }); + + const sha512 = hash("value", "sha512"); + await r2.put("key", "value", { sha512 }); + const badSha512 = sha512.replace("0", "1"); + await t.throwsAsync( + r2.put("key", "value", { sha512: badSha512 }), + expectations("SHA-512", badSha512, sha512) + ); + checksums = (await r2.head("key"))?.checksums.toJSON(); + t.deepEqual(checksums, { md5, sha512 }); +}); +test("put: stores only if passes onlyIf", async (t) => { + const { r2 } = t.context; + const pastDate = new Date(Date.now() - 60_000); + const futureDate = new Date(Date.now() + 300_000); + const etag = hash("1"); + const badEtag = hash("👻"); + + const reset = () => r2.put("key", "1"); + await reset(); + + const pass = async (cond: R2Conditional) => { + const object = await r2.put("key", "2", { onlyIf: cond }); + t.not(object, null); + t.is(await (await r2.get("key"))?.text(), "2"); + await reset(); + }; + const fail = async (cond: R2Conditional) => { + const object = await r2.put("key", "2", { onlyIf: cond }); + t.is(object as R2Object | null, null); + t.is(await (await r2.get("key"))?.text(), "1"); + // No `reset()` as we've just checked we didn't update anything + }; + + // `workerd` will handle extracting `onlyIf`s from `Header`s: + // https://github.com/cloudflare/workerd/blob/4290f9717bc94647d9c8afd29602cdac97fdff1b/src/workerd/api/r2-bucket.c%2B%2B#L195-L201 + // Only doing basic tests here, more complex tests are in validator.spec.ts + + await pass({ etagMatches: etag }); + await fail({ etagMatches: badEtag }); + + await fail({ etagDoesNotMatch: etag }); + await pass({ etagDoesNotMatch: badEtag }); + + await pass({ uploadedBefore: futureDate }); + await fail({ uploadedBefore: pastDate }); + + await fail({ uploadedAfter: futureDate }); + await pass({ uploadedAfter: pastDate }); + + // Check non-existent key with failed condition + const object = await r2.put("no-key", "2", { onlyIf: { etagMatches: etag } }); + t.is(object as R2Object | null, null); +}); + +test("delete: deletes existing keys", async (t) => { + const { r2 } = t.context; + + // Check does nothing with non-existent key + await r2.delete("key"); + + // Check deletes single key + await r2.put("key", "value"); + t.not(await r2.head("key"), null); + await r2.delete("key"); + t.is(await r2.head("key"), null); + + // Check deletes multiple keys, skipping non-existent keys + await r2.put("key1", "value1"); + await r2.put("key2", "value2"); + await r2.put("key3", "value3"); + await r2.delete(["key1", "key200", "key3"]); + t.is(await r2.head("key1"), null); + t.not(await r2.head("key2"), null); + t.is(await r2.head("key3"), null); +}); +test(validatesKeyMacro, { method: "delete", f: (r2, key) => r2.delete(key) }); +test("delete: validates keys", validatesKeyMacro, { + method: "delete", + f: (r2, key) => r2.delete(["valid key", key]), +}); + +const listMacro: Macro< + [ + { + keys: string[]; + options?: R2ListOptions; + pages: string[][]; + } + ], + Context +> = { + title(providedTitle) { + return `list: ${providedTitle}`; + }, + async exec(t, { keys, options, pages }) { + const { r2, ns } = t.context; + + // Seed bucket + for (let i = 0; i < keys.length; i++) await r2.put(keys[i], `value${i}`); + + let lastCursor: string | undefined; + for (let pageIndex = 0; pageIndex < pages.length; pageIndex++) { + const { objects, truncated, cursor } = await r2.list({ + ...options, + prefix: ns + (options?.prefix ?? ""), + cursor: options?.cursor ?? lastCursor, + startAfter: options?.startAfter ? ns + options.startAfter : undefined, + }); + + // Check objects in page match + const objectKeys = objects.map(({ key }) => key.substring(ns.length)); + const expectedKeys = pages[pageIndex]; + t.deepEqual(objectKeys, expectedKeys, `Unexpected page ${pageIndex}`); + + // Check other return values and advance cursor to next page + if (pageIndex === pages.length - 1) { + // Last Page + t.false(truncated); + t.is(cursor, undefined); + } else { + t.true(truncated); + t.not(cursor, undefined); + } + lastCursor = cursor; + } + }, +}; +test("lists keys in sorted order", listMacro, { + keys: ["key3", "key1", "key2", ", ", "!"], + pages: [["!", ", ", "key1", "key2", "key3"]], +}); +test("lists keys matching prefix", listMacro, { + keys: ["section1key1", "section1key2", "section2key1"], + options: { prefix: "section1" }, + pages: [["section1key1", "section1key2"]], +}); +test("returns an empty list with no keys", listMacro, { + keys: [], + pages: [[]], +}); +test("returns an empty list with no matching keys", listMacro, { + keys: ["key1", "key2", "key3"], + options: { prefix: "none" }, + pages: [[]], +}); +test("returns an empty list with an invalid cursor", listMacro, { + keys: ["key1", "key2", "key3"], + options: { cursor: "bad" }, + pages: [[]], +}); +test("paginates keys", listMacro, { + keys: ["key1", "key2", "key3"], + options: { limit: 2 }, + pages: [["key1", "key2"], ["key3"]], +}); +test("paginates keys matching prefix", listMacro, { + keys: ["section1key1", "section1key2", "section1key3", "section2key1"], + options: { prefix: "section1", limit: 2 }, + pages: [["section1key1", "section1key2"], ["section1key3"]], +}); +test("lists keys starting from startAfter exclusive", listMacro, { + keys: ["key1", "key2", "key3", "key4"], + options: { startAfter: "key2" }, + pages: [["key3", "key4"]], +}); +test( + "lists keys with startAfter and limit (where startAfter matches key)", + listMacro, + { + keys: ["key1", "key2", "key3", "key4"], + options: { startAfter: "key1", limit: 2 }, + // TODO(soon): this should be `[["key2", "key3"], ["key4"]]`, see comment in + // `gateway.ts` for details, we'll fix this with the new storage system + pages: [["key2"], ["key3", "key4"]], + } +); +test( + "lists keys with startAfter and limit (where startAfter doesn't match key)", + listMacro, + { + keys: ["key1", "key2", "key3", "key4"], + options: { startAfter: "key", limit: 2 }, + pages: [ + ["key1", "key2"], + ["key3", "key4"], + ], + } +); + +test("list: returns metadata with objects", async (t) => { + const { r2, ns } = t.context; + const start = Date.now(); + await r2.put("key", "value"); + const { objects } = await r2.list({ prefix: ns }); + t.is(objects.length, 1); + const object = objects[0]; + t.is(object.key, `${ns}key`); + t.regex(object.version, /^[0-9a-f]{32}$/); + t.is(object.size, "value".length); + t.is(object.etag, "2063c1608d6e0baf80249c42e2be5804"); + t.is(object.httpEtag, `"2063c1608d6e0baf80249c42e2be5804"`); + t.deepEqual(object.checksums.toJSON(), { + md5: "2063c1608d6e0baf80249c42e2be5804", + }); + t.deepEqual(object.httpMetadata, {}); + t.deepEqual(object.customMetadata, {}); + t.is(object.range, undefined); + isWithin(t, 3000, object.uploaded.getTime(), start); +}); +test("list: paginates with variable limit", async (t) => { + const { r2, ns } = t.context; + await r2.put("key1", "value1"); + await r2.put("key2", "value2"); + await r2.put("key3", "value3"); + + // Get first page + let result = await r2.list({ prefix: ns, limit: 1 }); + t.is(result.objects.length, 1); + t.is(result.objects[0].key, `${ns}key1`); + t.true(result.truncated); + t.not(result.cursor, undefined); + + // Get second page with different limit + result = await r2.list({ prefix: ns, limit: 2, cursor: result.cursor }); + t.is(result.objects.length, 2); + t.is(result.objects[0].key, `${ns}key2`); + t.is(result.objects[1].key, `${ns}key3`); + t.false(result.truncated); + t.is(result.cursor, undefined); +}); +test("list: returns keys inserted whilst paginating", async (t) => { + const { r2, ns } = t.context; + await r2.put("key1", "value1"); + await r2.put("key3", "value3"); + await r2.put("key5", "value5"); + + // Get first page + let result = await r2.list({ prefix: ns, limit: 2 }); + t.is(result.objects.length, 2); + t.is(result.objects[0].key, `${ns}key1`); + t.is(result.objects[1].key, `${ns}key3`); + t.true(result.truncated); + t.not(result.cursor, undefined); + + // Insert key2 and key4 + await r2.put("key2", "value2"); + await r2.put("key4", "value4"); + + // Get second page, expecting to see key4 but not key2 + result = await r2.list({ prefix: ns, limit: 2, cursor: result.cursor }); + t.is(result.objects.length, 2); + t.is(result.objects[0].key, `${ns}key4`); + t.is(result.objects[1].key, `${ns}key5`); + t.false(result.truncated); + t.is(result.cursor, undefined); +}); +test("list: validates limit", async (t) => { + const { r2 } = t.context; + // R2 actually accepts 0 and -1 as valid limits, but this is probably a bug + // TODO(now): report to R2 team + await t.throwsAsync(r2.list({ limit: 0 }), { + instanceOf: Error, + message: "list: MaxKeys params must be positive integer <= 1000. (10022)", + }); + await t.throwsAsync(r2.list({ limit: 1_001 }), { + instanceOf: Error, + message: "list: MaxKeys params must be positive integer <= 1000. (10022)", + }); +}); +test("list: includes httpMetadata and customMetadata if specified", async (t) => { + const { r2, ns } = t.context; + await r2.put("key1", "value1", { + httpMetadata: { contentEncoding: "gzip" }, + customMetadata: { foo: "bar" }, + }); + await r2.put("key2", "value2", { + httpMetadata: { contentType: "dinosaur" }, + customMetadata: { bar: "fiz" }, + }); + await r2.put("key3", "value3", { + httpMetadata: { contentLanguage: "en" }, + customMetadata: { fiz: "bang" }, + }); + + // Check no metadata included by default + let result = await r2.list({ prefix: ns }); + t.deepEqual(result.objects.length, 3); + t.deepEqual(result.objects[0].httpMetadata, {}); + t.deepEqual(result.objects[0].customMetadata, {}); + t.deepEqual(result.objects[1].httpMetadata, {}); + t.deepEqual(result.objects[1].customMetadata, {}); + t.deepEqual(result.objects[2].httpMetadata, {}); + t.deepEqual(result.objects[2].customMetadata, {}); + + // Check httpMetadata included if specified + result = await r2.list({ prefix: ns, include: ["httpMetadata"] }); + t.deepEqual(result.objects.length, 3); + t.deepEqual(result.objects[0].httpMetadata, { contentEncoding: "gzip" }); + t.deepEqual(result.objects[0].customMetadata, {}); + t.deepEqual(result.objects[1].httpMetadata, { contentType: "dinosaur" }); + t.deepEqual(result.objects[1].customMetadata, {}); + t.deepEqual(result.objects[2].httpMetadata, { contentLanguage: "en" }); + t.deepEqual(result.objects[2].customMetadata, {}); + + // Check customMetadata included if specified + result = await r2.list({ prefix: ns, include: ["customMetadata"] }); + t.deepEqual(result.objects.length, 3); + t.deepEqual(result.objects[0].httpMetadata, {}); + t.deepEqual(result.objects[0].customMetadata, { foo: "bar" }); + t.deepEqual(result.objects[1].httpMetadata, {}); + t.deepEqual(result.objects[1].customMetadata, { bar: "fiz" }); + t.deepEqual(result.objects[2].httpMetadata, {}); + t.deepEqual(result.objects[2].customMetadata, { fiz: "bang" }); + + // Check both included if specified + result = await r2.list({ + prefix: ns, + include: ["httpMetadata", "customMetadata"], + }); + t.deepEqual(result.objects.length, 3); + t.deepEqual(result.objects[0].httpMetadata, { contentEncoding: "gzip" }); + t.deepEqual(result.objects[0].customMetadata, { foo: "bar" }); + t.deepEqual(result.objects[1].httpMetadata, { contentType: "dinosaur" }); + t.deepEqual(result.objects[1].customMetadata, { bar: "fiz" }); + t.deepEqual(result.objects[2].httpMetadata, { contentLanguage: "en" }); + t.deepEqual(result.objects[2].customMetadata, { fiz: "bang" }); + + // `workerd` will validate the `include` array: + // https://github.com/cloudflare/workerd/blob/44907df95f231a2411d4e9767400951e55c6eb4c/src/workerd/api/r2-bucket.c%2B%2B#L737 +}); +test("list: returns correct delimitedPrefixes for delimiter and prefix", async (t) => { + const { r2, ns } = t.context; + const values: Record = { + // In lexicographic key order, so `allKeys` is sorted + "dir0/file0": "value0", + "dir0/file1": "value1", + "dir0/sub0/file2": "value2", + "dir0/sub0/file3": "value3", + "dir0/sub1/file4": "value4", + "dir0/sub1/file5": "value5", + "dir1/file6": "value6", + "dir1/file7": "value7", + file8: "value8", + file9: "value9", + }; + const allKeys = Object.keys(values); + for (const [key, value] of Object.entries(values)) await r2.put(key, value); + + const keys = (result: R2Objects) => + result.objects.map(({ key }) => key.substring(ns.length)); + const delimitedPrefixes = (result: R2Objects) => + result.delimitedPrefixes.map((prefix) => prefix.substring(ns.length)); + const allKeysWithout = (...exclude: string[]) => + allKeys.filter((value) => !exclude.includes(value)); + + // Check no/empty delimiter + let result = await r2.list({ prefix: ns }); + t.false(result.truncated); + t.deepEqual(keys(result), allKeys); + t.deepEqual(delimitedPrefixes(result), []); + result = await r2.list({ prefix: ns, delimiter: "" }); + t.false(result.truncated); + t.deepEqual(keys(result), allKeys); + t.deepEqual(delimitedPrefixes(result), []); + + // Check with file delimiter + result = await r2.list({ prefix: ns, delimiter: "file8" }); + t.false(result.truncated); + t.deepEqual(keys(result), allKeysWithout("file8")); + t.deepEqual(delimitedPrefixes(result), ["file8"]); + // ...and prefix + result = await r2.list({ prefix: `${ns}dir1/`, delimiter: "file6" }); + t.false(result.truncated); + t.deepEqual(keys(result), ["dir1/file7"]); + t.deepEqual(delimitedPrefixes(result), ["dir1/file6"]); + + // Check with "/" delimiter + result = await r2.list({ prefix: ns, delimiter: "/" }); + t.false(result.truncated); + t.deepEqual(keys(result), ["file8", "file9"]); + t.deepEqual(delimitedPrefixes(result), ["dir0/", "dir1/"]); + // ...and prefix + result = await r2.list({ prefix: `${ns}dir0/`, delimiter: "/" }); + t.false(result.truncated); + t.deepEqual(keys(result), ["dir0/file0", "dir0/file1"]); + t.deepEqual(delimitedPrefixes(result), ["dir0/sub0/", "dir0/sub1/"]); + result = await r2.list({ prefix: `${ns}dir0`, delimiter: "/" }); + t.false(result.truncated); + t.deepEqual(keys(result), []); + t.deepEqual(delimitedPrefixes(result), ["dir0/"]); + + // Check with limit (limit includes returned objects and delimitedPrefixes) + const opt: R2ListOptions = { prefix: `${ns}dir0/`, delimiter: "/", limit: 2 }; + result = await r2.list(opt); + t.true(result.truncated); + t.deepEqual(keys(result), ["dir0/file0", "dir0/file1"]); + t.deepEqual(delimitedPrefixes(result), []); + result = await r2.list({ ...opt, cursor: result.cursor }); + t.false(result.truncated); + t.deepEqual(keys(result), []); + t.deepEqual(delimitedPrefixes(result), ["dir0/sub0/", "dir0/sub1/"]); +}); + +test.serial("operations persist stored data", async (t) => { + const { r2, ns } = t.context; + + // Create new temporary file-system persistence directory + const tmp = await useTmp(t); + const storage = new FileStorage(path.join(tmp, "bucket")); + + // Set option, then reset after test + await t.context.setOptions({ ...opts, r2Persist: tmp }); + t.teardown(() => t.context.setOptions(opts)); + + // Check put respects persist + await r2.put("key", "value"); + const stored = await storage.get(`${ns}key`); + t.is(utf8Decode(stored?.value), "value"); + + // Check head respects persist + const object = await r2.head("key"); + t.is(object?.size, 5); + + // Check get respects persist + const objectBody = await r2.get("key"); + t.is(await objectBody?.text(), "value"); + + // Check list respects persist + const { objects } = await r2.list(); + t.is(objects.length, 1); + t.is(objects[0].size, 5); + + // Check delete respects persist + await r2.delete("key"); + t.false(await storage.has(`${ns}key`)); +}); diff --git a/packages/tre/test/test-shared/asserts.ts b/packages/tre/test/test-shared/asserts.ts new file mode 100644 index 000000000..be288e4d5 --- /dev/null +++ b/packages/tre/test/test-shared/asserts.ts @@ -0,0 +1,18 @@ +import assert from "assert"; +import { ExecutionContext } from "ava"; + +export function isWithin( + t: ExecutionContext, + epsilon: number, + actual?: number, + expected?: number +): void { + t.not(actual, undefined); + t.not(expected, undefined); + assert(actual !== undefined && expected !== undefined); + const difference = Math.abs(actual - expected); + t.true( + difference <= epsilon, + `${actual} is not within ${epsilon} of ${expected}, difference is ${difference}` + ); +} diff --git a/packages/tre/test/test-shared/log.ts b/packages/tre/test/test-shared/log.ts index 11ae3c507..cae591327 100644 --- a/packages/tre/test/test-shared/log.ts +++ b/packages/tre/test/test-shared/log.ts @@ -1,10 +1,13 @@ import { Log, LogLevel } from "@miniflare/tre"; +import { ExecutionContext } from "ava"; + +const consoleLog = new Log(LogLevel.VERBOSE); export type LogEntry = [level: LogLevel, message: string]; export class TestLog extends Log { logs: LogEntry[] = []; - constructor() { + constructor(private readonly t?: ExecutionContext) { super(LogLevel.VERBOSE); } @@ -17,7 +20,12 @@ export class TestLog extends Log { } error(message: Error): void { - throw message; + if (this.t === undefined) { + throw message; + } else { + consoleLog.error(message); + this.t.fail(message.stack); + } } logsAtLevel(level: LogLevel): string[] { diff --git a/packages/tre/test/test-shared/miniflare.ts b/packages/tre/test/test-shared/miniflare.ts index 9c556f085..2e242d2c4 100644 --- a/packages/tre/test/test-shared/miniflare.ts +++ b/packages/tre/test/test-shared/miniflare.ts @@ -53,7 +53,7 @@ export function miniflareTest< export default { async fetch(request, env, ctx) { try { - return handler(globalThis, request, env, ctx); + return await handler(globalThis, request, env, ctx); } catch (e) { const error = reduceError(e); return Response.json(error, { @@ -71,7 +71,7 @@ export function miniflareTest< const test = anyTest as TestFn; test.before(async (t) => { - const log = new TestLog(); + const log = new TestLog(t); const clock: TestClock = { timestamp: 1_000_000 }; // 1000s const clockFunction = () => clock.timestamp; From 6016107eadb0fd8386e353fbe8d47b8b27f213e5 Mon Sep 17 00:00:00 2001 From: bcoll Date: Mon, 27 Feb 2023 19:08:27 +0000 Subject: [PATCH 14/15] Enable GitHub actions on `tre` branch --- .github/workflows/test.yml | 22 +++++++++++----------- package.json | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c17091d0..87aa60130 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,9 +7,9 @@ name: Test on: push: - branches: [master] + branches: [master, tre] pull_request: - branches: [master] + branches: [master, tre] jobs: test: @@ -18,14 +18,14 @@ jobs: strategy: matrix: - os: [ubuntu-latest, windows-latest] - node: [16.13.0, 18.x] + os: [ubuntu-latest] # `windows-latest` not yet supported + node: [16.13.0, latest] # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node }} - uses: actions/setup-node@v2 + uses: actions/setup-node@v3 with: node-version: ${{ matrix.node }} - run: npm ci @@ -35,11 +35,11 @@ jobs: name: "Lint & Type Check" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Use Node.js 18.x - uses: actions/setup-node@v2 + - uses: actions/checkout@v3 + - name: Use Node.js LTS + uses: actions/setup-node@v3 with: - node-version: 18.x + node-version: lts/* - run: npm ci - run: npm run lint - - run: npm run types:build + - run: npm run types:bundle diff --git a/package.json b/package.json index 5f41c3c84..f8d661cc4 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "packages/*" ], "scripts": { + "postinstall": "patch-package", "build": "node scripts/build.mjs", "capnp:workerd": "capnpc -o ts packages/tre/src/runtime/config/workerd.capnp", "clean": "rimraf './packages/*/dist' './dist'", From 66cf2ec7d199ea158cbd00d0886569cac5b8b4d1 Mon Sep 17 00:00:00 2001 From: bcoll Date: Mon, 27 Feb 2023 19:56:05 +0000 Subject: [PATCH 15/15] Add validation for custom metadata size --- packages/tre/src/plugins/r2/errors.ts | 11 ++++ packages/tre/src/plugins/r2/gateway.ts | 1 + packages/tre/src/plugins/r2/validator.ts | 26 +++++++- packages/tre/test/plugins/r2/index.spec.ts | 72 +++++++++++++++++++--- 4 files changed, 98 insertions(+), 12 deletions(-) diff --git a/packages/tre/src/plugins/r2/errors.ts b/packages/tre/src/plugins/r2/errors.ts index a43422795..42cd45ea9 100644 --- a/packages/tre/src/plugins/r2/errors.ts +++ b/packages/tre/src/plugins/r2/errors.ts @@ -14,6 +14,7 @@ enum CfCode { InternalError = 10001, NoSuchObjectKey = 10007, EntityTooLarge = 100100, + MetadataTooLarge = 10012, InvalidObjectName = 10020, InvalidMaxKeys = 10022, InvalidArgument = 10029, @@ -109,6 +110,16 @@ export class EntityTooLarge extends R2Error { } } +export class MetadataTooLarge extends R2Error { + constructor() { + super( + Status.BadRequest, + "Your metadata headers exceed the maximum allowed metadata size.", + CfCode.MetadataTooLarge + ); + } +} + export class BadDigest extends R2Error { constructor( algorithm: "MD5" | "SHA-1" | "SHA-256" | "SHA-384" | "SHA-512", diff --git a/packages/tre/src/plugins/r2/gateway.ts b/packages/tre/src/plugins/r2/gateway.ts index 88ac2a559..be0bee5c4 100644 --- a/packages/tre/src/plugins/r2/gateway.ts +++ b/packages/tre/src/plugins/r2/gateway.ts @@ -111,6 +111,7 @@ export class R2Gateway { const checksums = validate .key(key) .size(value) + .metadataSize(options.customMetadata) .condition(meta, options.onlyIf) .hash(value, options); diff --git a/packages/tre/src/plugins/r2/validator.ts b/packages/tre/src/plugins/r2/validator.ts index acebe5498..73cee5484 100644 --- a/packages/tre/src/plugins/r2/validator.ts +++ b/packages/tre/src/plugins/r2/validator.ts @@ -5,6 +5,7 @@ import { EntityTooLarge, InvalidMaxKeys, InvalidObjectName, + MetadataTooLarge, PreconditionFailed, } from "./errors"; import { R2Object, R2ObjectMetadata } from "./r2Object"; @@ -12,7 +13,9 @@ import { R2Conditional } from "./schemas"; export const MAX_LIST_KEYS = 1_000; const MAX_KEY_SIZE = 1024; -const MAX_VALUE_SIZE = 5 * 1_000 * 1_000 * 1_000 - 5 * 1_000 * 1_000; +// https://developers.cloudflare.com/r2/platform/limits/ +const MAX_VALUE_SIZE = 5_000_000_000 - 5_000_000; // 5GB - 5MB +const MAX_METADATA_SIZE = 2048; // 2048B function identity(ms: number) { return ms; @@ -67,6 +70,14 @@ export type R2Hashes = Record< Buffer | undefined >; +function serialisedLength(x: string) { + // Adapted from internal R2 gateway implementation + for (let i = 0; i < x.length; i++) { + if (x.charCodeAt(i) >= 256) return x.length * 2; + } + return x.length; +} + export class Validator { hash(value: Uint8Array, hashes: R2Hashes): R2StringChecksums { const checksums: R2StringChecksums = {}; @@ -95,13 +106,24 @@ export class Validator { } size(value: Uint8Array): Validator { - // TODO: should we be validating httpMetadata/customMetadata size too if (value.byteLength > MAX_VALUE_SIZE) { throw new EntityTooLarge(); } return this; } + metadataSize(customMetadata?: Record): Validator { + if (customMetadata === undefined) return this; + let metadataLength = 0; + for (const [key, value] of Object.entries(customMetadata)) { + metadataLength += serialisedLength(key) + serialisedLength(value); + } + if (metadataLength > MAX_METADATA_SIZE) { + throw new MetadataTooLarge(); + } + return this; + } + key(key: string): Validator { const keyLength = Buffer.byteLength(key); if (keyLength >= MAX_KEY_SIZE) { diff --git a/packages/tre/test/plugins/r2/index.spec.ts b/packages/tre/test/plugins/r2/index.spec.ts index 91a26638a..9c75fdfac 100644 --- a/packages/tre/test/plugins/r2/index.spec.ts +++ b/packages/tre/test/plugins/r2/index.spec.ts @@ -146,13 +146,31 @@ class TestR2Bucket implements R2Bucket { options?: R2PutOptions ): Promise { const url = `http://localhost/${encodeURIComponent(this.ns + key)}`; + + let valueBlob: Blob; + if (value === null) { + valueBlob = new Blob([]); + } else if (value instanceof ArrayBuffer) { + valueBlob = new Blob([new Uint8Array(value)]); + } else if (ArrayBuffer.isView(value)) { + valueBlob = new Blob([viewToArray(value)]); + } else if (value instanceof ReadableStream) { + // @ts-expect-error `ReadableStream` is an `AsyncIterable` + valueBlob = await blob(value); + } else { + valueBlob = new Blob([value]); + } + + // We can't store options in headers as some put() tests include extended + // characters in them, and `undici` validates all headers are byte strings, + // so use a form data body instead + const formData = new FormData(); + formData.set("options", maybeJsonStringify(options)); + formData.set("value", valueBlob); const res = await this.mf.dispatchFetch(url, { method: "PUT", - headers: { - Accept: "multipart/form-data", - "Test-Options": maybeJsonStringify(options), - }, - body: ArrayBuffer.isView(value) ? viewToArray(value) : value, + headers: { Accept: "multipart/form-data" }, + body: formData, }); return deconstructResponse(res); } @@ -376,11 +394,12 @@ const test = miniflareTest<{ BUCKET: R2Bucket }, Context>( const options = maybeJsonParse(optionsHeader); return constructResponse(await env.BUCKET.get(key, options)); } else if (method === "PUT") { - const optionsHeader = request.headers.get("Test-Options"); - const options = maybeJsonParse(optionsHeader); - return constructResponse( - await env.BUCKET.put(key, await request.arrayBuffer(), options) - ); + const formData = await request.formData(); + const optionsData = formData.get("options"); + if (typeof optionsData !== "string") throw new TypeError(); + const options = maybeJsonParse(optionsData); + const value = formData.get("value"); + return constructResponse(await env.BUCKET.put(key, value, options)); } else if (method === "DELETE") { const keys = await request.json(); await env.BUCKET.delete(keys); @@ -796,6 +815,39 @@ test("put: stores only if passes onlyIf", async (t) => { const object = await r2.put("no-key", "2", { onlyIf: { etagMatches: etag } }); t.is(object as R2Object | null, null); }); +test("put: validates metadata size", async (t) => { + const { r2 } = t.context; + + // TODO(soon): add check for max value size once we have streaming support + // (don't really want to allocate 5GB buffers in tests :sweat_smile:) + + const expectations: ThrowsExpectation = { + instanceOf: Error, + message: + "put: Your metadata headers exceed the maximum allowed metadata size. (10012)", + }; + + // Check with ASCII characters + await r2.put("key", "value", { customMetadata: { key: "x".repeat(2045) } }); + await t.throwsAsync( + r2.put("key", "value", { customMetadata: { key: "x".repeat(2046) } }), + expectations + ); + await r2.put("key", "value", { customMetadata: { hi: "x".repeat(2046) } }); + + // Check with extended characters: note "🙂" is 2 UTF-16 code units, so + // `"🙂".length === 2`, and it requires 4 bytes to store + await r2.put("key", "value", { customMetadata: { key: "🙂".repeat(511) } }); // 3 + 4*511 = 2047 + await r2.put("key", "value", { customMetadata: { key1: "🙂".repeat(511) } }); // 4 + 4*511 = 2048 + await t.throwsAsync( + r2.put("key", "value", { customMetadata: { key12: "🙂".repeat(511) } }), // 5 + 4*511 = 2049 + expectations + ); + await t.throwsAsync( + r2.put("key", "value", { customMetadata: { key: "🙂".repeat(512) } }), // 3 + 4*512 = 2051 + expectations + ); +}); test("delete: deletes existing keys", async (t) => { const { r2 } = t.context;