diff --git a/README.md b/README.md index 6c88b571601..7966c03d7dd 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ These GitHub repositories provide supplementary resources for Rush Stack: | [/apps/rush](./apps/rush/) | [![npm version](https://badge.fury.io/js/%40microsoft%2Frush.svg)](https://badge.fury.io/js/%40microsoft%2Frush) | [changelog](./apps/rush/CHANGELOG.md) | [@microsoft/rush](https://www.npmjs.com/package/@microsoft/rush) | | [/apps/rush-mcp-server](./apps/rush-mcp-server/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Fmcp-server.svg)](https://badge.fury.io/js/%40rushstack%2Fmcp-server) | [changelog](./apps/rush-mcp-server/CHANGELOG.md) | [@rushstack/mcp-server](https://www.npmjs.com/package/@rushstack/mcp-server) | | [/apps/trace-import](./apps/trace-import/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Ftrace-import.svg)](https://badge.fury.io/js/%40rushstack%2Ftrace-import) | [changelog](./apps/trace-import/CHANGELOG.md) | [@rushstack/trace-import](https://www.npmjs.com/package/@rushstack/trace-import) | +| [/apps/zipsync](./apps/zipsync/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Fzipsync.svg)](https://badge.fury.io/js/%40rushstack%2Fzipsync) | [changelog](./apps/zipsync/CHANGELOG.md) | [@rushstack/zipsync](https://www.npmjs.com/package/@rushstack/zipsync) | | [/eslint/eslint-bulk](./eslint/eslint-bulk/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Feslint-bulk.svg)](https://badge.fury.io/js/%40rushstack%2Feslint-bulk) | [changelog](./eslint/eslint-bulk/CHANGELOG.md) | [@rushstack/eslint-bulk](https://www.npmjs.com/package/@rushstack/eslint-bulk) | | [/eslint/eslint-config](./eslint/eslint-config/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Feslint-config.svg)](https://badge.fury.io/js/%40rushstack%2Feslint-config) | [changelog](./eslint/eslint-config/CHANGELOG.md) | [@rushstack/eslint-config](https://www.npmjs.com/package/@rushstack/eslint-config) | | [/eslint/eslint-patch](./eslint/eslint-patch/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Feslint-patch.svg)](https://badge.fury.io/js/%40rushstack%2Feslint-patch) | [changelog](./eslint/eslint-patch/CHANGELOG.md) | [@rushstack/eslint-patch](https://www.npmjs.com/package/@rushstack/eslint-patch) | diff --git a/apps/zipsync/.npmignore b/apps/zipsync/.npmignore new file mode 100644 index 00000000000..bc349f9a4be --- /dev/null +++ b/apps/zipsync/.npmignore @@ -0,0 +1,32 @@ +# THIS IS A STANDARD TEMPLATE FOR .npmignore FILES IN THIS REPO. + +# Ignore all files by default, to avoid accidentally publishing unintended files. +* + +# Use negative patterns to bring back the specific things we want to publish. +!/bin/** +!/lib/** +!/lib-*/** +!/dist/** + +!CHANGELOG.md +!CHANGELOG.json +!heft-plugin.json +!rush-plugin-manifest.json +!ThirdPartyNotice.txt + +# Ignore certain patterns that should not get published. +/dist/*.stats.* +/lib/**/test/ +/lib-*/**/test/ +*.test.js + +# NOTE: These don't need to be specified, because NPM includes them automatically. +# +# package.json +# README.md +# LICENSE + +# --------------------------------------------------------------------------- +# DO NOT MODIFY ABOVE THIS LINE! Add any project-specific overrides below. 
+# --------------------------------------------------------------------------- diff --git a/apps/zipsync/LICENSE b/apps/zipsync/LICENSE new file mode 100644 index 00000000000..e75a1fe895f --- /dev/null +++ b/apps/zipsync/LICENSE @@ -0,0 +1,24 @@ +@rushstack/zipsync + +Copyright (c) Microsoft Corporation. All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/apps/zipsync/README.md b/apps/zipsync/README.md new file mode 100644 index 00000000000..661629a6f94 --- /dev/null +++ b/apps/zipsync/README.md @@ -0,0 +1,48 @@ +# @rushstack/zipsync + +zipsync is a focused tool for packing and unpacking build cache entries using a constrained subset of the ZIP format for high performance. It optimizes the common scenario where most files already exist in the target location and are unchanged. + +## Goals & Rationale + +- **Optimize partial unpack**: Most builds reuse the majority of previously produced outputs. Skipping rewrites preserves filesystem and page cache state. +- **Only write when needed**: Fewer syscalls. +- **Integrated cleanup**: Removes the need for a separate `rm -rf` pass; extra files and empty directories are pruned automatically. +- **ZIP subset**: Compatibility with malware scanners. +- **Fast inspection**: The central directory can be enumerated without inflating the entire archive (unlike tar+gzip). + +## How It Works + +### Pack Flow + +``` +for each file F + write LocalFileHeader(F) + stream chunks: + read -> hash + crc + maybe compress -> write + finalize compressor + write DataDescriptor(F) +add metadata entry (same pattern) +write central directory records +``` + +### Unpack Flow + +``` +load archive -> parse central dir -> read metadata +scan filesystem & delete extraneous entries +for each entry (except metadata): + if unchanged (sha1 matches) => skip + else extract (decompress if needed) +``` + +## Why ZIP (vs tar + gzip) + +Pros for this scenario: + +- Central directory enables cheap listing without decompressing entire payload. +- Widely understood / tooling-friendly (system explorers, scanners, CI tooling). +- Per-file compression keeps selective unpack simple (no need to inflate all bytes). + +Trade-offs: + +- Tar+gzip can exploit cross-file redundancy for better compressed size in datasets with many similar files. 
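+
+## Programmatic Use
+
+Besides the CLI, the package exports `pack` and `unpack` functions (see `src/index.ts`). A minimal sketch of driving them directly, assuming the package entry point re-exports them; paths and directory names are illustrative:
+
+```
+import { Terminal, ConsoleTerminalProvider } from '@rushstack/terminal';
+import { pack, unpack } from '@rushstack/zipsync';
+
+const terminal = new Terminal(new ConsoleTerminalProvider());
+
+// Pack two output folders (relative to baseDir) into a single archive.
+const packResult = pack({
+  terminal,
+  archivePath: 'cache.zip',
+  baseDir: process.cwd(),
+  targetDirectories: ['dist', 'lib'],
+  compression: 'auto'
+});
+terminal.writeLine(`Packed ${packResult.filesPacked} files`);
+
+// Later, restore those folders in place; unchanged files are skipped and extra files are pruned.
+const unpackResult = unpack({
+  terminal,
+  archivePath: 'cache.zip',
+  baseDir: process.cwd(),
+  targetDirectories: ['dist', 'lib']
+});
+terminal.writeLine(`Extracted ${unpackResult.filesExtracted}, skipped ${unpackResult.filesSkipped}`);
+```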
diff --git a/apps/zipsync/bin/zipsync b/apps/zipsync/bin/zipsync new file mode 100755 index 00000000000..aee68e80224 --- /dev/null +++ b/apps/zipsync/bin/zipsync @@ -0,0 +1,2 @@ +#!/usr/bin/env node +require('../lib/start.js'); diff --git a/apps/zipsync/config/jest.config.json b/apps/zipsync/config/jest.config.json new file mode 100644 index 00000000000..f385c6fdc0f --- /dev/null +++ b/apps/zipsync/config/jest.config.json @@ -0,0 +1,4 @@ +{ + "extends": "local-node-rig/profiles/default/config/jest.config.json", + "setupFilesAfterEnv": ["<rootDir>/config/jestSymbolDispose.js"] +} diff --git a/apps/zipsync/config/jestSymbolDispose.js b/apps/zipsync/config/jestSymbolDispose.js new file mode 100644 index 00000000000..25328e10b8c --- /dev/null +++ b/apps/zipsync/config/jestSymbolDispose.js @@ -0,0 +1,8 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +const disposeSymbol = Symbol('Symbol.dispose'); +const asyncDisposeSymbol = Symbol('Symbol.asyncDispose'); + +Symbol.asyncDispose ??= asyncDisposeSymbol; +Symbol.dispose ??= disposeSymbol; diff --git a/apps/zipsync/config/rig.json b/apps/zipsync/config/rig.json new file mode 100644 index 00000000000..165ffb001f5 --- /dev/null +++ b/apps/zipsync/config/rig.json @@ -0,0 +1,7 @@ +{ + // The "rig.json" file directs tools to look for their config files in an external package. + // Documentation for this system: https://www.npmjs.com/package/@rushstack/rig-package + "$schema": "https://developer.microsoft.com/json-schemas/rig-package/rig.schema.json", + + "rigPackageName": "local-node-rig" +} diff --git a/apps/zipsync/eslint.config.js b/apps/zipsync/eslint.config.js new file mode 100644 index 00000000000..c15e6077310 --- /dev/null +++ b/apps/zipsync/eslint.config.js @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+ +const nodeTrustedToolProfile = require('local-node-rig/profiles/default/includes/eslint/flat/profile/node-trusted-tool'); +const friendlyLocalsMixin = require('local-node-rig/profiles/default/includes/eslint/flat/mixins/friendly-locals'); + +module.exports = [ + ...nodeTrustedToolProfile, + ...friendlyLocalsMixin, + { + files: ['**/*.ts', '**/*.tsx'], + languageOptions: { + parserOptions: { + tsconfigRootDir: __dirname + } + } + } +]; diff --git a/apps/zipsync/package.json b/apps/zipsync/package.json new file mode 100644 index 00000000000..86b2e9965c7 --- /dev/null +++ b/apps/zipsync/package.json @@ -0,0 +1,31 @@ +{ + "name": "@rushstack/zipsync", + "version": "0.0.0", + "description": "CLI tool for creating and extracting ZIP archives with intelligent filesystem synchronization", + "repository": { + "type": "git", + "url": "https://github.com/microsoft/rushstack.git", + "directory": "apps/zipsync" + }, + "bin": { + "zipsync": "./bin/zipsync" + }, + "license": "MIT", + "scripts": { + "start": "node lib/start", + "build": "heft build --clean", + "_phase:build": "heft run --only build -- --clean", + "_phase:test": "heft run --only test -- --clean" + }, + "dependencies": { + "@rushstack/terminal": "workspace:*", + "@rushstack/ts-command-line": "workspace:*", + "typescript": "~5.8.2", + "@rushstack/lookup-by-path": "workspace:*" + }, + "devDependencies": { + "@rushstack/heft": "workspace:*", + "eslint": "~9.25.1", + "local-node-rig": "workspace:*" + } +} diff --git a/apps/zipsync/src/ZipSyncCommandLineParser.ts b/apps/zipsync/src/ZipSyncCommandLineParser.ts new file mode 100644 index 00000000000..922ea25fff5 --- /dev/null +++ b/apps/zipsync/src/ZipSyncCommandLineParser.ts @@ -0,0 +1,123 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
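+//
+// Example invocations (illustrative paths; per the parameter definitions below, --compression is
+// currently required in both modes even though it only affects packing):
+//
+//   zipsync --mode pack --archive-path cache.zip --base-dir ./out --target-directory dist --compression auto
+//   zipsync --mode unpack --archive-path cache.zip --base-dir ./out --target-directory dist --compression auto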
+ +import { CommandLineParser } from '@rushstack/ts-command-line/lib/providers/CommandLineParser'; +import type { + CommandLineFlagParameter, + IRequiredCommandLineStringParameter, + IRequiredCommandLineChoiceParameter, + IRequiredCommandLineStringListParameter +} from '@rushstack/ts-command-line/lib/index'; +import type { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider'; +import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; + +import type { IZipSyncMode, ZipSyncOptionCompression } from './zipSyncUtils'; +import { pack, unpack } from './index'; + +export class ZipSyncCommandLineParser extends CommandLineParser { + private readonly _debugParameter: CommandLineFlagParameter; + private readonly _verboseParameter: CommandLineFlagParameter; + private readonly _modeParameter: IRequiredCommandLineChoiceParameter; + private readonly _archivePathParameter: IRequiredCommandLineStringParameter; + private readonly _baseDirParameter: IRequiredCommandLineStringParameter; + private readonly _targetDirectoriesParameter: IRequiredCommandLineStringListParameter; + private readonly _compressionParameter: IRequiredCommandLineChoiceParameter; + private readonly _terminal: ITerminal; + private readonly _terminalProvider: ConsoleTerminalProvider; + + public constructor(terminalProvider: ConsoleTerminalProvider, terminal: ITerminal) { + super({ + toolFilename: 'zipsync', + toolDescription: '' + }); + + this._terminal = terminal; + this._terminalProvider = terminalProvider; + + this._debugParameter = this.defineFlagParameter({ + parameterLongName: '--debug', + parameterShortName: '-d', + description: 'Show the full call stack if an error occurs while executing the tool' + }); + + this._verboseParameter = this.defineFlagParameter({ + parameterLongName: '--verbose', + parameterShortName: '-v', + description: 'Show verbose output' + }); + + this._modeParameter = this.defineChoiceParameter({ + parameterLongName: '--mode', + parameterShortName: '-m', + description: + 'The mode of operation: "pack" to create a zip archive, or "unpack" to extract files from a zip archive', + alternatives: ['pack', 'unpack'], + required: true + }); + + this._archivePathParameter = this.defineStringParameter({ + parameterLongName: '--archive-path', + parameterShortName: '-a', + description: 'Zip file path', + argumentName: 'ARCHIVE_PATH', + required: true + }); + + this._targetDirectoriesParameter = this.defineStringListParameter({ + parameterLongName: '--target-directory', + parameterShortName: '-t', + description: 'Target directories to pack or unpack', + argumentName: 'TARGET_DIRECTORIES', + required: true + }); + + this._baseDirParameter = this.defineStringParameter({ + parameterLongName: '--base-dir', + parameterShortName: '-b', + description: 'Base directory for relative paths within the archive', + argumentName: 'BASE_DIR', + required: true + }); + + this._compressionParameter = this.defineChoiceParameter({ + parameterLongName: '--compression', + parameterShortName: '-z', + description: + 'Compression strategy when packing. 
"deflate" and "zlib" attempts compression for every file (keeps only if smaller); "auto" first skips likely-compressed types before attempting "deflate" compression; "store" disables compression.', + alternatives: ['store', 'deflate', 'zstd', 'auto'], + required: true + }); + } + + protected override async onExecuteAsync(): Promise { + if (this._debugParameter.value) { + // eslint-disable-next-line no-debugger + debugger; + this._terminalProvider.debugEnabled = true; + this._terminalProvider.verboseEnabled = true; + } + if (this._verboseParameter.value) { + this._terminalProvider.verboseEnabled = true; + } + try { + if (this._modeParameter.value === 'pack') { + pack({ + terminal: this._terminal, + archivePath: this._archivePathParameter.value, + targetDirectories: this._targetDirectoriesParameter.values, + baseDir: this._baseDirParameter.value, + compression: this._compressionParameter.value + }); + } else if (this._modeParameter.value === 'unpack') { + unpack({ + terminal: this._terminal, + archivePath: this._archivePathParameter.value, + targetDirectories: this._targetDirectoriesParameter.values, + baseDir: this._baseDirParameter.value + }); + } + } catch (error) { + this._terminal.writeErrorLine('\n' + error.stack); + } + } +} diff --git a/apps/zipsync/src/__snapshots__/start.test.ts.snap b/apps/zipsync/src/__snapshots__/start.test.ts.snap new file mode 100644 index 00000000000..6c68a44b411 --- /dev/null +++ b/apps/zipsync/src/__snapshots__/start.test.ts.snap @@ -0,0 +1,34 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`CLI Tool Tests should display help for "zipsync --help" 1`] = ` +" +zipsync 0.0.0 - https://rushstack.io + +usage: zipsync [-h] [-d] [-v] -m {pack,unpack} -a ARCHIVE_PATH -t + TARGET_DIRECTORIES -b BASE_DIR -z {store,deflate,zstd,auto} + + +Optional arguments: + -h, --help Show this help message and exit. + -d, --debug Show the full call stack if an error occurs while + executing the tool + -v, --verbose Show verbose output + -m {pack,unpack}, --mode {pack,unpack} + The mode of operation: \\"pack\\" to create a zip archive, + or \\"unpack\\" to extract files from a zip archive + -a ARCHIVE_PATH, --archive-path ARCHIVE_PATH + Zip file path + -t TARGET_DIRECTORIES, --target-directory TARGET_DIRECTORIES + Target directories to pack or unpack + -b BASE_DIR, --base-dir BASE_DIR + Base directory for relative paths within the archive + -z {store,deflate,zstd,auto}, --compression {store,deflate,zstd,auto} + Compression strategy when packing. \\"deflate\\" and + \\"zlib\\" attempts compression for every file (keeps + only if smaller); \\"auto\\" first skips + likely-compressed types before attempting \\"deflate\\" + compression; \\"store\\" disables compression. + +For detailed help about a specific command, use: zipsync -h +" +`; diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts new file mode 100644 index 00000000000..fdbdaa67d6b --- /dev/null +++ b/apps/zipsync/src/benchmark.test.ts @@ -0,0 +1,513 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+/* eslint-disable no-console */ + +import { execSync } from 'child_process'; +import { tmpdir, cpus, platform, release, arch, totalmem } from 'os'; +import * as path from 'path'; +import * as fs from 'fs'; +import { createHash, randomUUID } from 'crypto'; + +import { NoOpTerminalProvider, Terminal } from '@rushstack/terminal'; + +import type { ZipSyncOptionCompression } from './zipSyncUtils'; +import { pack } from './pack'; +import { unpack } from './unpack'; + +const compressionOptions = ['store', 'deflate', 'zstd', 'auto'] satisfies ZipSyncOptionCompression[]; + +// create a tempdir and setup dummy files there for benchmarking +const NUM_FILES = 1000; // number of files per subdir +let tempDir: string; +const runId = randomUUID(); +async function setupDemoDataAsync(): Promise { + console.log('Setting up demo data for benchmark...'); + tempDir = path.join(tmpdir(), `zipsync-benchmark-${runId}`); + fs.mkdirSync(tempDir, { recursive: true }); + + const demoSubDir1 = path.join(tempDir, 'demo-data', 'subdir1'); + fs.mkdirSync(demoSubDir1, { recursive: true }); + const demoSubDir2 = path.join(tempDir, 'demo-data', 'subdir2'); + fs.mkdirSync(demoSubDir2, { recursive: true }); + + for (let i = 0; i < NUM_FILES; i++) { + const filePath1 = path.join(demoSubDir1, `file${i}.txt`); + fs.writeFileSync(filePath1, `This is file ${i} in subdir1\n`.repeat(1000), { encoding: 'utf-8' }); + const filePath2 = path.join(demoSubDir2, `file${i}.txt`); + fs.writeFileSync(filePath2, `This is file ${i} in subdir2\n`.repeat(1000), { encoding: 'utf-8' }); + } + + console.log(`Demo data setup complete in ${tempDir}`); +} + +async function cleanupDemoDataAsync(): Promise { + if (tempDir && fs.existsSync(tempDir)) { + fs.rmSync(tempDir, { recursive: true, force: true }); + console.log(`Cleaned up temp directory: ${tempDir}`); + } +} + +beforeAll(async () => { + await setupDemoDataAsync(); +}); + +afterAll(async () => { + await cleanupDemoDataAsync(); +}); + +// Collect timings for table output after all tests +interface IMeasurement { + name: string; + kind: string; + phase: 'pack' | 'unpack'; + ms: number; + // Only for pack phase: archive size in bytes and compression ratio (archiveSize / uncompressedSourceSize) + sizeBytes?: number; +} +const measurements: IMeasurement[] = []; +// Allow specifying iterations via env BENCH_ITERATIONS. Defaults to 0 to avoid running the benchmark unless explicitly enabled. +function detectIterations(): number { + let iter = 0; + const envParsed: number = parseInt(process.env.BENCH_ITERATIONS || '', 10); + if (!isNaN(envParsed) && envParsed > 0) { + iter = envParsed; + } + return iter; +} +const ITERATIONS: number = detectIterations(); + +function measureFn(callback: () => void): number { + const start: number = performance.now(); + callback(); + return performance.now() - start; +} + +interface IBenchContext { + archive: string; + demoDir: string; // source demo data directory + unpackDir: string; +} + +interface IBenchCommands { + // Function that performs the packing. Receives archive path and demoDir. + pack: (ctx: IBenchContext) => void; + // Function that performs the unpack. Receives archive and unpackDir. 
+ unpack: (ctx: IBenchContext) => void; + archive: string; + unpackDir: string; + populateUnpackDir?: 'full' | 'partial'; + cleanBeforeUnpack?: boolean; +} + +function bench(kind: string, commands: IBenchCommands): void { + const demoDataPath = path.join(tempDir, 'demo-data'); + const srcDir = demoDataPath; + // Compute total uncompressed source size once per bench invocation + // We intentionally no longer compute total source size for ratio; only archive size is tracked. + function verifyUnpack(unpackDir: string): void { + // Compare file listings and hashes + function buildMap(root: string): Map { + const map = new Map(); + function walk(current: string): void { + for (const entry of fs.readdirSync(current, { withFileTypes: true })) { + const full = path.join(current, entry.name); + if (entry.isDirectory()) { + walk(full); + } else if (entry.isFile()) { + const rel = path.relative(root, full).replace(/\\/g, '/'); + const buf = fs.readFileSync(full); + const hash = createHash('sha256').update(buf).digest('hex'); + map.set(rel, { size: buf.length, hash }); + } + } + } + walk(root); + return map; + } + const srcMap = buildMap(srcDir); + const dstMap = buildMap(unpackDir); + if (srcMap.size !== dstMap.size) { + throw new Error( + `Verification failed (${kind}): file count mismatch src=${srcMap.size} dst=${dstMap.size}` + ); + } + for (const [rel, meta] of srcMap) { + const other = dstMap.get(rel); + if (!other) throw new Error(`Verification failed (${kind}): missing file ${rel}`); + if (other.size !== meta.size || other.hash !== meta.hash) { + throw new Error(`Verification failed (${kind}): content mismatch in ${rel}`); + } + } + } + for (let i = 0; i < ITERATIONS; i++) { + // Ensure previous artifacts removed + if (fs.existsSync(commands.archive)) fs.rmSync(commands.archive, { force: true }); + if (commands.populateUnpackDir === 'full') { + fs.cpSync(srcDir, commands.unpackDir, { recursive: true }); + } else if (commands.populateUnpackDir === 'partial') { + // Copy half the files + for (let j = 0; j < NUM_FILES / 2; j++) { + const file1 = path.join(srcDir, 'subdir1', `file${j}.txt`); + const file2 = path.join(srcDir, 'subdir2', `file${j}.txt`); + const dest1 = path.join(commands.unpackDir, 'subdir1', `file${j}.txt`); + const dest2 = path.join(commands.unpackDir, 'subdir2', `file${j}.txt`); + fs.mkdirSync(path.dirname(dest1), { recursive: true }); + fs.mkdirSync(path.dirname(dest2), { recursive: true }); + fs.copyFileSync(file1, dest1); + fs.copyFileSync(file2, dest2); + } + } + + let archiveSize: number | undefined; + const packMs: number = measureFn(() => { + commands.pack({ archive: commands.archive, demoDir: demoDataPath, unpackDir: commands.unpackDir }); + try { + const stat = fs.statSync(commands.archive); + archiveSize = stat.size; + } catch { + // ignore if archive not found + } + }); + measurements.push({ + name: `${kind}#${i + 1}`, + kind, + phase: 'pack', + ms: packMs, + sizeBytes: archiveSize + }); + + const unpackMs: number = measureFn(() => { + if (commands.cleanBeforeUnpack) { + fs.rmSync(commands.unpackDir, { recursive: true, force: true }); + fs.mkdirSync(commands.unpackDir, { recursive: true }); + } + commands.unpack({ archive: commands.archive, demoDir: demoDataPath, unpackDir: commands.unpackDir }); + }); + measurements.push({ name: `${kind}#${i + 1}`, kind, phase: 'unpack', ms: unpackMs }); + verifyUnpack(commands.unpackDir); + } +} + +function benchZipSyncScenario( + kind: string, + compression: ZipSyncOptionCompression, + existingFiles: 'all' | 'none' | 'partial' +): 
void { + if (!tempDir) throw new Error('Temp directory is not set up.'); + const terminal = new Terminal(new NoOpTerminalProvider()); + bench(kind, { + pack: ({ archive, demoDir }) => { + const { filesPacked } = pack({ + archivePath: archive, + targetDirectories: ['subdir1', 'subdir2'], + baseDir: demoDir, + compression, + terminal + }); + console.log(`Files packed: ${filesPacked}`); + }, + unpack: ({ archive, unpackDir }) => { + const { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted } = unpack({ + archivePath: archive, + targetDirectories: ['subdir1', 'subdir2'], + baseDir: unpackDir, + terminal + }); + console.log( + `Files extracted: ${filesExtracted}, files skipped: ${filesSkipped}, files deleted: ${filesDeleted}, folders deleted: ${foldersDeleted}, other entries deleted: ${otherEntriesDeleted}` + ); + }, + archive: path.join(tempDir, `archive-zipsync-${compression}.zip`), + unpackDir: path.join(tempDir, `unpacked-zipsync-${compression}-${existingFiles}`), + populateUnpackDir: existingFiles === 'all' ? 'full' : existingFiles === 'partial' ? 'partial' : undefined, + cleanBeforeUnpack: false // cleaning is handled internally by zipsync + }); +} + +// the benchmarks are skipped by default because they require external tools (tar, zip) to be installed +describe(`archive benchmarks (iterations=${ITERATIONS})`, () => { + it('tar', () => { + if (!isTarAvailable()) { + console.log('Skipping tar test because tar is not available'); + return; + } + if (!tempDir) throw new Error('Temp directory is not set up.'); + bench('tar', { + pack: ({ archive, demoDir }) => execSync(`tar -cf "${archive}" -C "${demoDir}" .`), + unpack: ({ archive, unpackDir }) => execSync(`tar -xf "${archive}" -C "${unpackDir}"`), + archive: path.join(tempDir, 'archive.tar'), + unpackDir: path.join(tempDir, 'unpacked-tar'), + populateUnpackDir: 'full', + cleanBeforeUnpack: true + }); + }); + it('tar-gz', () => { + if (!isTarAvailable()) { + console.log('Skipping tar test because tar is not available'); + return; + } + if (!tempDir) throw new Error('Temp directory is not set up.'); + bench('tar-gz', { + pack: ({ archive, demoDir }) => execSync(`tar -czf "${archive}" -C "${demoDir}" .`), + unpack: ({ archive, unpackDir }) => execSync(`tar -xzf "${archive}" -C "${unpackDir}"`), + archive: path.join(tempDir, 'archive.tar.gz'), + unpackDir: path.join(tempDir, 'unpacked-tar-gz'), + populateUnpackDir: 'full', + cleanBeforeUnpack: true + }); + }); + it('zip-store', () => { + if (!isZipAvailable()) { + console.log('Skipping zip test because zip is not available'); + return; + } + if (!tempDir) throw new Error('Temp directory is not set up.'); + bench('zip-store', { + pack: ({ archive, demoDir }) => execSync(`zip -r -Z store "${archive}" .`, { cwd: demoDir }), + unpack: ({ archive, unpackDir }) => execSync(`unzip "${archive}" -d "${unpackDir}"`), + archive: path.join(tempDir, 'archive.zip'), + unpackDir: path.join(tempDir, 'unpacked-zip'), + populateUnpackDir: 'full', + cleanBeforeUnpack: true + }); + }); + it('zip-deflate', () => { + if (!isZipAvailable()) { + console.log('Skipping zip test because zip is not available'); + return; + } + if (!tempDir) throw new Error('Temp directory is not set up.'); + bench('zip-deflate', { + pack: ({ archive, demoDir }) => execSync(`zip -r -Z deflate -9 "${archive}" .`, { cwd: demoDir }), + unpack: ({ archive, unpackDir }) => execSync(`unzip "${archive}" -d "${unpackDir}"`), + archive: path.join(tempDir, 'archive-deflate.zip'), + unpackDir: path.join(tempDir, 
'unpacked-zip-deflate'), + populateUnpackDir: 'full', + cleanBeforeUnpack: true + }); + }); + const existingFileOptions: ['all', 'none', 'partial'] = ['all', 'none', 'partial']; + compressionOptions.forEach((compression) => { + if (compression === 'zstd') { + const [major, minor] = process.versions.node.split('.').map((x) => parseInt(x, 10)); + if (major < 22 || (major === 22 && minor < 15)) { + console.warn(`Skipping zstd test on Node ${process.versions.node}`); + return; + } + } + existingFileOptions.forEach((existingFiles) => { + it(`zipsync-${compression}-${existingFiles}-existing`, () => { + benchZipSyncScenario(`zipsync-${compression}-${existingFiles}-existing`, compression, existingFiles); + }); + }); + }); +}); + +afterAll(() => { + if (!measurements.length) return; + interface IStats { + kind: string; + phase: string; + n: number; + min: number; + max: number; + mean: number; + p95: number; + std: number; + sizeMean?: number; // only for pack + } + const groups: Map = new Map(); + for (const m of measurements) { + const key: string = `${m.kind}|${m.phase}`; + let bucket = groups.get(key); + if (!bucket) { + bucket = { times: [], sizes: [] }; + groups.set(key, bucket); + } + bucket.times.push(m.ms); + if (typeof m.sizeBytes === 'number') bucket.sizes.push(m.sizeBytes); + } + const stats: IStats[] = []; + function percentile(sorted: number[], p: number): number { + if (!sorted.length) return 0; + const idx: number = Math.min(sorted.length - 1, Math.ceil((p / 100) * sorted.length) - 1); + return sorted[idx]; + } + for (const [key, bucket] of groups) { + const [kind, phase] = key.split('|'); + bucket.times.sort((a, b) => a - b); + const arr = bucket.times; + const n = arr.length; + const min = arr[0]; + const max = arr[n - 1]; + const sum = arr.reduce((a, b) => a + b, 0); + const mean = sum / n; + const variance = arr.reduce((a, b) => a + (b - mean) * (b - mean), 0) / n; + const std = Math.sqrt(variance); + const p95 = percentile(arr, 95); + const sizeMean = bucket.sizes.length + ? bucket.sizes.reduce((a, b) => a + b, 0) / bucket.sizes.length + : undefined; + stats.push({ kind, phase, n, min, max, mean, std, p95, sizeMean }); + } + // Organize into groups + const groupsDef: Array<{ title: string; baseline: string; members: string[] }> = [ + { + title: 'Compressed (baseline: tar-gz)', + baseline: 'tar-gz', + members: [ + 'tar-gz', + 'zip-deflate', + 'zipsync-zstd-all-existing', + 'zipsync-zstd-none-existing', + 'zipsync-zstd-partial-existing', + 'zipsync-deflate-all-existing', + 'zipsync-deflate-none-existing', + 'zipsync-deflate-partial-existing', + 'zipsync-auto-all-existing', + 'zipsync-auto-none-existing', + 'zipsync-auto-partial-existing' + ] + }, + { + title: 'Uncompressed (baseline: tar)', + baseline: 'tar', + members: [ + 'tar', + 'zip-store', + 'zipsync-store-all-existing', + 'zipsync-store-none-existing', + 'zipsync-store-partial-existing' + ] + } + ]; + // Build per-group markdown tables (no Group column) for each phase + function buildGroupTable( + group: { title: string; baseline: string; members: string[] }, + phase: 'pack' | 'unpack' + ): string[] { + // Human readable bytes formatter + function formatBytes(bytes: number): string { + const units = ['B', 'KB', 'MB', 'GB']; + let value = bytes; + let i = 0; + while (value >= 1024 && i < units.length - 1) { + value /= 1024; + i++; + } + const formatted = value >= 100 ? value.toFixed(0) : value >= 10 ? value.toFixed(1) : value.toFixed(2); + return `${formatted} ${units[i]}`; + } + const headers = + phase === 'pack' + ? 
['Archive', 'min (ms)', 'mean (ms)', 'p95 (ms)', 'max (ms)', 'std (ms)', 'speed (x)', 'size'] + : ['Archive', 'min (ms)', 'mean (ms)', 'p95 (ms)', 'max (ms)', 'std (ms)', 'speed (x)']; + const lines: string[] = []; + lines.push('| ' + headers.join(' | ') + ' |'); + const align: string[] = headers.map((header, idx) => (idx === 0 ? '---' : '---:')); + lines.push('| ' + align.join(' | ') + ' |'); + const baselineStats: IStats | undefined = stats.find( + (s) => s.kind === group.baseline && s.phase === phase + ); + for (const member of group.members) { + const s: IStats | undefined = stats.find((st) => st.kind === member && st.phase === phase); + if (!s) continue; + const isBaseline: boolean = member === group.baseline; + const speedFactor: number = baselineStats ? baselineStats.mean / s.mean : 1; + const cols: string[] = [ + (isBaseline ? '**' : '') + s.kind + (isBaseline ? '**' : ''), + s.min.toFixed(2), + s.mean.toFixed(2), + s.p95.toFixed(2), + s.max.toFixed(2), + s.std.toFixed(2), + speedFactor.toFixed(2) + 'x' + ]; + if (phase === 'pack') { + cols.push(s.sizeMean !== undefined ? formatBytes(Math.round(s.sizeMean)) : ''); + } + lines.push('| ' + cols.join(' | ') + ' |'); + } + return lines; + } + const outputLines: string[] = []; + outputLines.push('# Benchmark Results'); + outputLines.push(''); + outputLines.push( + ` +This document contains performance measurements for packing and unpacking a synthetic dataset using tar, zip, and zipsync. + +The dataset consists of two directory trees (subdir1, subdir2) populated with ${NUM_FILES} text files each. + +zipsync scenarios +* "all-existing": unpack directory is fully populated with existing files +* "none-existing": unpack directory is empty +* "partial-existing": unpack directory contains half of the files + +zip and tar scenarios clean the unpack directory before unpacking. This time is included in the measurements because +zipsync internally handles cleaning as part of its operation. +` + ); + outputLines.push(''); + // System info + try { + const cpuList = cpus(); + const cpuModelRaw: string | undefined = cpuList[0]?.model; + const cpuModel: string = cpuModelRaw ? 
cpuModelRaw.replace(/\|/g, ' ').trim() : 'unknown'; + const logicalCores: number = cpuList.length || 0; + const memGB: string = (totalmem() / 1024 ** 3).toFixed(1); + outputLines.push('**System**'); + outputLines.push(''); + outputLines.push('| OS | Arch | Node | CPU | Logical Cores | Memory |'); + outputLines.push('| --- | --- | --- | --- | ---: | --- |'); + outputLines.push( + `| ${platform()} ${release()} | ${arch()} | ${process.version} | ${cpuModel} | ${logicalCores} | ${memGB} GB |` + ); + outputLines.push(''); + } catch { + // ignore system info errors + } + outputLines.push(`Iterations: ${ITERATIONS}`); + outputLines.push(''); + for (const g of groupsDef) { + outputLines.push(`## ${g.title}`); + outputLines.push(''); + outputLines.push('### Unpack Phase'); + outputLines.push(''); + outputLines.push(...buildGroupTable(g, 'unpack')); + outputLines.push(''); + outputLines.push('### Pack Phase'); + outputLines.push(''); + outputLines.push(...buildGroupTable(g, 'pack')); + outputLines.push(''); + } + const resultText = outputLines.join('\n'); + console.log(resultText); + try { + const resultFile = path.join(__dirname, '..', 'temp', `benchmark-results.md`); + fs.writeFileSync(resultFile, resultText, { encoding: 'utf-8' }); + console.log(`Benchmark results written to: ${resultFile}`); + } catch (e) { + console.warn('Failed to write benchmark results file:', (e as Error).message); + } +}); +function isZipAvailable(): boolean { + try { + const checkZip = process.platform === 'win32' ? 'where zip' : 'command -v zip'; + const checkUnzip = process.platform === 'win32' ? 'where unzip' : 'command -v unzip'; + execSync(checkZip, { stdio: 'ignore' }); + execSync(checkUnzip, { stdio: 'ignore' }); + return true; + } catch { + return false; + } +} +function isTarAvailable(): boolean { + try { + const checkTar = process.platform === 'win32' ? 'where tar' : 'command -v tar'; + execSync(checkTar, { stdio: 'ignore' }); + return true; + } catch { + return false; + } +} diff --git a/apps/zipsync/src/compress.ts b/apps/zipsync/src/compress.ts new file mode 100644 index 00000000000..1c89ef8c58e --- /dev/null +++ b/apps/zipsync/src/compress.ts @@ -0,0 +1,214 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import type { Transform } from 'node:stream'; +import zlib from 'node:zlib'; + +type OutputChunkHandler = (chunk: Uint8Array, lengthBytes: number) => void; + +const kError: unique symbol = (() => { + // Create an instance of Deflate so that we can get our hands on the internal error symbol + // It isn't exported. + const reference: zlib.Deflate = zlib.createDeflateRaw(); + const kErrorResult: symbol | undefined = Object.getOwnPropertySymbols(reference).find((x) => + x.toString().includes('kError') + ); + if (kErrorResult === undefined) { + throw new Error('Unable to find the internal error symbol in node:zlib'); + } + reference.close(); + return kErrorResult; + // Casting `symbol` to the exact symbol of this definition +})() as typeof kError; + +/** + * Internal members of all Zlib compressors. + * Needed to + */ +interface IZlibInternals { + /** + * The native binding to Zlib. + */ + _handle: IHandle | undefined; + /** + * The flush flag passed to each call other than the last one for this implementation. + * Varies by compressor. + */ + _defaultFlushFlag: number; + /** + * The flush flag passed to the final call for this implementation. + * Varies by compressor. 
+ */ + _finishFlushFlag: number; + /** + * The number of bytes read from the input and written to the output. + */ + _writeState: [number, number]; + /** + * The internal error state + */ + [kError]: Error | undefined; +} + +type Compressor = Transform & IZlibInternals; + +interface IHandle { + /** + * Closes the handle and releases resources. + * Ensure that this is always invoked. + */ + close(): void; + /** + * Compresses up to `inLen` bytes from `chunk` starting at `inOff`. + * Writes up to `outLen` bytes to `output` starting at `outOff`. + * @param flushFlag - The flush flag to the compressor implementation. Defines the behavior when reaching the end of the input. + * @param chunk - The buffer containing the data to be compressed + * @param inOff - The offset in bytes to start reading from `chunk` + * @param inLen - The maximum number of bytes to read from `chunk` + * @param output - The buffer to write the compressed data to + * @param outOff - The offset in bytes to start writing to `output` at + * @param outLen - The maximum number of bytes to write to `output`. + */ + writeSync( + flushFlag: number, + chunk: Uint8Array, + inOff: number, + inLen: number, + output: Uint8Array, + outOff: number, + outLen: number + ): void; +} + +export type IIncrementalZlib = Disposable & { + update: (inputBuffer: Uint8Array) => void; +}; + +// zstd is available in Node 22+ +type IExtendedZlib = typeof zlib & { + createZstdCompress: (options?: zlib.ZlibOptions) => Transform; + createZstdDecompress: (options?: zlib.ZlibOptions) => Transform; +}; + +export type IncrementalZlibMode = 'deflate' | 'inflate' | 'zstd-compress' | 'zstd-decompress'; + +export function createIncrementalZlib( + outputBuffer: Uint8Array, + handleOutputChunk: OutputChunkHandler, + mode: IncrementalZlibMode +): IIncrementalZlib { + // The zlib constructors all allocate a buffer of size chunkSize using Buffer.allocUnsafe + // We want to ensure that that invocation doesn't allocate a buffer. 
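+  // (For reference, how the pieces below fit together: Buffer.allocUnsafe is temporarily swapped for a
+  // function returning the caller-supplied outputBuffer, so the constructor "allocates" that buffer;
+  // the patch is restored in the finally block, and handle.writeSync() is then driven synchronously,
+  // handing each filled portion of outputBuffer to handleOutputChunk.)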
+ // Unfortunately the minimum value of `chunkSize` to the constructor is non-zero + + let compressor: Compressor | undefined; + + const savedAllocUnsafe: typeof Buffer.allocUnsafe = Buffer.allocUnsafe; + + try { + //@ts-expect-error + Buffer.allocUnsafe = () => outputBuffer; + switch (mode) { + case 'inflate': + compressor = zlib.createInflateRaw({ + chunkSize: outputBuffer.byteLength + }) as unknown as Transform & IZlibInternals; + break; + case 'deflate': + compressor = zlib.createDeflateRaw({ + chunkSize: outputBuffer.byteLength, + level: zlib.constants.Z_BEST_COMPRESSION + }) as unknown as Transform & IZlibInternals; + break; + case 'zstd-compress': + // available in Node 22.15+ + compressor = (zlib as IExtendedZlib).createZstdCompress({ + chunkSize: outputBuffer.byteLength + }) as unknown as Transform & IZlibInternals; + break; + case 'zstd-decompress': + // available in Node 22.15+ + compressor = (zlib as IExtendedZlib).createZstdDecompress({ + chunkSize: outputBuffer.byteLength + }) as unknown as Transform & IZlibInternals; + break; + default: + // Unsupported mode (types currently restrict to 'deflate' | 'inflate') + break; + } + } finally { + Buffer.allocUnsafe = savedAllocUnsafe; + } + + if (!compressor) { + throw new Error('Failed to create zlib instance'); + } + + const handle: IHandle = compressor._handle!; + + return { + [Symbol.dispose]: () => { + if (compressor._handle) { + compressor._handle.close(); + compressor._handle = undefined; + } + }, + update: function processInputChunk(inputBuffer: Uint8Array): void { + let error: Error | undefined; + + // Directive to the compressor on reaching the end of the current input buffer + // Default value is to expect more data + let flushFlag: number = compressor._defaultFlushFlag; + + let bytesInInputBuffer: number = inputBuffer.byteLength; + + if (bytesInInputBuffer <= 0) { + // Ensure the value is non-negative + // We will call the compressor one last time with 0 bytes of input + bytesInInputBuffer = 0; + // Tell the compressor to flush anything in its internal buffer and write any needed trailer. + flushFlag = compressor._finishFlushFlag; + } + + let availInBefore: number = bytesInInputBuffer; + let inOff: number = 0; + let availOutAfter: number = 0; + let availInAfter: number | undefined; + + const state: [number, number] = compressor._writeState; + + do { + handle.writeSync( + flushFlag, + inputBuffer, // in + inOff, // in_off + availInBefore, // in_len + outputBuffer, // out + 0, // out_off + outputBuffer.byteLength // out_len + ); + + if (error) { + throw error; + } else if (compressor[kError]) { + throw compressor[kError]; + } + + availOutAfter = state[0]; + availInAfter = state[1]; + + const inDelta: number = availInBefore - availInAfter; + + const have: number = outputBuffer.byteLength - availOutAfter; + if (have > 0) { + handleOutputChunk(outputBuffer, have); + } + + // These values get reset if we have new data, + // so we can update them even if we're done + inOff += inDelta; + availInBefore = availInAfter; + } while (availOutAfter === 0); + } + }; +} diff --git a/apps/zipsync/src/crc32.test.ts b/apps/zipsync/src/crc32.test.ts new file mode 100644 index 00000000000..fb73eb5f9d5 --- /dev/null +++ b/apps/zipsync/src/crc32.test.ts @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
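+//
+// This test chains the running `value` argument across several buffers so the fallback is compared
+// against zlib.crc32 in its incremental form, not just for single calls. A handy external reference:
+// the standard CRC-32 check value, the CRC-32 of the ASCII string '123456789', is 0xcbf43926.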
+ +import * as zlib from 'zlib'; + +import { fallbackCrc32 } from './crc32'; + +describe('crc32', () => { + it('fallbackCrc32 should match zlib.crc32', () => { + if (!zlib.crc32) { + // eslint-disable-next-line no-console + console.log('Skipping test because zlib.crc32 is not available in this Node.js version'); + return; + } + + const testData = [ + Buffer.from('hello world', 'utf-8'), + Buffer.alloc(0), // empty buffer + Buffer.from('hello crc32', 'utf-8'), + Buffer.from([-1, 2, 3, 4, 5, 255, 0, 128]) + ]; + + let fallbackCrc: number = 0; + let zlibCrc: number = 0; + + for (const data of testData) { + fallbackCrc = fallbackCrc32(data, fallbackCrc); + zlibCrc = zlib.crc32(data, zlibCrc); + } + + fallbackCrc = fallbackCrc >>> 0; + zlibCrc = zlibCrc >>> 0; + + expect(fallbackCrc).toBe(zlibCrc); + }); +}); diff --git a/apps/zipsync/src/crc32.ts b/apps/zipsync/src/crc32.ts new file mode 100644 index 00000000000..18017c58852 --- /dev/null +++ b/apps/zipsync/src/crc32.ts @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import * as zlib from 'node:zlib'; + +let crcTable: Uint32Array | undefined; + +function initCrcTable(): Uint32Array { + if (crcTable) { + return crcTable; + } + + crcTable = new Uint32Array(256); + for (let i: number = 0; i < 256; i++) { + let crcEntry: number = i; + for (let j: number = 0; j < 8; j++) { + // eslint-disable-next-line no-bitwise + crcEntry = crcEntry & 1 ? 0xedb88320 ^ (crcEntry >>> 1) : crcEntry >>> 1; + } + crcTable[i] = crcEntry; + } + return crcTable; +} + +export function fallbackCrc32(data: Buffer, value: number = 0): number { + const table: Uint32Array = initCrcTable(); + value = (value ^ 0xffffffff) >>> 0; + + for (let i: number = 0; i < data.length; i++) { + // eslint-disable-next-line no-bitwise + value = table[(value ^ data[i]) & 0xff] ^ (value >>> 8); + } + + value = (value ^ 0xffffffff) >>> 0; + return value; +} + +export const crc32Builder: (data: Buffer, value?: number) => number = + zlib.crc32 ?? fallbackCrc32; diff --git a/apps/zipsync/src/fs.ts b/apps/zipsync/src/fs.ts new file mode 100644 index 00000000000..ced3056a6c6 --- /dev/null +++ b/apps/zipsync/src/fs.ts @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { default as fs, type OpenMode } from 'node:fs'; + +interface IInternalDisposableFileHandle extends Disposable { + fd: number; +} + +export interface IDisposableFileHandle extends IInternalDisposableFileHandle { + readonly fd: number; +} + +export const DISPOSE_SYMBOL: typeof Symbol.dispose = Symbol.dispose ?? 
Symbol.for('Symbol.dispose'); + +export function getDisposableFileHandle(path: string, openMode: OpenMode): IDisposableFileHandle { + const result: IInternalDisposableFileHandle = { + fd: fs.openSync(path, openMode), + [DISPOSE_SYMBOL]: () => { + if (!isNaN(result.fd)) { + fs.closeSync(result.fd); + result.fd = NaN; + } + } + }; + + return result; +} + +export function rmdirSync(dirPath: string): boolean { + try { + fs.rmdirSync(dirPath); + return true; + } catch (e) { + if ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') { + // Not found, ignore + } else { + throw e; + } + } + return false; +} + +export function unlinkSync(filePath: string): boolean { + try { + fs.unlinkSync(filePath); + return true; + } catch (e) { + if (e && (e as NodeJS.ErrnoException).code === 'ENOENT') { + // Not found, ignore + } else { + throw e; + } + } + return false; +} diff --git a/apps/zipsync/src/hash.ts b/apps/zipsync/src/hash.ts new file mode 100644 index 00000000000..6ba6e59a587 --- /dev/null +++ b/apps/zipsync/src/hash.ts @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { readSync, fstatSync, type Stats } from 'node:fs'; +import { createHash, type Hash } from 'node:crypto'; + +const buffer: Buffer = Buffer.allocUnsafeSlow(1 << 24); + +export function computeFileHash(fd: number): string | false { + try { + const hash: Hash = createHash('sha1'); + let totalBytesRead: number = 0; + let bytesRead: number; + do { + bytesRead = readSync(fd, buffer, 0, buffer.length, -1); + if (bytesRead <= 0) { + break; + } + totalBytesRead += bytesRead; + hash.update(buffer.subarray(0, bytesRead)); + } while (bytesRead > 0); + if (totalBytesRead === 0) { + // Sometimes directories get treated as empty files + const stat: Stats = fstatSync(fd); + if (!stat.isFile()) { + return false; + } + } + + return hash.digest('hex'); + } catch (err) { + // There is a bug in node-core-library where it doesn't handle if the operation was on a file descriptor + if (err.code === 'EISDIR' || err.code === 'ENOENT' || err.code === 'ENOTDIR') { + return false; + } + throw err; + } +} + +export function calculateSHA1(data: Buffer): string { + return createHash('sha1').update(data).digest('hex'); +} diff --git a/apps/zipsync/src/index.test.ts b/apps/zipsync/src/index.test.ts new file mode 100644 index 00000000000..5fe98676aca --- /dev/null +++ b/apps/zipsync/src/index.test.ts @@ -0,0 +1,75 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+ +import * as path from 'node:path'; +import * as fs from 'node:fs'; + +import { NoOpTerminalProvider } from '@rushstack/terminal/lib/NoOpTerminalProvider'; +import { Terminal } from '@rushstack/terminal/lib/Terminal'; + +import { pack } from './pack'; +import { unpack } from './unpack'; +import { getDemoDataDirectoryDisposable } from './testUtils'; +import type { ZipSyncOptionCompression } from './zipSyncUtils'; + +describe('zipSync tests', () => { + it(`basic pack test`, () => { + const compressionOptions = ['store', 'deflate', 'zstd', 'auto'] satisfies ZipSyncOptionCompression[]; + compressionOptions.forEach((compression) => { + if (compression === 'zstd') { + const [major, minor] = process.versions.node.split('.').map((x) => parseInt(x, 10)); + if (major < 22 || (major === 22 && minor < 15)) { + // eslint-disable-next-line no-console + console.warn(`Skipping zstd test on Node ${process.versions.node}`); + return; + } + } + + using demoDataDisposable = getDemoDataDirectoryDisposable(5); + const { targetDirectories, baseDir, metadata } = demoDataDisposable; + + const terminal = new Terminal(new NoOpTerminalProvider()); + + const archivePath: string = path.join(baseDir, 'archive.zip'); + const packResult = pack({ + terminal: terminal, + compression, + baseDir, + targetDirectories, + archivePath + }); + + expect(packResult).toMatchObject({ filesPacked: 21, metadata }); + + using unpackDemoDataDisposable = getDemoDataDirectoryDisposable(2); + const { baseDir: unpackBaseDir } = unpackDemoDataDisposable; + + const unpackResult = unpack({ + terminal: terminal, + archivePath, + baseDir: unpackBaseDir, + targetDirectories + }); + + expect(unpackResult).toMatchObject({ + filesDeleted: 0, + filesExtracted: 12, + filesSkipped: 8, + foldersDeleted: 0, + metadata + }); + + // Verify files were extracted + for (const targetDirectory of targetDirectories) { + const sourceDir: string = path.join(baseDir, targetDirectory); + for (let i: number = 0; i < 5; ++i) { + const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); + const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); + expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( + fs.readFileSync(sourceFile, { encoding: 'utf-8' }) + ); + } + } + }); + }); +}); diff --git a/apps/zipsync/src/index.ts b/apps/zipsync/src/index.ts new file mode 100644 index 00000000000..11913cee85c --- /dev/null +++ b/apps/zipsync/src/index.ts @@ -0,0 +1,5 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +export { pack, type IZipSyncPackResult, type IZipSyncPackOptions } from './pack'; +export { unpack, type IZipSyncUnpackResult, type IZipSyncUnpackOptions } from './unpack'; diff --git a/apps/zipsync/src/pack.ts b/apps/zipsync/src/pack.ts new file mode 100644 index 00000000000..177fc6fec6c --- /dev/null +++ b/apps/zipsync/src/pack.ts @@ -0,0 +1,425 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
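+//
+// For reference, the archive produced below has this on-disk layout (streamed in a single pass):
+//
+//   [local file header][file data, stored or compressed][data descriptor]   <- repeated per file
+//   [local file header][metadata JSON entry][data descriptor]
+//   [central directory record per entry]
+//   [end of central directory record]
+//
+// Sizes and CRC-32 are only known after streaming each file, which is why data descriptors
+// (general purpose bit 3) are written instead of seeking back to patch the local headers.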
+ +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import * as crypto from 'node:crypto'; +import * as zlib from 'node:zlib'; + +import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; + +import { crc32Builder } from './crc32'; +import { DISPOSE_SYMBOL, getDisposableFileHandle, type IDisposableFileHandle } from './fs'; +import { type IIncrementalZlib, type IncrementalZlibMode, createIncrementalZlib } from './compress'; +import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; +import { + writeLocalFileHeader, + writeDataDescriptor, + writeCentralDirectoryHeader, + writeEndOfCentralDirectory, + ZSTD_COMPRESSION, + DEFLATE_COMPRESSION, + STORE_COMPRESSION, + type ZipMetaCompressionMethod, + type IFileEntry, + dosDateTime +} from './zipUtils'; +import { calculateSHA1 } from './hash'; +import { + type ZipSyncOptionCompression, + type IMetadata, + type IDirQueueItem, + METADATA_VERSION, + METADATA_FILENAME, + defaultBufferSize +} from './zipSyncUtils'; + +/** + * File extensions for which additional DEFLATE/ZSTD compression is unlikely to help. + * Used by the 'auto' compression heuristic to avoid wasting CPU on data that is already + * compressed (images, media, existing archives, fonts, etc.). + */ +const LIKELY_COMPRESSED_EXTENSION_REGEX: RegExp = + /\.(?:zip|gz|tgz|bz2|xz|7z|rar|jpg|jpeg|png|gif|webp|avif|mp4|m4v|mov|mkv|webm|mp3|ogg|aac|flac|pdf|woff|woff2)$/; + +/** + * Basic heuristic: skip re-compressing file types that are already compressed. + */ +function isLikelyAlreadyCompressed(filename: string): boolean { + return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); +} + +/** + * Map zip compression method code -> incremental zlib mode label + */ +const zlibPackModes: Record = { + [ZSTD_COMPRESSION]: 'zstd-compress', + [DEFLATE_COMPRESSION]: 'deflate', + [STORE_COMPRESSION]: undefined +} as const; + +/** + * Public facing CLI option -> actual zip method used for a file we decide to compress. + */ +const zipSyncCompressionOptions: Record = { + store: STORE_COMPRESSION, + deflate: DEFLATE_COMPRESSION, + zstd: ZSTD_COMPRESSION, + auto: DEFLATE_COMPRESSION +} as const; + +/** + * @public + * Options for zipsync + */ +export interface IZipSyncPackOptions { + /** + * \@rushstack/terminal compatible terminal for logging + */ + terminal: ITerminal; + /** + * Zip file path + */ + archivePath: string; + /** + * Target directories to pack (relative to baseDir) + */ + targetDirectories: ReadonlyArray; + /** + * Base directory for relative paths within the archive (defaults to common parent of targetDirectories) + */ + baseDir: string; + /** + * Compression mode. If set to 'deflate', file data will be compressed using raw DEFLATE (method 8) when this + * produces a smaller result; otherwise it will fall back to 'store' per-file. + */ + compression: ZipSyncOptionCompression; + /** + * Optional buffer that can be provided to avoid internal allocations. + */ + inputBuffer?: Buffer; + /** + * Optional buffer that can be provided to avoid internal allocations. + */ + outputBuffer?: Buffer; +} + +export interface IZipSyncPackResult { + filesPacked: number; + metadata: IMetadata; +} + +/** + * Create a zipsync archive by enumerating target directories, then streaming each file into the + * output zip using the local file header + (optional compressed data) + data descriptor pattern. + * + * Performance characteristics: + * - Single pass per file (no read-then-compress-then-write buffering). 
CRC32 + SHA-1 are computed + * while streaming so the metadata JSON can later be used for selective unpack. + * - Data descriptor usage (bit 3) allows writing headers before we know sizes or CRC32. + * - A single timestamp (captured once) is applied to all entries for determinism. + * - Metadata entry is added as a normal zip entry at the end (before central directory) so legacy + * tools can still list/extract it, while zipsync can quickly parse file hashes. + */ +export function pack({ + archivePath, + targetDirectories: rawTargetDirectories, + baseDir: rawBaseDir, + compression, + terminal, + inputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize), + outputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize) +}: IZipSyncPackOptions): IZipSyncPackResult { + const baseDir: string = path.resolve(rawBaseDir); + const targetDirectories: string[] = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); + terminal.writeLine(`Packing to ${archivePath} from ${rawTargetDirectories.join(', ')}`); + + markStart('pack.total'); + terminal.writeDebugLine('Starting pack'); + // Pass 1: enumerate files with a queue to avoid deep recursion + markStart('pack.enumerate'); + + const filePaths: string[] = []; + const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ dir, depth: 0 })); + + while (queue.length) { + const { dir: currentDir, depth } = queue.shift()!; + terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); + + const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); + + let items: fs.Dirent[]; + try { + items = fs.readdirSync(currentDir, { withFileTypes: true }); + } catch (e) { + if ( + e && + ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') + ) { + terminal.writeWarningLine(`Failed to read directory: ${currentDir}. Ignoring.`); + continue; + } else { + throw e; + } + } + + for (const item of items) { + const fullPath: string = path.join(currentDir, item.name); + if (item.isFile()) { + const relativePath: string = path.relative(baseDir, fullPath).replace(/\\/g, '/'); + terminal.writeVerboseLine(`${padding}${item.name}`); + filePaths.push(relativePath); + } else if (item.isDirectory()) { + terminal.writeVerboseLine(`${padding}${item.name}/`); + queue.push({ dir: fullPath, depth: depth + 1 }); + } else { + throw new Error(`Unexpected item (not file or directory): ${fullPath}. Aborting.`); + } + } + } + + terminal.writeLine(`Found ${filePaths.length} files to pack (enumerated)`); + markEnd('pack.enumerate'); + + // Pass 2: stream each file: read chunks -> hash + (maybe) compress -> write local header + data descriptor. + markStart('pack.prepareEntries'); + + terminal.writeDebugLine(`Opening archive for write: ${archivePath}`); + using zipFile: IDisposableFileHandle = getDisposableFileHandle(archivePath, 'w'); + let currentOffset: number = 0; + /** + * Write a raw chunk to the archive file descriptor, updating current offset. + */ + function writeChunkToZip(chunk: Uint8Array, lengthBytes: number = chunk.byteLength): void { + let offset: number = 0; + while (lengthBytes > 0 && offset < chunk.byteLength) { + // In practice this call always writes all data at once, but the spec says it is not an error + // for it to not do so. Possibly that situation comes up when writing to something that is not + // an ordinary file. 
+ const written: number = fs.writeSync(zipFile.fd, chunk, offset, lengthBytes); + lengthBytes -= written; + offset += written; + } + currentOffset += offset; + } + /** Convenience wrapper for writing multiple buffers sequentially. */ + function writeChunksToZip(chunks: Uint8Array[]): void { + for (const chunk of chunks) { + writeChunkToZip(chunk); + } + } + + const dosDateTimeNow: { time: number; date: number } = dosDateTime(new Date()); + /** + * Stream a single file into the archive. + * Steps: + * 1. Decide compression (based on user choice + heuristic). + * 2. Emit local file header (sizes/CRC zeroed because we use a data descriptor). + * 3. Read file in 32 MiB chunks: update SHA-1 + CRC32; optionally feed compressor or write raw. + * 4. Flush compressor (if any) and write trailing data descriptor containing sizes + CRC. + * 5. Return populated entry metadata for later central directory + JSON metadata. + */ + function writeFileEntry(relativePath: string): IFileEntry { + const fullPath: string = path.join(baseDir, relativePath); + + /** + * Read file in large fixed-size buffer; invoke callback for each filled chunk. + */ + const readInputInChunks: (onChunk: (bytesInInputBuffer: number) => void) => void = ( + onChunk: (bytesInInputBuffer: number) => void + ): void => { + using inputDisposable: IDisposableFileHandle = getDisposableFileHandle(fullPath, 'r'); + + let bytesInInputBuffer: number = 0; + // The entire input buffer will be drained in each loop iteration + // So run until EOF + while (!isNaN(inputDisposable.fd)) { + bytesInInputBuffer = fs.readSync(inputDisposable.fd, inputBuffer, 0, inputBuffer.byteLength, -1); + + if (bytesInInputBuffer <= 0) { + // EOF, close the input fd + inputDisposable[DISPOSE_SYMBOL](); + } + + onChunk(bytesInInputBuffer); + } + }; + + let shouldCompress: boolean = false; + if (compression === 'deflate' || compression === 'zstd') { + shouldCompress = true; + } else if (compression === 'auto') { + // Heuristic: skip compression for small files or likely-already-compressed files + if (!isLikelyAlreadyCompressed(relativePath)) { + shouldCompress = true; + } else { + terminal.writeVerboseLine( + `Skip compression heuristically (already-compressed) for ${relativePath} (size unknown at this point)` + ); + } + } + + const compressionMethod: ZipMetaCompressionMethod = shouldCompress + ? zipSyncCompressionOptions[compression] + : zipSyncCompressionOptions.store; + + const entry: IFileEntry = { + filename: relativePath, + size: 0, + compressedSize: 0, + crc32: 0, + sha1Hash: '', + localHeaderOffset: currentOffset, + compressionMethod, + dosDateTime: dosDateTimeNow + }; + + writeChunksToZip(writeLocalFileHeader(entry)); + + const sha1HashBuilder: crypto.Hash = crypto.createHash('sha1'); + let crc32: number = 0; + let uncompressedSize: number = 0; + let compressedSize: number = 0; + + /** + * Compressor instance (deflate or zstd) created only if needed. + */ + using incrementalZlib: IIncrementalZlib | undefined = shouldCompress + ? createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + writeChunkToZip(chunk, lengthBytes); + compressedSize += lengthBytes; + }, + zlibPackModes[compressionMethod]! + ) + : undefined; + + // Read input file in chunks, update hashes, and either compress or write raw. 
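// (editorial note) The callback below feeds each chunk to the incremental SHA-1 hash and chains the
// CRC32 (crc32Builder takes the previous value as its seed); the running CRC is only normalized to an
// unsigned 32-bit value with `>>> 0` after the final chunk has been read.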
+ readInputInChunks((bytesInInputBuffer: number) => { + const slice: Buffer = inputBuffer.subarray(0, bytesInInputBuffer); + sha1HashBuilder.update(slice); + crc32 = crc32Builder(slice, crc32); + if (incrementalZlib) { + incrementalZlib.update(slice); + } else { + writeChunkToZip(slice, bytesInInputBuffer); + } + uncompressedSize += bytesInInputBuffer; + }); + + // finalize hashes, compression + incrementalZlib?.update(Buffer.alloc(0)); + crc32 = crc32 >>> 0; + const sha1Hash: string = sha1HashBuilder.digest('hex'); + + if (!shouldCompress) { + compressedSize = uncompressedSize; + } + + entry.size = uncompressedSize; + entry.compressedSize = compressedSize; + entry.crc32 = crc32; + entry.sha1Hash = sha1Hash; + + // Trailing data descriptor now that final CRC/sizes are known. + writeChunkToZip(writeDataDescriptor(entry)); + + terminal.writeVerboseLine( + `${relativePath} (sha1=${entry.sha1Hash}, crc32=${entry.crc32.toString(16)}, size=${ + entry.size + }, compressed=${entry.compressedSize}, method=${entry.compressionMethod}, compressed ${( + 100 - + (entry.compressedSize / entry.size) * 100 + ).toFixed(1)}%)` + ); + return entry; + } + + const entries: IFileEntry[] = []; + // Emit all file entries in enumeration order. + for (const relativePath of filePaths) { + entries.push(writeFileEntry(relativePath)); + } + + markEnd('pack.prepareEntries'); + terminal.writeLine(`Prepared ${entries.length} file entries`); + + markStart('pack.metadata.build'); + const metadata: IMetadata = { version: METADATA_VERSION, files: {} }; + // Build metadata map used for selective unpack (size + SHA‑1 per file). + for (const entry of entries) { + metadata.files[entry.filename] = { size: entry.size, sha1Hash: entry.sha1Hash }; + } + + const metadataContent: string = JSON.stringify(metadata); + const metadataBuffer: Buffer = Buffer.from(metadataContent, 'utf8'); + terminal.writeDebugLine( + `Metadata size=${metadataBuffer.length} bytes, fileCount=${Object.keys(metadata.files).length}` + ); + + let metadataCompressionMethod: ZipMetaCompressionMethod = zipSyncCompressionOptions.store; + let metadataData: Buffer = metadataBuffer; + let metadataCompressedSize: number = metadataBuffer.length; + // Compress metadata (deflate) iff user allowed compression and it helps (>64 bytes & smaller result). 
+ if (compression !== 'store' && metadataBuffer.length > 64) { + const compressed: Buffer = zlib.deflateRawSync(metadataBuffer, { level: 9 }); + if (compressed.length < metadataBuffer.length) { + metadataCompressionMethod = zipSyncCompressionOptions.deflate; + metadataData = compressed; + metadataCompressedSize = compressed.length; + terminal.writeDebugLine( + `Metadata compressed (orig=${metadataBuffer.length}, compressed=${compressed.length})` + ); + } else { + terminal.writeDebugLine('Metadata compression skipped (not smaller)'); + } + } + + const metadataEntry: IFileEntry = { + filename: METADATA_FILENAME, + size: metadataBuffer.length, + compressedSize: metadataCompressedSize, + crc32: crc32Builder(metadataBuffer), + sha1Hash: calculateSHA1(metadataBuffer), + localHeaderOffset: currentOffset, + compressionMethod: metadataCompressionMethod, + dosDateTime: dosDateTimeNow + }; + + writeChunksToZip(writeLocalFileHeader(metadataEntry)); + writeChunkToZip(metadataData, metadataCompressedSize); + writeChunkToZip(writeDataDescriptor(metadataEntry)); + + entries.push(metadataEntry); + terminal.writeVerboseLine(`Total entries including metadata: ${entries.length}`); + + markEnd('pack.metadata.build'); + + markStart('pack.write.entries'); + const outputDir: string = path.dirname(archivePath); + fs.mkdirSync(outputDir, { recursive: true }); + + markEnd('pack.write.entries'); + + markStart('pack.write.centralDirectory'); + const centralDirOffset: number = currentOffset; + // Emit central directory records. + for (const entry of entries) { + writeChunksToZip(writeCentralDirectoryHeader(entry)); + } + const centralDirSize: number = currentOffset - centralDirOffset; + markEnd('pack.write.centralDirectory'); + + // Write end of central directory + markStart('pack.write.eocd'); + writeChunkToZip(writeEndOfCentralDirectory(centralDirOffset, centralDirSize, entries.length)); + terminal.writeDebugLine('EOCD record written'); + markEnd('pack.write.eocd'); + + markEnd('pack.total'); + const total: number = getDuration('pack.total'); + emitSummary('pack', terminal); + terminal.writeLine(`Successfully packed ${entries.length} files in ${formatDuration(total)}`); + return { filesPacked: entries.length, metadata }; +} diff --git a/apps/zipsync/src/packWorker.ts b/apps/zipsync/src/packWorker.ts new file mode 100644 index 00000000000..39e82fe8177 --- /dev/null +++ b/apps/zipsync/src/packWorker.ts @@ -0,0 +1,102 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
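For orientation, here is a minimal sketch of calling `pack()` directly from a sibling module, mirroring how the worker module below invokes it. The paths and option values are illustrative only and are not part of this commit.

```ts
// Editorial sketch (not part of this commit): direct, single-threaded use of pack().
import { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider';
import { Terminal } from '@rushstack/terminal/lib/Terminal';
import { pack, type IZipSyncPackResult } from './pack';

const terminal: Terminal = new Terminal(new ConsoleTerminalProvider());
const result: IZipSyncPackResult = pack({
  terminal,
  archivePath: '/tmp/cache-entry.zip', // illustrative path
  baseDir: '/repo/apps/example-project', // illustrative path
  targetDirectories: ['lib', 'dist'], // packed relative to baseDir
  compression: 'auto' // deflate, except for likely-already-compressed extensions
});
terminal.writeLine(
  `Packed ${result.filesPacked} entries; ${Object.keys(result.metadata.files).length} file hashes recorded`
);
```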
+ +import { parentPort as rawParentPort, type MessagePort } from 'node:worker_threads'; + +import { Terminal } from '@rushstack/terminal/lib/Terminal'; +import { StringBufferTerminalProvider } from '@rushstack/terminal/lib/StringBufferTerminalProvider'; + +import { type IZipSyncPackOptions, type IZipSyncPackResult, pack } from './pack'; +import { defaultBufferSize } from './zipSyncUtils'; + +export { type IZipSyncPackOptions, type IZipSyncPackResult } from './pack'; + +export interface IHashWorkerData { + basePath: string; +} + +export interface IZipSyncPackCommandMessage { + type: 'zipsync-pack'; + id: number; + options: Omit; +} + +export interface IZipSyncPackWorkerResult { + zipSyncReturn: IZipSyncPackResult; + zipSyncLogs: string; +} + +interface IZipSyncSuccessMessage { + id: number; + type: 'zipsync-pack'; + result: IZipSyncPackWorkerResult; +} + +export interface IZipSyncPackErrorMessage { + type: 'error'; + id: number; + args: { + message: string; + stack: string; + zipSyncLogs: string; + }; +} + +export type IHostToWorkerMessage = IZipSyncPackCommandMessage; +export type IWorkerToHostMessage = IZipSyncSuccessMessage | IZipSyncPackErrorMessage; + +if (!rawParentPort) { + throw new Error('This module must be run in a worker thread.'); +} +const parentPort: MessagePort = rawParentPort; + +let inputBuffer: Buffer | undefined = undefined; +let outputBuffer: Buffer | undefined = undefined; + +function handleMessage(message: IHostToWorkerMessage | false): void { + if (message === false) { + parentPort.removeAllListeners(); + parentPort.close(); + return; + } + + const terminalProvider: StringBufferTerminalProvider = new StringBufferTerminalProvider(); + const terminal: Terminal = new Terminal(terminalProvider); + + try { + switch (message.type) { + case 'zipsync-pack': { + const { options } = message; + if (!inputBuffer) { + inputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize); + } + if (!outputBuffer) { + outputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize); + } + + const successMessage: IZipSyncSuccessMessage = { + type: message.type, + id: message.id, + result: { + zipSyncReturn: pack({ ...options, terminal, inputBuffer, outputBuffer }), + zipSyncLogs: terminalProvider.getOutput() + } + }; + return parentPort.postMessage(successMessage); + } + } + } catch (err) { + const errorMessage: IZipSyncPackErrorMessage = { + type: 'error', + id: message.id, + args: { + message: (err as Error).message, + stack: (err as Error).stack || '', + zipSyncLogs: terminalProvider.getOutput() + } + }; + parentPort.postMessage(errorMessage); + } +} + +parentPort.on('message', handleMessage); diff --git a/apps/zipsync/src/packWorkerAsync.ts b/apps/zipsync/src/packWorkerAsync.ts new file mode 100644 index 00000000000..71e7b1db062 --- /dev/null +++ b/apps/zipsync/src/packWorkerAsync.ts @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
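Because the worker only shuts down when it receives `false`, a host could in principle keep one worker alive and send several pack commands over it (the `id` field exists for correlation); the `packWorkerAsync` wrapper that follows spins up a fresh worker per call instead. A rough sketch of the reuse pattern, with illustrative options:

```ts
// Editorial sketch (not part of this commit): reusing a single packWorker for multiple archives.
import { Worker } from 'node:worker_threads';
import type { IHostToWorkerMessage, IWorkerToHostMessage } from './packWorker';

const worker: Worker = new Worker(require.resolve('./packWorker'));
let nextId: number = 0;

function sendPack(options: IHostToWorkerMessage['options']): void {
  const message: IHostToWorkerMessage = { type: 'zipsync-pack', id: nextId++, options };
  worker.postMessage(message);
}

worker.on('message', (message: IWorkerToHostMessage) => {
  if (message.type === 'zipsync-pack') {
    console.log(`pack #${message.id} done\n${message.result.zipSyncLogs}`);
  } else {
    console.error(`pack #${message.id} failed: ${message.args.message}`);
  }
});

sendPack({ archivePath: 'a.zip', baseDir: '.', targetDirectories: ['lib'], compression: 'store' });
sendPack({ archivePath: 'b.zip', baseDir: '.', targetDirectories: ['dist'], compression: 'deflate' });
// When finished, `false` tells the worker to remove its listeners and close the parent port.
worker.postMessage(false);
```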
+ +import type { Worker } from 'node:worker_threads'; + +import type { + IWorkerToHostMessage, + IHostToWorkerMessage, + IZipSyncPackWorkerResult, + IZipSyncPackOptions +} from './packWorker'; + +export type { IZipSyncPackWorkerResult } from './packWorker'; + +export async function packWorkerAsync( + options: Omit +): Promise { + const { Worker } = await import('node:worker_threads'); + + const worker: Worker = new Worker(require.resolve('./packWorker')); + + return new Promise((resolve, reject) => { + worker.on('message', (message: IWorkerToHostMessage) => { + switch (message.type) { + case 'zipsync-pack': { + resolve(message.result); + break; + } + case 'error': { + const error: Error = new Error(message.args.message); + error.stack = message.args.stack; + reject(error); + break; + } + default: { + const exhaustiveCheck: never = message; + throw new Error(`Unexpected message type: ${JSON.stringify(exhaustiveCheck)}`); + } + } + }); + + worker.on('error', (err) => { + reject(err); + }); + + worker.on('exit', (code) => { + if (code !== 0) { + reject(new Error(`Worker stopped with exit code ${code}`)); + } + }); + + const commandMessage: IHostToWorkerMessage = { + type: 'zipsync-pack', + id: 0, + options + }; + worker.postMessage(commandMessage); + }).finally(() => { + worker.postMessage(false); + }); +} diff --git a/apps/zipsync/src/perf.ts b/apps/zipsync/src/perf.ts new file mode 100644 index 00000000000..1e4677a8eee --- /dev/null +++ b/apps/zipsync/src/perf.ts @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import type { PerformanceEntry } from 'node:perf_hooks'; +import { performance } from 'node:perf_hooks'; + +import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; + +export function markStart(name: string): void { + performance.mark(`zipsync:${name}:start`); +} +export function markEnd(name: string): void { + const base: string = `zipsync:${name}`; + performance.mark(`${base}:end`); + performance.measure(base, `${base}:start`, `${base}:end`); +} +export function getDuration(name: string): number { + const measures: PerformanceEntry[] = performance.getEntriesByName( + `zipsync:${name}` + ) as unknown as PerformanceEntry[]; + if (measures.length === 0) return 0; + return measures[measures.length - 1].duration; +} +export function formatDuration(ms: number): string { + return ms >= 1000 ? (ms / 1000).toFixed(2) + 's' : ms.toFixed(2) + 'ms'; +} +export function emitSummary(operation: 'pack' | 'unpack', term: ITerminal): void { + const totalName: string = `${operation}.total`; + // Ensure total is measured + markEnd(totalName); + const totalDuration: number = getDuration(totalName); + const prefix: string = `zipsync:${operation}.`; + const measures: PerformanceEntry[] = performance.getEntriesByType( + 'measure' + ) as unknown as PerformanceEntry[]; + const rows: Array<{ name: string; dur: number }> = []; + for (const m of measures) { + if (!m.name.startsWith(prefix)) continue; + if (m.name === `zipsync:${totalName}`) continue; + // Extract segment name (remove prefix) + const segment: string = m.name.substring(prefix.length); + rows.push({ name: segment, dur: m.duration }); + } + rows.sort((a, b) => b.dur - a.dur); + const lines: string[] = rows.map((r) => { + const pct: number = totalDuration ? 
(r.dur / totalDuration) * 100 : 0; + return ` ${r.name}: ${formatDuration(r.dur)} (${pct.toFixed(1)}%)`; + }); + lines.push(` TOTAL ${operation}.total: ${formatDuration(totalDuration)}`); + term.writeVerboseLine(`Performance summary (${operation}):\n` + lines.join('\n')); + // Cleanup marks/measures to avoid unbounded growth + performance.clearMarks(); + performance.clearMeasures(); +} diff --git a/apps/zipsync/src/start.test.ts b/apps/zipsync/src/start.test.ts new file mode 100644 index 00000000000..ce4cb7fba4d --- /dev/null +++ b/apps/zipsync/src/start.test.ts @@ -0,0 +1,11 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { execSync } from 'child_process'; + +describe('CLI Tool Tests', () => { + it('should display help for "zipsync --help"', () => { + const startOutput = execSync('node lib/start.js --help').toString(); + expect(startOutput).toMatchSnapshot(); + }); +}); diff --git a/apps/zipsync/src/start.ts b/apps/zipsync/src/start.ts new file mode 100644 index 00000000000..f1bf39e55c0 --- /dev/null +++ b/apps/zipsync/src/start.ts @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider'; +import { Terminal } from '@rushstack/terminal/lib/Terminal'; + +import { version } from '../package.json'; +import { ZipSyncCommandLineParser } from './ZipSyncCommandLineParser'; + +const toolVersion: string = version; + +const consoleTerminalProvider: ConsoleTerminalProvider = new ConsoleTerminalProvider(); +const terminal: Terminal = new Terminal(consoleTerminalProvider); + +terminal.writeLine(); +terminal.writeLine(`zipsync ${toolVersion} - https://rushstack.io`); +terminal.writeLine(); + +const commandLine: ZipSyncCommandLineParser = new ZipSyncCommandLineParser(consoleTerminalProvider, terminal); +commandLine.executeAsync().catch((error) => { + terminal.writeError(error); +}); diff --git a/apps/zipsync/src/testUtils.ts b/apps/zipsync/src/testUtils.ts new file mode 100644 index 00000000000..ed1555c3cd8 --- /dev/null +++ b/apps/zipsync/src/testUtils.ts @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
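The perf helpers above are thin wrappers over `node:perf_hooks`; the intended call pattern (with an illustrative measurement name) looks like this:

```ts
// Editorial sketch (not part of this commit): composing the perf helpers.
import { markStart, markEnd, getDuration, formatDuration, emitSummary } from './perf';
import { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider';
import { Terminal } from '@rushstack/terminal/lib/Terminal';

const terminal: Terminal = new Terminal(new ConsoleTerminalProvider());

markStart('pack.total');
markStart('pack.enumerate');
// ... enumerate files ...
markEnd('pack.enumerate');
markEnd('pack.total');

terminal.writeLine(`enumerate took ${formatDuration(getDuration('pack.enumerate'))}`);
// emitSummary prints each segment's duration as a percentage of <operation>.total,
// then clears all marks/measures so repeated runs do not accumulate entries.
emitSummary('pack', terminal);
```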
+ +import { tmpdir } from 'node:os'; +import * as path from 'node:path'; +import * as fs from 'node:fs'; +import * as crypto from 'node:crypto'; +import type { IMetadata } from './zipSyncUtils'; + +export function getTempDir(): string { + const randomId: string = crypto.randomUUID(); + const tempDir: string = path.join(tmpdir(), `zipsync-test-${randomId}`); + fs.mkdirSync(tempDir); + return tempDir; +} + +export function getDemoDataDirectoryDisposable(numFiles: number): { + targetDirectories: string[]; + baseDir: string; + metadata: IMetadata; + [Symbol.dispose](): void; +} { + const baseDir: string = getTempDir(); + + const metadata: IMetadata = { files: {}, version: '1.0' }; + + const targetDirectories: string[] = ['demo-data-1', 'demo-data-2', 'demo-data-3', 'nested/demo/dir/4'].map( + (folderName) => { + const dataDir: string = path.join(baseDir, folderName); + fs.mkdirSync(dataDir, { recursive: true }); + const subdir: string = path.join(dataDir, 'subdir'); + fs.mkdirSync(subdir); + for (let i: number = 0; i < numFiles; ++i) { + const filePath: string = path.join(subdir, `file-${i}.txt`); + const content: string = `This is file ${i} in ${folderName}/subdir\n`; + const sha1Hash: string = crypto.createHash('sha1').update(content).digest('hex'); + fs.writeFileSync(filePath, content, { encoding: 'utf-8' }); + const relativeFilePath: string = path.relative(baseDir, filePath).replace(/\\/g, '/'); + metadata.files[relativeFilePath] = { size: content.length, sha1Hash }; + } + return folderName; + } + ); + + return { + targetDirectories, + baseDir, + metadata, + [Symbol.dispose]() { + fs.rmSync(baseDir, { recursive: true, force: true }); + } + }; +} diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts new file mode 100644 index 00000000000..f4592c34d16 --- /dev/null +++ b/apps/zipsync/src/unpack.ts @@ -0,0 +1,368 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
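The demo-data helper above relies on TC39 explicit resource management (`using`, available in TypeScript 5.2+): the returned object's `[Symbol.dispose]()` removes the temp directory when the variable goes out of scope, even if the test throws. Roughly:

```ts
// Editorial sketch (not part of this commit): how a test consumes the disposable helper.
import * as path from 'node:path';
import { getDemoDataDirectoryDisposable } from './testUtils';

function example(): void {
  using demoData = getDemoDataDirectoryDisposable(3);
  const firstDir: string = path.join(demoData.baseDir, demoData.targetDirectories[0]);
  console.log(`demo data under ${firstDir}`);
} // <-- [Symbol.dispose]() runs here: fs.rmSync(baseDir, { recursive: true, force: true })
```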
+ +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import * as zlib from 'node:zlib'; + +import { type IReadonlyPathTrieNode, LookupByPath } from '@rushstack/lookup-by-path/lib/LookupByPath'; +import type { ITerminal } from '@rushstack/terminal'; + +import { getDisposableFileHandle, rmdirSync, unlinkSync, type IDisposableFileHandle } from './fs'; +import { type IIncrementalZlib, type IncrementalZlibMode, createIncrementalZlib } from './compress'; +import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; +import { + findEndOfCentralDirectory, + parseCentralDirectoryHeader, + getFileFromZip, + ZSTD_COMPRESSION, + DEFLATE_COMPRESSION, + STORE_COMPRESSION, + type IEndOfCentralDirectory, + type ICentralDirectoryHeaderParseResult, + type ZipMetaCompressionMethod +} from './zipUtils'; +import { computeFileHash } from './hash'; +import { + defaultBufferSize, + METADATA_FILENAME, + METADATA_VERSION, + type IDirQueueItem, + type IMetadata +} from './zipSyncUtils'; + +const zlibUnpackModes: Record = { + [ZSTD_COMPRESSION]: 'zstd-decompress', + [DEFLATE_COMPRESSION]: 'inflate', + [STORE_COMPRESSION]: undefined +} as const; + +/** + * @public + * Options for zipsync + */ +export interface IZipSyncUnpackOptions { + /** + * \@rushstack/terminal compatible terminal for logging + */ + terminal: ITerminal; + /** + * Zip file path + */ + archivePath: string; + /** + * Target directories to unpack, relative to baseDir + */ + targetDirectories: ReadonlyArray; + /** + * Base directory for relative paths within the archive (defaults to common parent of targetDirectories) + */ + baseDir: string; + /** + * Optional buffer that can be provided to avoid internal allocations. + */ + outputBuffer?: Buffer; +} + +export interface IZipSyncUnpackResult { + metadata: IMetadata; + filesExtracted: number; + filesSkipped: number; + filesDeleted: number; + foldersDeleted: number; + otherEntriesDeleted: number; +} + +/** + * Unpack a zipsync archive into the provided target directories. + */ +export function unpack({ + archivePath, + targetDirectories: rawTargetDirectories, + baseDir: rawBaseDir, + terminal, + outputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize) +}: IZipSyncUnpackOptions): IZipSyncUnpackResult { + const baseDir: string = path.resolve(rawBaseDir); + const targetDirectories: string[] = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); + terminal.writeLine(`Unpacking to ${rawTargetDirectories.join(', ')} from ${archivePath}`); + + markStart('unpack.total'); + terminal.writeDebugLine('Starting unpackZip'); + + // Read entire archive into memory (build cache entries are expected to be relatively small/medium). + markStart('unpack.read.archive'); + const zipBuffer: Buffer = fs.readFileSync(archivePath); + terminal.writeDebugLine(`Archive size=${zipBuffer.length} bytes`); + markEnd('unpack.read.archive'); + + // Locate & parse central directory so we have random-access metadata for all entries. 
+ markStart('unpack.parse.centralDirectory'); + const zipTree: LookupByPath = new LookupByPath(); + const endOfCentralDir: IEndOfCentralDirectory = findEndOfCentralDirectory(zipBuffer); + + const centralDirBuffer: Buffer = zipBuffer.subarray( + endOfCentralDir.centralDirOffset, + endOfCentralDir.centralDirOffset + endOfCentralDir.centralDirSize + ); + terminal.writeDebugLine( + `Central directory slice size=${centralDirBuffer.length} (expected=${endOfCentralDir.centralDirSize})` + ); + + let metadataEntry: ICentralDirectoryHeaderParseResult | undefined; + const entries: Array = []; + let offset: number = 0; + + for (let i: number = 0; i < endOfCentralDir.totalCentralDirRecords; i++) { + const result: ICentralDirectoryHeaderParseResult = parseCentralDirectoryHeader(centralDirBuffer, offset); + zipTree.setItem(result.filename, true); + + if (result.filename === METADATA_FILENAME) { + if (metadataEntry) { + throw new Error('Multiple metadata entries found in archive'); + } + metadataEntry = result; + } + + entries.push(result); + offset = result.nextOffset; + terminal.writeDebugLine( + `Parsed central entry ${result.filename} (method=${result.header.compressionMethod}, compSize=${result.header.compressedSize})` + ); + } + markEnd('unpack.parse.centralDirectory'); + + if (!metadataEntry) { + throw new Error(`Metadata entry not found in archive`); + } + + markStart('unpack.read.metadata'); + terminal.writeDebugLine('Metadata entry found, reading'); + const metadataZipBuffer: Buffer = getFileFromZip(zipBuffer, metadataEntry); + + let metadataBuffer: Buffer; + if (metadataEntry.header.compressionMethod === STORE_COMPRESSION) { + metadataBuffer = metadataZipBuffer; + } else if (metadataEntry.header.compressionMethod === DEFLATE_COMPRESSION) { + metadataBuffer = zlib.inflateRawSync(metadataZipBuffer); + if (metadataBuffer.length !== metadataEntry.header.uncompressedSize) { + throw new Error( + `Metadata size mismatch (expected ${metadataEntry.header.uncompressedSize}, got ${metadataBuffer.length})` + ); + } + } else { + throw new Error(`Unsupported compression method for metadata: ${metadataEntry.header.compressionMethod}`); + } + + const metadata: IMetadata = JSON.parse(metadataBuffer.toString('utf8')) as IMetadata; + + if (metadata.version !== METADATA_VERSION) { + throw new Error(`Unsupported metadata version: ${metadata.version}`); + } + + terminal.writeDebugLine( + `Metadata (version=${metadata.version}) parsed (fileCount=${Object.keys(metadata.files).length}, rawSize=${metadataBuffer.length})` + ); + markEnd('unpack.read.metadata'); + + terminal.writeLine(`Found ${entries.length} files in archive`); + + // Ensure root target directories exist (they may be empty initially for cache misses). + for (const targetDirectory of targetDirectories) { + fs.mkdirSync(targetDirectory, { recursive: true }); + terminal.writeDebugLine(`Ensured target directory: ${targetDirectory}`); + } + + let extractedCount: number = 0; + let skippedCount: number = 0; + let deletedFilesCount: number = 0; + let deletedOtherCount: number = 0; + let deletedFoldersCount: number = 0; + let scanCount: number = 0; + + const dirsToCleanup: string[] = []; + + // Phase: scan filesystem to delete entries not present in archive and record empty dirs for later removal. 
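// (editorial note) The zipTree trie was populated above from every central-directory filename, so the
// scan below can walk the filesystem and the trie in lockstep: a file on disk is kept only when the
// matching trie node carries a value, and each directory enqueues its child node (if any) for the
// next level of the walk.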
+ markStart('unpack.scan.existing'); + const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ + dir, + depth: 0, + node: zipTree.getNodeAtPrefix(path.relative(baseDir, dir)) + })); + + while (queue.length) { + const { dir: currentDir, depth, node } = queue.shift()!; + terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); + + const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); + + let items: fs.Dirent[]; + try { + items = fs.readdirSync(currentDir, { withFileTypes: true }); + } catch (e) { + terminal.writeWarningLine(`Failed to read directory: ${currentDir}`); + continue; + } + + for (const item of items) { + scanCount++; + // check if exists in zipTree, if not delete + const relativePath: string = path + .relative(baseDir, path.join(currentDir, item.name)) + .replace(/\\/g, '/'); + + const childNode: IReadonlyPathTrieNode | undefined = node?.children?.get(item.name); + + if (item.isFile()) { + terminal.writeVerboseLine(`${padding}${item.name}`); + if (!childNode?.value) { + terminal.writeDebugLine(`Deleting file: ${relativePath}`); + if (unlinkSync(relativePath)) { + deletedFilesCount++; + } + } + } else if (item.isDirectory()) { + terminal.writeVerboseLine(`${padding}${item.name}/`); + queue.push({ dir: relativePath, depth: depth + 1, node: childNode }); + if (!childNode || childNode.value) { + dirsToCleanup.push(relativePath); + } + } else { + terminal.writeVerboseLine(`${padding}${item.name} (not file or directory, deleting)`); + if (unlinkSync(relativePath)) { + deletedOtherCount++; + } + } + } + } + + // Try to delete now-empty directories (created in previous builds but not in this archive). + for (const dir of dirsToCleanup) { + // Try to remove the directory. If it is not empty, this will throw and we can ignore the error. + if (rmdirSync(dir)) { + terminal.writeDebugLine(`Deleted empty directory: ${dir}`); + deletedFoldersCount++; + } + } + + terminal.writeDebugLine(`Existing entries tracked: ${scanCount}`); + markEnd('unpack.scan.existing'); + + markStart('unpack.extract.loop'); + + /** + * Stream-decompress (or copy) an individual file from the archive into place. + * We allocate a single large output buffer reused for all inflation operations to limit GC. + */ + function extractFileFromZip(targetPath: string, entry: ICentralDirectoryHeaderParseResult): void { + terminal.writeDebugLine(`Extracting file: ${entry.filename}`); + const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); + let fileData: Buffer; + using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); + if (entry.header.compressionMethod === STORE_COMPRESSION) { + fileData = fileZipBuffer; + let writeOffset: number = 0; + while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { + const written: number = fs.writeSync( + fileHandle.fd, + fileData, + writeOffset, + fileData.length - writeOffset + ); + writeOffset += written; + } + } else if ( + entry.header.compressionMethod === DEFLATE_COMPRESSION || + entry.header.compressionMethod === ZSTD_COMPRESSION + ) { + using incrementalZlib: IIncrementalZlib = createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + let writeOffset: number = 0; + while (lengthBytes > 0 && writeOffset < chunk.byteLength) { + const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); + lengthBytes -= written; + writeOffset += written; + } + }, + zlibUnpackModes[entry.header.compressionMethod]! 
+ ); + incrementalZlib.update(fileZipBuffer); + incrementalZlib.update(Buffer.alloc(0)); + } else { + throw new Error( + `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` + ); + } + } + + /** + * Decide whether a file needs extraction by comparing existing file SHA‑1 vs metadata. + * If file is missing or hash differs we extract; otherwise we skip to preserve existing inode/data. + */ + function shouldExtract(targetPath: string, entry: ICentralDirectoryHeaderParseResult): boolean { + if (metadata) { + const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; + + if (metadataFile) { + try { + using existingFile: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'r'); + const existingHash: string | false = computeFileHash(existingFile.fd); + if (existingHash === metadataFile.sha1Hash) { + return false; + } + } catch (e) { + if ((e as NodeJS.ErrnoException).code === 'ENOENT') { + terminal.writeDebugLine(`File does not exist, will extract: ${entry.filename}`); + } else { + throw e; + } + } + } + } + return true; + } + + const dirsCreated: Set = new Set(); + + // Iterate all entries excluding metadata; create parent dirs lazily; selective extraction. + for (const entry of entries) { + if (entry.filename === METADATA_FILENAME) { + continue; + } + + const targetPath: string = path.join(baseDir, entry.filename); + const targetDir: string = path.dirname(targetPath); + if (!dirsCreated.has(targetDir)) { + fs.mkdirSync(targetDir, { recursive: true }); + dirsCreated.add(targetDir); + } + + if (shouldExtract(targetPath, entry)) { + extractFileFromZip(targetPath, entry); + extractedCount++; + } else { + skippedCount++; + terminal.writeDebugLine(`Skip unchanged file: ${entry.filename}`); + } + } + markEnd('unpack.extract.loop'); + + markEnd('unpack.total'); + const unpackTotal: number = getDuration('unpack.total'); + terminal.writeLine( + `Extraction complete: ${extractedCount} extracted, ${skippedCount} skipped, ${deletedFilesCount} deleted, ${deletedFoldersCount} folders deleted, ${deletedOtherCount} other entries deleted in ${formatDuration( + unpackTotal + )}` + ); + emitSummary('unpack', terminal); + terminal.writeDebugLine('unpackZip finished'); + return { + metadata, + filesExtracted: extractedCount, + filesSkipped: skippedCount, + filesDeleted: deletedFilesCount, + foldersDeleted: deletedFoldersCount, + otherEntriesDeleted: deletedOtherCount + }; +} diff --git a/apps/zipsync/src/unpackWorker.ts b/apps/zipsync/src/unpackWorker.ts new file mode 100644 index 00000000000..b8ddedb5fa5 --- /dev/null +++ b/apps/zipsync/src/unpackWorker.ts @@ -0,0 +1,98 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
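As with `pack()`, `unpack()` can be driven directly; the worker wrappers below add only log capture and thread isolation. An illustrative call (paths are placeholders, not part of this commit):

```ts
// Editorial sketch (not part of this commit): direct, single-threaded use of unpack().
import { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider';
import { Terminal } from '@rushstack/terminal/lib/Terminal';
import { unpack, type IZipSyncUnpackResult } from './unpack';

const terminal: Terminal = new Terminal(new ConsoleTerminalProvider());
const result: IZipSyncUnpackResult = unpack({
  terminal,
  archivePath: '/tmp/cache-entry.zip', // illustrative path
  baseDir: '/repo/apps/example-project', // illustrative path
  targetDirectories: ['lib', 'dist']
});
terminal.writeLine(
  `extracted=${result.filesExtracted} skipped=${result.filesSkipped} ` +
    `deleted=${result.filesDeleted} foldersDeleted=${result.foldersDeleted}`
);
```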
+ +import { parentPort as rawParentPort, type MessagePort } from 'node:worker_threads'; + +import { Terminal } from '@rushstack/terminal/lib/Terminal'; +import { StringBufferTerminalProvider } from '@rushstack/terminal/lib/StringBufferTerminalProvider'; + +import { type IZipSyncUnpackOptions, type IZipSyncUnpackResult, unpack } from './unpack'; +import { defaultBufferSize } from './zipSyncUtils'; + +export { type IZipSyncUnpackOptions, type IZipSyncUnpackResult } from './unpack'; + +export interface IHashWorkerData { + basePath: string; +} + +export interface IZipSyncUnpackCommandMessage { + type: 'zipsync-unpack'; + id: number; + options: Omit; +} + +export interface IZipSyncUnpackWorkerResult { + zipSyncReturn: IZipSyncUnpackResult; + zipSyncLogs: string; +} + +interface IZipSyncUnpackSuccessMessage { + id: number; + type: 'zipsync-unpack'; + result: IZipSyncUnpackWorkerResult; +} + +export interface IZipSyncUnpackErrorMessage { + type: 'error'; + id: number; + args: { + message: string; + stack: string; + zipSyncLogs: string; + }; +} + +export type IHostToWorkerMessage = IZipSyncUnpackCommandMessage; +export type IWorkerToHostMessage = IZipSyncUnpackSuccessMessage | IZipSyncUnpackErrorMessage; + +if (!rawParentPort) { + throw new Error('This module must be run in a worker thread.'); +} +const parentPort: MessagePort = rawParentPort; + +let outputBuffer: Buffer | undefined = undefined; + +function handleMessage(message: IHostToWorkerMessage | false): void { + if (message === false) { + parentPort.removeAllListeners(); + parentPort.close(); + return; + } + + const terminalProvider: StringBufferTerminalProvider = new StringBufferTerminalProvider(); + const terminal: Terminal = new Terminal(terminalProvider); + + try { + switch (message.type) { + case 'zipsync-unpack': { + const { options } = message; + if (!outputBuffer) { + outputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize); + } + + const successMessage: IZipSyncUnpackSuccessMessage = { + type: message.type, + id: message.id, + result: { + zipSyncReturn: unpack({ ...options, terminal, outputBuffer }), + zipSyncLogs: terminalProvider.getOutput() + } + }; + return parentPort.postMessage(successMessage); + } + } + } catch (err) { + const errorMessage: IZipSyncUnpackErrorMessage = { + type: 'error', + id: message.id, + args: { + message: (err as Error).message, + stack: (err as Error).stack || '', + zipSyncLogs: terminalProvider.getOutput() + } + }; + parentPort.postMessage(errorMessage); + } +} + +parentPort.on('message', handleMessage); diff --git a/apps/zipsync/src/unpackWorkerAsync.ts b/apps/zipsync/src/unpackWorkerAsync.ts new file mode 100644 index 00000000000..73714a016b7 --- /dev/null +++ b/apps/zipsync/src/unpackWorkerAsync.ts @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+ +import type { Worker } from 'node:worker_threads'; + +import type { + IWorkerToHostMessage, + IHostToWorkerMessage, + IZipSyncUnpackWorkerResult, + IZipSyncUnpackOptions +} from './unpackWorker'; + +export type { IZipSyncUnpackWorkerResult } from './unpackWorker'; + +export async function unpackWorkerAsync( + options: Omit +): Promise { + const { Worker } = await import('node:worker_threads'); + + const worker: Worker = new Worker(require.resolve('./unpackWorker')); + + return new Promise((resolve, reject) => { + worker.on('message', (message: IWorkerToHostMessage) => { + switch (message.type) { + case 'zipsync-unpack': { + resolve(message.result); + break; + } + case 'error': { + const error: Error = new Error(message.args.message); + error.stack = message.args.stack; + reject(error); + break; + } + default: { + const exhaustiveCheck: never = message; + throw new Error(`Unexpected message type: ${JSON.stringify(exhaustiveCheck)}`); + } + } + }); + + worker.on('error', (err) => { + reject(err); + }); + + worker.on('exit', (code) => { + if (code !== 0) { + reject(new Error(`Worker stopped with exit code ${code}`)); + } + }); + + const commandMessage: IHostToWorkerMessage = { + type: 'zipsync-unpack', + id: 0, + options + }; + worker.postMessage(commandMessage); + }).finally(() => { + worker.postMessage(false); + }); +} diff --git a/apps/zipsync/src/workerAsync.test.ts b/apps/zipsync/src/workerAsync.test.ts new file mode 100644 index 00000000000..1392f51223f --- /dev/null +++ b/apps/zipsync/src/workerAsync.test.ts @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import * as path from 'node:path'; +import * as fs from 'node:fs'; + +import { unpackWorkerAsync } from './unpackWorkerAsync'; +import { packWorkerAsync } from './packWorkerAsync'; +import { getDemoDataDirectoryDisposable } from './testUtils'; + +describe('zipSyncWorkerAsync tests', () => { + it('basic pack test', async () => { + using demoDataDisposable = getDemoDataDirectoryDisposable(5); + const { targetDirectories, baseDir, metadata } = demoDataDisposable; + + const archivePath: string = path.join(baseDir, 'archive.zip'); + const { zipSyncReturn: packResult } = await packWorkerAsync({ + compression: 'deflate', + baseDir, + targetDirectories, + archivePath + }); + + expect(packResult).toMatchObject({ filesPacked: 21, metadata }); + + using unpackDemoDataDisposable = getDemoDataDirectoryDisposable(2); + const { baseDir: unpackBaseDir } = unpackDemoDataDisposable; + + const { zipSyncReturn: unpackResult } = await unpackWorkerAsync({ + archivePath, + baseDir: unpackBaseDir, + targetDirectories + }); + + expect(unpackResult).toMatchObject({ + filesDeleted: 0, + filesExtracted: 12, + filesSkipped: 8, + foldersDeleted: 0, + metadata + }); + + // Verify files were extracted + for (const targetDirectory of targetDirectories) { + const sourceDir: string = path.join(baseDir, targetDirectory); + for (let i: number = 0; i < 5; ++i) { + const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); + const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); + expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( + fs.readFileSync(sourceFile, { encoding: 'utf-8' }) + ); + } + } + }); +}); diff --git a/apps/zipsync/src/zipSyncUtils.ts b/apps/zipsync/src/zipSyncUtils.ts new file mode 100644 index 00000000000..3e03062eb84 --- /dev/null +++ 
b/apps/zipsync/src/zipSyncUtils.ts @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import type { IReadonlyPathTrieNode } from '@rushstack/lookup-by-path/lib/LookupByPath'; + +export const METADATA_FILENAME: string = '__zipsync_metadata__.json'; +export const METADATA_VERSION: string = '1.0'; + +export interface IDirQueueItem { + dir: string; + depth: number; + node?: IReadonlyPathTrieNode | undefined; +} + +export interface IMetadataFileRecord { + size: number; + sha1Hash: string; +} + +export interface IMetadata { + version: string; + files: Record; +} + +export type IZipSyncMode = 'pack' | 'unpack'; + +export type ZipSyncOptionCompression = 'store' | 'deflate' | 'zstd' | 'auto'; + +export const defaultBufferSize: number = 1 << 25; // 32 MiB diff --git a/apps/zipsync/src/zipUtils.ts b/apps/zipsync/src/zipUtils.ts new file mode 100644 index 00000000000..5dbd1d1f59c --- /dev/null +++ b/apps/zipsync/src/zipUtils.ts @@ -0,0 +1,411 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +/** + * Low-level ZIP structure helpers used by the zipsync pack/unpack pipeline. + * + * Spec reference: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + */ + +/** + * Local file header signature PK\x03\x04 + */ +const LOCAL_FILE_HEADER_SIGNATURE: number = 0x04034b50; // PK\x03\x04 +/** + * Central directory file header signature PK\x01\x02 + */ +const CENTRAL_DIR_HEADER_SIGNATURE: number = 0x02014b50; // PK\x01\x02 +/** + * End of central directory signature PK\x05\x06 + */ +const END_OF_CENTRAL_DIR_SIGNATURE: number = 0x06054b50; // PK\x05\x06 +/** + * Data descriptor signature PK\x07\x08 + */ +const DATA_DESCRIPTOR_SIGNATURE: number = 0x08074b50; // PK\x07\x08 + +export const STORE_COMPRESSION: 0 = 0; +export const DEFLATE_COMPRESSION: 8 = 8; +export const ZSTD_COMPRESSION: 93 = 93; +export type ZipMetaCompressionMethod = + | typeof STORE_COMPRESSION + | typeof DEFLATE_COMPRESSION + | typeof ZSTD_COMPRESSION; + +export interface IFileEntry { + filename: string; + size: number; + compressedSize: number; + crc32: number; + sha1Hash: string; + localHeaderOffset: number; + compressionMethod: ZipMetaCompressionMethod; + dosDateTime: { time: number; date: number }; +} + +export interface ILocalFileHeader { + signature: number; + versionNeeded: number; + flags: number; + compressionMethod: number; + lastModTime: number; + lastModDate: number; + crc32: number; + compressedSize: number; + uncompressedSize: number; + filenameLength: number; + extraFieldLength: number; +} + +export interface ICentralDirectoryHeader { + signature: number; + versionMadeBy: number; + versionNeeded: number; + flags: number; + compressionMethod: number; + lastModTime: number; + lastModDate: number; + crc32: number; + compressedSize: number; + uncompressedSize: number; + filenameLength: number; + extraFieldLength: number; + commentLength: number; + diskNumberStart: number; + internalFileAttributes: number; + externalFileAttributes: number; + localHeaderOffset: number; +} + +export interface IEndOfCentralDirectory { + signature: number; + diskNumber: number; + centralDirStartDisk: number; + centralDirRecordsOnDisk: number; + totalCentralDirRecords: number; + centralDirSize: number; + centralDirOffset: number; + commentLength: number; +} + +function writeUInt32LE(buffer: Buffer, value: number, offset: 
number): void { + buffer.writeUInt32LE(value, offset); +} + +function writeUInt16LE(buffer: Buffer, value: number, offset: number): void { + buffer.writeUInt16LE(value, offset); +} + +function readUInt32LE(buffer: Buffer, offset: number): number { + return buffer.readUInt32LE(offset); +} + +function readUInt16LE(buffer: Buffer, offset: number): number { + return buffer.readUInt16LE(offset); +} + +/** + * Convert a JS Date into packed DOS time/date fields used by classic ZIP. + * Seconds are stored /2 (range 0-29 => 0-58s). Years are offset from 1980. + */ +export function dosDateTime(date: Date): { time: number; date: number } { + /* eslint-disable no-bitwise */ + const time: number = + ((date.getHours() & 0x1f) << 11) | ((date.getMinutes() & 0x3f) << 5) | ((date.getSeconds() / 2) & 0x1f); + + const dateVal: number = + (((date.getFullYear() - 1980) & 0x7f) << 9) | + (((date.getMonth() + 1) & 0xf) << 5) | + (date.getDate() & 0x1f); + /* eslint-enable no-bitwise */ + + return { time, date: dateVal }; +} + +/** + * Reusable scratch buffer for the fixed-length local file header (30 bytes). + * Using a single Buffer avoids per-file allocations; callers must copy/use synchronously. + */ +const localFileHeaderBuffer: Buffer = Buffer.allocUnsafe(30); +/** + * Write the fixed portion of a local file header for an entry (with data descriptor flag set) and + * return the header buffer plus the variable-length filename buffer. + * + * Layout (little-endian): + * signature(4) versionNeeded(2) flags(2) method(2) modTime(2) modDate(2) + * crc32(4) compSize(4) uncompSize(4) nameLen(2) extraLen(2) + * + * Because we set bit 3 of the general purpose flag, crc32/compSize/uncompSize are zero here and the + * actual values appear later in a trailing data descriptor record. This enables streaming without + * buffering entire file contents beforehand. + */ +export function writeLocalFileHeader( + entry: IFileEntry +): [fileHeaderWithoutVariableLengthData: Buffer, fileHeaderVariableLengthData: Buffer] { + const filenameBuffer: Buffer = Buffer.from(entry.filename, 'utf8'); + + const { time, date } = entry.dosDateTime; + + let offset: number = 0; + writeUInt32LE(localFileHeaderBuffer, LOCAL_FILE_HEADER_SIGNATURE, offset); + offset += 4; + writeUInt16LE(localFileHeaderBuffer, 20, offset); // version needed + offset += 2; + // General purpose bit flag: set bit 3 (0x0008) to indicate presence of data descriptor + // Per APPNOTE: when bit 3 is set, CRC-32 and sizes in local header are set to zero and + // the actual values are stored in the data descriptor that follows the file data. 
+ writeUInt16LE(localFileHeaderBuffer, 0x0008, offset); // flags (data descriptor) + offset += 2; + writeUInt16LE(localFileHeaderBuffer, entry.compressionMethod, offset); // compression method (0=store,8=deflate) + offset += 2; + writeUInt16LE(localFileHeaderBuffer, time, offset); // last mod time + offset += 2; + writeUInt16LE(localFileHeaderBuffer, date, offset); // last mod date + offset += 2; + // With bit 3 set, these three fields MUST be zero in the local header + writeUInt32LE(localFileHeaderBuffer, 0, offset); // crc32 (placeholder, real value in data descriptor) + offset += 4; + writeUInt32LE(localFileHeaderBuffer, 0, offset); // compressed size (placeholder) + offset += 4; + writeUInt32LE(localFileHeaderBuffer, 0, offset); // uncompressed size (placeholder) + offset += 4; + writeUInt16LE(localFileHeaderBuffer, filenameBuffer.length, offset); // filename length + offset += 2; + writeUInt16LE(localFileHeaderBuffer, 0, offset); // extra field length + offset += 2; + + return [localFileHeaderBuffer, filenameBuffer]; +} + +/** + * Reusable scratch buffer for central directory entries (fixed-length 46 bytes before filename) + */ +const centralDirHeaderBuffer: Buffer = Buffer.allocUnsafe(46); +/** + * Write a central directory header referencing an already written local file entry. + * Central directory consolidates the final CRC + sizes (always present here) and provides a table + * for fast enumeration without scanning the archive sequentially. + */ +export function writeCentralDirectoryHeader(entry: IFileEntry): Buffer[] { + const filenameBuffer: Buffer = Buffer.from(entry.filename, 'utf8'); + + const now: Date = new Date(); + const { time, date } = dosDateTime(now); + + let offset: number = 0; + writeUInt32LE(centralDirHeaderBuffer, CENTRAL_DIR_HEADER_SIGNATURE, offset); + offset += 4; + writeUInt16LE(centralDirHeaderBuffer, 20, offset); // version made by + offset += 2; + writeUInt16LE(centralDirHeaderBuffer, 20, offset); // version needed + offset += 2; + // Mirror flags used in local header (bit 3 set to indicate data descriptor was used) + writeUInt16LE(centralDirHeaderBuffer, 0x0008, offset); // flags + offset += 2; + writeUInt16LE(centralDirHeaderBuffer, entry.compressionMethod, offset); // compression method + offset += 2; + writeUInt16LE(centralDirHeaderBuffer, time, offset); // last mod time + offset += 2; + writeUInt16LE(centralDirHeaderBuffer, date, offset); // last mod date + offset += 2; + writeUInt32LE(centralDirHeaderBuffer, entry.crc32, offset); // crc32 + offset += 4; + writeUInt32LE(centralDirHeaderBuffer, entry.compressedSize, offset); // compressed size + offset += 4; + writeUInt32LE(centralDirHeaderBuffer, entry.size, offset); // uncompressed size + offset += 4; + writeUInt16LE(centralDirHeaderBuffer, filenameBuffer.length, offset); // filename length + offset += 2; + writeUInt16LE(centralDirHeaderBuffer, 0, offset); // extra field length + offset += 2; + writeUInt16LE(centralDirHeaderBuffer, 0, offset); // comment length + offset += 2; + writeUInt16LE(centralDirHeaderBuffer, 0, offset); // disk number start + offset += 2; + writeUInt16LE(centralDirHeaderBuffer, 0, offset); // internal file attributes + offset += 2; + writeUInt32LE(centralDirHeaderBuffer, 0, offset); // external file attributes + offset += 4; + writeUInt32LE(centralDirHeaderBuffer, entry.localHeaderOffset, offset); // local header offset + offset += 4; + + return [centralDirHeaderBuffer, filenameBuffer]; +} + +/** + * Data descriptor: signature(4) crc32(4) compSize(4) uncompSize(4) + */ +const 
dataDescriptorBuffer: Buffer = Buffer.allocUnsafe(16); +/** + * Write the trailing data descriptor for an entry. Only used because we set flag bit 3 in the + * local file header allowing deferred CRC/size calculation. + */ +export function writeDataDescriptor(entry: IFileEntry): Buffer { + let offset: number = 0; + writeUInt32LE(dataDescriptorBuffer, DATA_DESCRIPTOR_SIGNATURE, offset); // signature PK\x07\x08 + offset += 4; + writeUInt32LE(dataDescriptorBuffer, entry.crc32, offset); // crc32 + offset += 4; + writeUInt32LE(dataDescriptorBuffer, entry.compressedSize, offset); // compressed size + offset += 4; + writeUInt32LE(dataDescriptorBuffer, entry.size, offset); // uncompressed size + return dataDescriptorBuffer; +} + +/** + * End of central directory (EOCD) record (22 bytes when comment length = 0) + */ +const endOfCentralDirBuffer: Buffer = Buffer.allocUnsafe(22); +/** + * Write the EOCD record referencing the accumulated central directory. We omit archive comments + * and do not support ZIP64 (sufficient for build cache archive sizes today). + */ +export function writeEndOfCentralDirectory( + centralDirOffset: number, + centralDirSize: number, + entryCount: number +): Buffer { + let offset: number = 0; + writeUInt32LE(endOfCentralDirBuffer, END_OF_CENTRAL_DIR_SIGNATURE, offset); + offset += 4; + writeUInt16LE(endOfCentralDirBuffer, 0, offset); // disk number + offset += 2; + writeUInt16LE(endOfCentralDirBuffer, 0, offset); // central dir start disk + offset += 2; + writeUInt16LE(endOfCentralDirBuffer, entryCount, offset); // central dir records on disk + offset += 2; + writeUInt16LE(endOfCentralDirBuffer, entryCount, offset); // total central dir records + offset += 2; + writeUInt32LE(endOfCentralDirBuffer, centralDirSize, offset); // central dir size + offset += 4; + writeUInt32LE(endOfCentralDirBuffer, centralDirOffset, offset); // central dir offset + offset += 4; + writeUInt16LE(endOfCentralDirBuffer, 0, offset); // comment length + + return endOfCentralDirBuffer; +} + +interface ILocalFileHeaderParseResult { + header: ILocalFileHeader; + nextOffset: number; +} + +/** + * Parse a local file header at the provided offset. Minimal validation: signature check only. + * Returns header plus the offset pointing just past the variable-length name+extra field. 
+ */ +export function parseLocalFileHeader(buffer: Buffer, offset: number): ILocalFileHeaderParseResult { + const signature: number = readUInt32LE(buffer, offset); + if (signature !== LOCAL_FILE_HEADER_SIGNATURE) { + throw new Error( + `Unexpected local file header signature at offset ${offset.toString(16)}: ${signature.toString(16)}` + ); + } + const header: ILocalFileHeader = { + signature, + versionNeeded: readUInt16LE(buffer, offset + 4), + flags: readUInt16LE(buffer, offset + 6), + compressionMethod: readUInt16LE(buffer, offset + 8), + lastModTime: readUInt16LE(buffer, offset + 10), + lastModDate: readUInt16LE(buffer, offset + 12), + crc32: readUInt32LE(buffer, offset + 14), + compressedSize: readUInt32LE(buffer, offset + 18), + uncompressedSize: readUInt32LE(buffer, offset + 22), + filenameLength: readUInt16LE(buffer, offset + 26), + extraFieldLength: readUInt16LE(buffer, offset + 28) + }; + + return { + header, + nextOffset: offset + 30 + header.filenameLength + header.extraFieldLength + }; +} + +export interface ICentralDirectoryHeaderParseResult { + header: ICentralDirectoryHeader; + filename: string; + nextOffset: number; +} + +/** + * Parse a central directory header at the given offset (within a sliced central directory buffer). + * Returns header, filename string, and nextOffset pointing to the next structure. + */ +export function parseCentralDirectoryHeader( + buffer: Buffer, + offset: number +): ICentralDirectoryHeaderParseResult { + const signature: number = readUInt32LE(buffer, offset); + if (signature !== CENTRAL_DIR_HEADER_SIGNATURE) { + throw new Error( + `Unexpected central directory signature at offset ${offset.toString(16)}: ${signature.toString(16)}` + ); + } + const header: ICentralDirectoryHeader = { + signature, + versionMadeBy: readUInt16LE(buffer, offset + 4), + versionNeeded: readUInt16LE(buffer, offset + 6), + flags: readUInt16LE(buffer, offset + 8), + compressionMethod: readUInt16LE(buffer, offset + 10), + lastModTime: readUInt16LE(buffer, offset + 12), + lastModDate: readUInt16LE(buffer, offset + 14), + crc32: readUInt32LE(buffer, offset + 16), + compressedSize: readUInt32LE(buffer, offset + 20), + uncompressedSize: readUInt32LE(buffer, offset + 24), + filenameLength: readUInt16LE(buffer, offset + 28), + extraFieldLength: readUInt16LE(buffer, offset + 30), + commentLength: readUInt16LE(buffer, offset + 32), + diskNumberStart: readUInt16LE(buffer, offset + 34), + internalFileAttributes: readUInt16LE(buffer, offset + 36), + externalFileAttributes: readUInt32LE(buffer, offset + 38), + localHeaderOffset: readUInt32LE(buffer, offset + 42) + }; + + offset += 46; + + const filename: string = buffer.toString('utf8', offset, offset + header.filenameLength); + + return { + header, + filename, + nextOffset: offset + header.filenameLength + header.extraFieldLength + header.commentLength + }; +} + +/** + * Locate the EOCD record by reverse scanning. Since we never write a comment the EOCD will be the + * first matching signature encountered scanning backwards from the end. 
+ */ +export function findEndOfCentralDirectory(buffer: Buffer): IEndOfCentralDirectory { + for (let i: number = buffer.length - 22; i >= 0; i--) { + if (readUInt32LE(buffer, i) === END_OF_CENTRAL_DIR_SIGNATURE) { + return { + signature: readUInt32LE(buffer, i), + diskNumber: readUInt16LE(buffer, i + 4), + centralDirStartDisk: readUInt16LE(buffer, i + 6), + centralDirRecordsOnDisk: readUInt16LE(buffer, i + 8), + totalCentralDirRecords: readUInt16LE(buffer, i + 10), + centralDirSize: readUInt32LE(buffer, i + 12), + centralDirOffset: readUInt32LE(buffer, i + 16), + commentLength: readUInt16LE(buffer, i + 20) + }; + } + } + + throw new Error('End of central directory not found'); +} + +/** + * Slice out the (possibly compressed) file data bytes for a central directory entry. + * Caller will decompress if needed based on entry.header.compressionMethod. + */ +export function getFileFromZip(zipBuffer: Buffer, entry: ICentralDirectoryHeaderParseResult): Buffer { + const { header: localFileHeader } = parseLocalFileHeader(zipBuffer, entry.header.localHeaderOffset); + const localDataOffset: number = + entry.header.localHeaderOffset + 30 + localFileHeader.filenameLength + localFileHeader.extraFieldLength; + const fileZipBuffer: Buffer = zipBuffer.subarray( + localDataOffset, + localDataOffset + entry.header.compressedSize + ); + return fileZipBuffer; +} diff --git a/apps/zipsync/tsconfig.json b/apps/zipsync/tsconfig.json new file mode 100644 index 00000000000..dac21d04081 --- /dev/null +++ b/apps/zipsync/tsconfig.json @@ -0,0 +1,3 @@ +{ + "extends": "./node_modules/local-node-rig/profiles/default/tsconfig-base.json" +} diff --git a/common/changes/@rushstack/lookup-by-path/bmiddha-zipsync-3_2025-09-16-00-42.json b/common/changes/@rushstack/lookup-by-path/bmiddha-zipsync-3_2025-09-16-00-42.json new file mode 100644 index 00000000000..51759d89a49 --- /dev/null +++ b/common/changes/@rushstack/lookup-by-path/bmiddha-zipsync-3_2025-09-16-00-42.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@rushstack/lookup-by-path", + "comment": "Expose getNodeAtPrefix API to allow getting nodes with undefined values.", + "type": "patch" + } + ], + "packageName": "@rushstack/lookup-by-path" +} \ No newline at end of file diff --git a/common/changes/@rushstack/rig-package/bmiddha-zipsync-3_2025-09-16-00-42.json b/common/changes/@rushstack/rig-package/bmiddha-zipsync-3_2025-09-16-00-42.json new file mode 100644 index 00000000000..211ddf456cc --- /dev/null +++ b/common/changes/@rushstack/rig-package/bmiddha-zipsync-3_2025-09-16-00-42.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@rushstack/ts-command-line", + "comment": "Add IRequiredCommandLineStringListParameter.", + "type": "patch" + } + ], + "packageName": "@rushstack/ts-command-line" +} \ No newline at end of file diff --git a/common/changes/@rushstack/zipsync/bmiddha-zipsync-3_2025-09-16-00-42.json b/common/changes/@rushstack/zipsync/bmiddha-zipsync-3_2025-09-16-00-42.json new file mode 100644 index 00000000000..327c9aae6dd --- /dev/null +++ b/common/changes/@rushstack/zipsync/bmiddha-zipsync-3_2025-09-16-00-42.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@rushstack/zipsync", + "comment": "Add zipsync tool to pack and unpack build cache entries.", + "type": "patch" + } + ], + "packageName": "@rushstack/zipsync" +} \ No newline at end of file diff --git a/common/config/rush/nonbrowser-approved-packages.json b/common/config/rush/nonbrowser-approved-packages.json index b2dde4e55ed..7b3939e4cf7 100644 --- 
a/common/config/rush/nonbrowser-approved-packages.json +++ b/common/config/rush/nonbrowser-approved-packages.json @@ -370,6 +370,10 @@ "name": "@rushstack/worker-pool", "allowedCategories": [ "libraries" ] }, + { + "name": "@rushstack/zipsync", + "allowedCategories": [ "libraries" ] + }, { "name": "@serverless-stack/aws-lambda-ric", "allowedCategories": [ "tests" ] diff --git a/common/config/subspaces/build-tests-subspace/pnpm-lock.yaml b/common/config/subspaces/build-tests-subspace/pnpm-lock.yaml index 120705e8971..44c87b07487 100644 --- a/common/config/subspaces/build-tests-subspace/pnpm-lock.yaml +++ b/common/config/subspaces/build-tests-subspace/pnpm-lock.yaml @@ -6345,7 +6345,6 @@ packages: resolution: {integrity: sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==} engines: {node: '>=14.17'} hasBin: true - dev: true /unbox-primitive@1.0.2: resolution: {integrity: sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw==} @@ -6725,6 +6724,21 @@ packages: - '@types/node' dev: true + file:../../../apps/zipsync(@types/node@20.17.19): + resolution: {directory: ../../../apps/zipsync, type: directory} + id: file:../../../apps/zipsync + name: '@rushstack/zipsync' + hasBin: true + dependencies: + '@rushstack/lookup-by-path': file:../../../libraries/lookup-by-path(@types/node@20.17.19) + '@rushstack/node-core-library': file:../../../libraries/node-core-library(@types/node@20.17.19) + '@rushstack/terminal': file:../../../libraries/terminal(@types/node@20.17.19) + '@rushstack/ts-command-line': file:../../../libraries/ts-command-line(@types/node@20.17.19) + semver: 7.5.4 + typescript: 5.8.2 + transitivePeerDependencies: + - '@types/node' + file:../../../eslint/eslint-config(eslint@9.25.1)(typescript@4.9.5): resolution: {directory: ../../../eslint/eslint-config, type: directory} id: file:../../../eslint/eslint-config @@ -7114,6 +7128,7 @@ packages: '@rushstack/stream-collator': file:../../../libraries/stream-collator(@types/node@20.17.19) '@rushstack/terminal': file:../../../libraries/terminal(@types/node@20.17.19) '@rushstack/ts-command-line': file:../../../libraries/ts-command-line(@types/node@20.17.19) + '@rushstack/zipsync': file:../../../apps/zipsync(@types/node@20.17.19) '@yarnpkg/lockfile': 1.0.2 builtin-modules: 3.1.0 cli-table: 0.3.11 diff --git a/common/config/subspaces/build-tests-subspace/repo-state.json b/common/config/subspaces/build-tests-subspace/repo-state.json index 078bc879c4a..5264367b934 100644 --- a/common/config/subspaces/build-tests-subspace/repo-state.json +++ b/common/config/subspaces/build-tests-subspace/repo-state.json @@ -1,6 +1,6 @@ // DO NOT MODIFY THIS FILE MANUALLY BUT DO COMMIT IT. It is generated and used by Rush. 
{ - "pnpmShrinkwrapHash": "a4362af2793dd557efe7e9f005f3e2f376eb2eda", + "pnpmShrinkwrapHash": "2ac01ba33e09661dc0e7d7faa36d215bb3d3b91e", "preferredVersionsHash": "550b4cee0bef4e97db6c6aad726df5149d20e7d9", "packageJsonInjectedDependenciesHash": "79ac135cb61506457e8d49c7ec1342d419bde3e2" } diff --git a/common/config/subspaces/default/pnpm-lock.yaml b/common/config/subspaces/default/pnpm-lock.yaml index f0ab142cd09..834d96fe91a 100644 --- a/common/config/subspaces/default/pnpm-lock.yaml +++ b/common/config/subspaces/default/pnpm-lock.yaml @@ -449,6 +449,31 @@ importers: specifier: workspace:* version: link:../../rigs/local-node-rig + ../../../apps/zipsync: + dependencies: + '@rushstack/lookup-by-path': + specifier: workspace:* + version: link:../../libraries/lookup-by-path + '@rushstack/terminal': + specifier: workspace:* + version: link:../../libraries/terminal + '@rushstack/ts-command-line': + specifier: workspace:* + version: link:../../libraries/ts-command-line + typescript: + specifier: ~5.8.2 + version: 5.8.2 + devDependencies: + '@rushstack/heft': + specifier: workspace:* + version: link:../heft + eslint: + specifier: ~9.25.1 + version: 9.25.1(supports-color@8.1.1) + local-node-rig: + specifier: workspace:* + version: link:../../rigs/local-node-rig + ../../../build-tests-samples/heft-node-basic-tutorial: devDependencies: '@rushstack/heft': @@ -10468,7 +10493,7 @@ packages: dependencies: '@pnpm/crypto.base32-hash': 3.0.1 '@pnpm/types': 12.2.0 - semver: 7.6.3 + semver: 7.7.2 dev: false /@pnpm/error@1.4.0: @@ -14600,7 +14625,7 @@ packages: eslint: 9.25.1(supports-color@8.1.1) json-stable-stringify-without-jsonify: 1.0.1 lodash.merge: 4.6.2 - semver: 7.6.3 + semver: 7.7.2 transitivePeerDependencies: - supports-color - typescript @@ -14809,7 +14834,7 @@ packages: fast-glob: 3.3.2 is-glob: 4.0.3 minimatch: 9.0.5 - semver: 7.6.3 + semver: 7.7.2 ts-api-utils: 2.0.1(typescript@4.9.5) typescript: 4.9.5 transitivePeerDependencies: @@ -14828,7 +14853,7 @@ packages: fast-glob: 3.3.2 is-glob: 4.0.3 minimatch: 9.0.5 - semver: 7.6.3 + semver: 7.7.2 ts-api-utils: 2.0.1(typescript@5.8.2) typescript: 5.8.2 transitivePeerDependencies: @@ -19319,7 +19344,7 @@ packages: espree: 10.3.0 esquery: 1.6.0 parse-imports-exports: 0.2.4 - semver: 7.6.3 + semver: 7.7.2 spdx-expression-parse: 4.0.0 transitivePeerDependencies: - supports-color @@ -27813,16 +27838,10 @@ packages: dependencies: lru-cache: 6.0.0 - /semver@7.6.3: - resolution: {integrity: sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==} - engines: {node: '>=10'} - hasBin: true - /semver@7.7.2: resolution: {integrity: sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==} engines: {node: '>=10'} hasBin: true - dev: false /send@0.17.2: resolution: {integrity: sha512-UJYB6wFSJE3G00nEivR5rgWp8c2xXvJ3OPWPhmuteU0IKj8nKbG3DrjiOmLwpnHGYWAVwA69zmTm++YG0Hmwww==} diff --git a/common/config/subspaces/default/repo-state.json b/common/config/subspaces/default/repo-state.json index 711e482da42..bf2c1a8ba42 100644 --- a/common/config/subspaces/default/repo-state.json +++ b/common/config/subspaces/default/repo-state.json @@ -1,5 +1,5 @@ // DO NOT MODIFY THIS FILE MANUALLY BUT DO COMMIT IT. It is generated and used by Rush. 
{ - "pnpmShrinkwrapHash": "b3b0018c5869d606a645e2b69ef6c53f9d2bf483", + "pnpmShrinkwrapHash": "99186b016ffe5874093a1b9fb71c52c903b86978", "preferredVersionsHash": "61cd419c533464b580f653eb5f5a7e27fe7055ca" } diff --git a/common/reviews/api/lookup-by-path.api.md b/common/reviews/api/lookup-by-path.api.md index 6c69844f179..66336af9e63 100644 --- a/common/reviews/api/lookup-by-path.api.md +++ b/common/reviews/api/lookup-by-path.api.md @@ -31,6 +31,7 @@ export interface IReadonlyLookupByPath extends Iterable<[strin findChildPathFromSegments(childPathSegments: Iterable): TItem | undefined; findLongestPrefixMatch(query: string, delimiter?: string): IPrefixMatch | undefined; get(query: string, delimiter?: string): TItem | undefined; + getNodeAtPrefix(query: string, delimiter?: string): IReadonlyPathTrieNode | undefined; groupByChild(infoByPath: Map, delimiter?: string): Map>; has(query: string, delimiter?: string): boolean; get size(): number; @@ -57,6 +58,7 @@ export class LookupByPath implements IReadonlyLookupByPath): TItem | undefined; findLongestPrefixMatch(query: string, delimiter?: string): IPrefixMatch | undefined; get(key: string, delimiter?: string): TItem | undefined; + getNodeAtPrefix(query: string, delimiter?: string): IReadonlyPathTrieNode | undefined; groupByChild(infoByPath: Map, delimiter?: string): Map>; has(key: string, delimiter?: string): boolean; static iteratePathSegments(serializedPath: string, delimiter?: string): Iterable; diff --git a/common/reviews/api/ts-command-line.api.md b/common/reviews/api/ts-command-line.api.md index 3541c82bb73..dd85f73df4b 100644 --- a/common/reviews/api/ts-command-line.api.md +++ b/common/reviews/api/ts-command-line.api.md @@ -425,6 +425,12 @@ export interface IRequiredCommandLineIntegerParameter extends CommandLineInteger readonly value: number; } +// @public +export interface IRequiredCommandLineStringListParameter extends CommandLineStringListParameter { + // (undocumented) + values: ReadonlyArray; +} + // @public export interface IRequiredCommandLineStringParameter extends CommandLineStringParameter { // (undocumented) diff --git a/libraries/lookup-by-path/src/LookupByPath.ts b/libraries/lookup-by-path/src/LookupByPath.ts index b1ac287960d..a6b3353371e 100644 --- a/libraries/lookup-by-path/src/LookupByPath.ts +++ b/libraries/lookup-by-path/src/LookupByPath.ts @@ -186,6 +186,15 @@ export interface IReadonlyLookupByPath extends Iterable<[strin * @param infoByPath - The info to be grouped, keyed by path */ groupByChild(infoByPath: Map, delimiter?: string): Map>; + + /** + * Retrieves the trie node at the specified prefix, if it exists. + * + * @param query - The prefix to check for + * @param delimiter - The path delimiter + * @returns The trie node at the specified prefix, or `undefined` if no node was found + */ + getNodeAtPrefix(query: string, delimiter?: string): IReadonlyPathTrieNode | undefined; } /** @@ -526,6 +535,16 @@ export class LookupByPath implements IReadonlyLookupByPath | undefined { + return this._findNodeAtPrefix(query, delimiter); + } + /** * Iterates through progressively longer prefixes of a given string and returns as soon * as the number of candidate items that match the prefix are 1 or 0. 
diff --git a/libraries/ts-command-line/src/index.ts b/libraries/ts-command-line/src/index.ts index dc4b17d60e9..f88f6c55c49 100644 --- a/libraries/ts-command-line/src/index.ts +++ b/libraries/ts-command-line/src/index.ts @@ -40,7 +40,10 @@ export { CommandLineStringParameter, type IRequiredCommandLineStringParameter } from './parameters/CommandLineStringParameter'; -export { CommandLineStringListParameter } from './parameters/CommandLineStringListParameter'; +export { + CommandLineStringListParameter, + type IRequiredCommandLineStringListParameter +} from './parameters/CommandLineStringListParameter'; export { CommandLineIntegerParameter, type IRequiredCommandLineIntegerParameter diff --git a/libraries/ts-command-line/src/parameters/CommandLineStringListParameter.ts b/libraries/ts-command-line/src/parameters/CommandLineStringListParameter.ts index c62eed59971..ade590da365 100644 --- a/libraries/ts-command-line/src/parameters/CommandLineStringListParameter.ts +++ b/libraries/ts-command-line/src/parameters/CommandLineStringListParameter.ts @@ -5,6 +5,14 @@ import type { ICommandLineStringListDefinition } from './CommandLineDefinition'; import { CommandLineParameterWithArgument, CommandLineParameterKind } from './BaseClasses'; import { EnvironmentVariableParser } from './EnvironmentVariableParser'; +/** + * The data type returned by {@link CommandLineParameterProvider.(defineStringListParameter:2)}. + * @public + */ +export interface IRequiredCommandLineStringListParameter extends CommandLineStringListParameter { + values: ReadonlyArray<string>; +} + /** * The data type returned by {@link CommandLineParameterProvider.defineStringListParameter}. * @public diff --git a/libraries/ts-command-line/src/providers/CommandLineParser.ts b/libraries/ts-command-line/src/providers/CommandLineParser.ts index 7855cd5a301..a72ae6d4f7d 100644 --- a/libraries/ts-command-line/src/providers/CommandLineParser.ts +++ b/libraries/ts-command-line/src/providers/CommandLineParser.ts @@ -2,7 +2,7 @@ // See LICENSE in the project root for license information. import type * as argparse from 'argparse'; -import { Colorize } from '@rushstack/terminal'; +import { Colorize } from '@rushstack/terminal/lib/Colorize'; import type { CommandLineAction } from './CommandLineAction'; import type { AliasCommandLineAction } from './AliasCommandLineAction'; diff --git a/libraries/ts-command-line/src/test/ScopedCommandLineAction.test.ts b/libraries/ts-command-line/src/test/ScopedCommandLineAction.test.ts index a535c1e77cc..d72c558c34f 100644 --- a/libraries/ts-command-line/src/test/ScopedCommandLineAction.test.ts +++ b/libraries/ts-command-line/src/test/ScopedCommandLineAction.test.ts @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. -import { AnsiEscape } from '@rushstack/terminal'; +import { AnsiEscape } from '@rushstack/terminal/lib/AnsiEscape'; import { ScopedCommandLineAction } from '../providers/ScopedCommandLineAction'; import type { CommandLineStringParameter } from '../parameters/CommandLineStringParameter'; diff --git a/rush.json b/rush.json index 80a47febb84..c4ef84b429a 100644 --- a/rush.json +++ b/rush.json @@ -489,6 +489,12 @@ "reviewCategory": "libraries", "shouldPublish": true }, + { + "packageName": "@rushstack/zipsync", + "projectFolder": "apps/zipsync", + "reviewCategory": "libraries", + "shouldPublish": true + }, // "build-tests" folder (alphabetical order) {
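For context on the `IRequiredCommandLineStringListParameter` interface introduced above, here is a hedged sketch of how a CLI action might consume it. It assumes that `defineStringListParameter()` returns the new interface when `required: true` is passed (mirroring the existing `IRequiredCommandLineStringParameter` pattern) and that the action uses the current `onExecuteAsync` hook; the action name and parameter names are hypothetical, not part of this diff.

```ts
import {
  CommandLineAction,
  type IRequiredCommandLineStringListParameter
} from '@rushstack/ts-command-line';

export class PackAction extends CommandLineAction {
  // Because the parameter is required, `values` is a ReadonlyArray<string> that the
  // parser guarantees to contain at least one entry.
  private readonly _targetFolders: IRequiredCommandLineStringListParameter;

  public constructor() {
    super({
      actionName: 'pack',
      summary: 'Pack build outputs into an archive',
      documentation: 'Packs the specified folders into a single archive.'
    });

    this._targetFolders = this.defineStringListParameter({
      parameterLongName: '--target-folder',
      argumentName: 'FOLDER',
      description: 'A folder to include in the archive (may be repeated)',
      required: true
    });
  }

  protected override async onExecuteAsync(): Promise<void> {
    for (const folder of this._targetFolders.values) {
      // ...pack the contents of `folder`
    }
  }
}
```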
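Finally, tying back to the `getFileFromZip()` helper earlier in this diff ("caller will decompress if needed"): a sketch of what that caller might look like. The `./zip` import path is hypothetical; the compression-method constants (0 = stored, 8 = DEFLATE) are standard ZIP values, and ZIP stores raw DEFLATE streams without a zlib header, so Node's `inflateRawSync` applies.

```ts
import * as zlib from 'zlib';
// Hypothetical module path for the helpers shown earlier in this diff:
import { getFileFromZip, type ICentralDirectoryHeaderParseResult } from './zip';

function readEntryBytes(zipBuffer: Buffer, entry: ICentralDirectoryHeaderParseResult): Buffer {
  // getFileFromZip() returns the raw (possibly compressed) bytes for this entry.
  const fileData: Buffer = getFileFromZip(zipBuffer, entry);

  switch (entry.header.compressionMethod) {
    case 0:
      // "Stored": the bytes are already the original file contents.
      return fileData;
    case 8:
      // DEFLATE: ZIP stores a raw deflate stream, so inflateRaw (not inflate) is used.
      return zlib.inflateRawSync(fileData);
    default:
      throw new Error(`Unsupported compression method: ${entry.header.compressionMethod}`);
  }
}
```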