From e869fcce9130a0885f34d0722609387821bc638a Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Mon, 15 Sep 2025 17:40:33 -0700 Subject: [PATCH 01/20] zipsync --- apps/zipsync/.npmignore | 32 + apps/zipsync/LICENSE | 24 + apps/zipsync/README.md | 22 + apps/zipsync/config/jest.config.json | 4 + apps/zipsync/config/jestSymbolDispose.js | 8 + apps/zipsync/config/rig.json | 7 + apps/zipsync/eslint.config.js | 21 + apps/zipsync/package.json | 34 + apps/zipsync/src/ZipSyncCommandLineParser.ts | 120 ++++ .../src/__snapshots__/start.test.ts.snap | 33 + .../src/__snapshots__/zipSync.test.ts.snap | 187 +++++ .../zipSyncWorkerAsync.test.ts.snap | 187 +++++ apps/zipsync/src/benchmark.test.ts | 495 +++++++++++++ apps/zipsync/src/compress.ts | 189 +++++ apps/zipsync/src/crc32.test.ts | 34 + apps/zipsync/src/crc32.ts | 45 ++ apps/zipsync/src/disposableFileHandle.ts | 28 + apps/zipsync/src/perf.ts | 53 ++ apps/zipsync/src/start.test.ts | 11 + apps/zipsync/src/start.ts | 22 + apps/zipsync/src/zipSync.test.ts | 95 +++ apps/zipsync/src/zipSync.ts | 680 ++++++++++++++++++ apps/zipsync/src/zipSyncWorker.ts | 84 +++ apps/zipsync/src/zipSyncWorkerAsync.test.ts | 89 +++ apps/zipsync/src/zipSyncWorkerAsync.ts | 56 ++ apps/zipsync/src/zipUtils.ts | 356 +++++++++ apps/zipsync/tsconfig.json | 3 + .../rush/browser-approved-packages.json | 4 + .../build-tests-subspace/pnpm-lock.yaml | 17 +- .../build-tests-subspace/repo-state.json | 4 +- .../config/subspaces/default/pnpm-lock.yaml | 53 +- .../config/subspaces/default/repo-state.json | 2 +- common/reviews/api/lookup-by-path.api.md | 2 + common/reviews/api/rush-lib.api.md | 2 +- common/reviews/api/ts-command-line.api.md | 6 + libraries/lookup-by-path/src/LookupByPath.ts | 19 + libraries/rush-lib/package.json | 1 + libraries/rush-lib/src/logic/RushConstants.ts | 2 +- .../logic/buildCache/OperationBuildCache.ts | 160 ++--- libraries/ts-command-line/src/index.ts | 5 +- .../CommandLineStringListParameter.ts | 8 + .../src/providers/CommandLineParser.ts | 2 +- .../src/test/ScopedCommandLineAction.test.ts | 2 +- rush.json | 6 + 44 files changed, 3108 insertions(+), 106 deletions(-) create mode 100644 apps/zipsync/.npmignore create mode 100644 apps/zipsync/LICENSE create mode 100644 apps/zipsync/README.md create mode 100644 apps/zipsync/config/jest.config.json create mode 100644 apps/zipsync/config/jestSymbolDispose.js create mode 100644 apps/zipsync/config/rig.json create mode 100644 apps/zipsync/eslint.config.js create mode 100644 apps/zipsync/package.json create mode 100644 apps/zipsync/src/ZipSyncCommandLineParser.ts create mode 100644 apps/zipsync/src/__snapshots__/start.test.ts.snap create mode 100644 apps/zipsync/src/__snapshots__/zipSync.test.ts.snap create mode 100644 apps/zipsync/src/__snapshots__/zipSyncWorkerAsync.test.ts.snap create mode 100644 apps/zipsync/src/benchmark.test.ts create mode 100644 apps/zipsync/src/compress.ts create mode 100644 apps/zipsync/src/crc32.test.ts create mode 100644 apps/zipsync/src/crc32.ts create mode 100644 apps/zipsync/src/disposableFileHandle.ts create mode 100644 apps/zipsync/src/perf.ts create mode 100644 apps/zipsync/src/start.test.ts create mode 100644 apps/zipsync/src/start.ts create mode 100644 apps/zipsync/src/zipSync.test.ts create mode 100644 apps/zipsync/src/zipSync.ts create mode 100644 apps/zipsync/src/zipSyncWorker.ts create mode 100644 apps/zipsync/src/zipSyncWorkerAsync.test.ts create mode 100644 apps/zipsync/src/zipSyncWorkerAsync.ts create mode 100644 
apps/zipsync/src/zipUtils.ts create mode 100644 apps/zipsync/tsconfig.json diff --git a/apps/zipsync/.npmignore b/apps/zipsync/.npmignore new file mode 100644 index 00000000000..bc349f9a4be --- /dev/null +++ b/apps/zipsync/.npmignore @@ -0,0 +1,32 @@ +# THIS IS A STANDARD TEMPLATE FOR .npmignore FILES IN THIS REPO. + +# Ignore all files by default, to avoid accidentally publishing unintended files. +* + +# Use negative patterns to bring back the specific things we want to publish. +!/bin/** +!/lib/** +!/lib-*/** +!/dist/** + +!CHANGELOG.md +!CHANGELOG.json +!heft-plugin.json +!rush-plugin-manifest.json +!ThirdPartyNotice.txt + +# Ignore certain patterns that should not get published. +/dist/*.stats.* +/lib/**/test/ +/lib-*/**/test/ +*.test.js + +# NOTE: These don't need to be specified, because NPM includes them automatically. +# +# package.json +# README.md +# LICENSE + +# --------------------------------------------------------------------------- +# DO NOT MODIFY ABOVE THIS LINE! Add any project-specific overrides below. +# --------------------------------------------------------------------------- diff --git a/apps/zipsync/LICENSE b/apps/zipsync/LICENSE new file mode 100644 index 00000000000..e75a1fe895f --- /dev/null +++ b/apps/zipsync/LICENSE @@ -0,0 +1,24 @@ +@rushstack/zipsync + +Copyright (c) Microsoft Corporation. All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/apps/zipsync/README.md b/apps/zipsync/README.md new file mode 100644 index 00000000000..a607dc2d67d --- /dev/null +++ b/apps/zipsync/README.md @@ -0,0 +1,22 @@ +# @rushstack/zipsync + +zipsync is a tool to pack and unpack zip archives. It is designed as a single-purpose tool to pack and unpack build cache entries. + +## Implementation + +### Unpack + +- Read the zip central directory record at the end of the zip file and enumerate zip entries +- Parse the zipsync metadata file in the archive. This contains the SHA-1 hashes of the files +- Enumerate the target directories, cleanup any files or folders that aren't in the archive +- If a file exists with matching size + SHA‑1, skip writing; else unpack it + +### Pack + +- Enumerate the target directories. +- For each file compute a SHA-1 hash for the zipsync metadata file, and the CRC32 (required by zip format), then compress it if needed. Write the headers and file contents to the zip archive. +- Write the metadata file to the zip archive and the zip central directory record. 
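
For reference, the zipsync metadata entry stored inside the archive (named `__zipsync_metadata__.json` in this patch) records the uncompressed size and SHA-1 hash of every packed file; it is what drives the skip-or-rewrite decision during unpack. The sketch below mirrors the internal `IMetadata` and `IMetadataFileRecord` interfaces declared in `zipSync.ts`, with example values taken from the test snapshots; it is illustrative only, not an exported API.

```ts
// Shape of the zipsync metadata entry written into the archive as
// "__zipsync_metadata__.json"; mirrors the internal IMetadata /
// IMetadataFileRecord interfaces in zipSync.ts.
interface IMetadataFileRecord {
  /** Uncompressed file size in bytes */
  size: number;
  /** SHA-1 hash of the file contents, hex-encoded */
  sha1Hash: string;
}

interface IMetadata {
  /** Metadata format version; "1.0" in this patch */
  version: string;
  /** Keyed by archive-relative path (forward slashes) */
  files: Record<string, IMetadataFileRecord>;
}

// Example record (values taken from the test snapshots): during unpack,
// a file on disk whose size and SHA-1 both match its record is skipped
// rather than rewritten.
const exampleMetadata: IMetadata = {
  version: '1.0',
  files: {
    'demo-data-1/subdir/file-0.txt': {
      size: 37,
      sha1Hash: 'a7f3d7debc1ad5f045f672be8a8e67d32c909fee'
    }
  }
};
```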
+ +## Constraints + +Though archives created by zipsync can be used by other zip compatible programs, the opposite is not the case. zipsync only implements a subset of zip features to achieve greater performance. diff --git a/apps/zipsync/config/jest.config.json b/apps/zipsync/config/jest.config.json new file mode 100644 index 00000000000..f385c6fdc0f --- /dev/null +++ b/apps/zipsync/config/jest.config.json @@ -0,0 +1,4 @@ +{ + "extends": "local-node-rig/profiles/default/config/jest.config.json", + "setupFilesAfterEnv": ["/config/jestSymbolDispose.js"] +} diff --git a/apps/zipsync/config/jestSymbolDispose.js b/apps/zipsync/config/jestSymbolDispose.js new file mode 100644 index 00000000000..25328e10b8c --- /dev/null +++ b/apps/zipsync/config/jestSymbolDispose.js @@ -0,0 +1,8 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +const disposeSymbol = Symbol('Symbol.dispose'); +const asyncDisposeSymbol = Symbol('Symbol.asyncDispose'); + +Symbol.asyncDispose ??= asyncDisposeSymbol; +Symbol.dispose ??= disposeSymbol; diff --git a/apps/zipsync/config/rig.json b/apps/zipsync/config/rig.json new file mode 100644 index 00000000000..165ffb001f5 --- /dev/null +++ b/apps/zipsync/config/rig.json @@ -0,0 +1,7 @@ +{ + // The "rig.json" file directs tools to look for their config files in an external package. + // Documentation for this system: https://www.npmjs.com/package/@rushstack/rig-package + "$schema": "https://developer.microsoft.com/json-schemas/rig-package/rig.schema.json", + + "rigPackageName": "local-node-rig" +} diff --git a/apps/zipsync/eslint.config.js b/apps/zipsync/eslint.config.js new file mode 100644 index 00000000000..ceb5a1bee40 --- /dev/null +++ b/apps/zipsync/eslint.config.js @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+ +const nodeTrustedToolProfile = require('local-node-rig/profiles/default/includes/eslint/flat/profile/node-trusted-tool'); +const friendlyLocalsMixin = require('local-node-rig/profiles/default/includes/eslint/flat/mixins/friendly-locals'); + +module.exports = [ + ...nodeTrustedToolProfile, + ...friendlyLocalsMixin, + { + files: ['**/*.ts', '**/*.tsx'], + languageOptions: { + parserOptions: { + tsconfigRootDir: __dirname + } + }, + rules: { + 'no-console': 'off' + } + } +]; diff --git a/apps/zipsync/package.json b/apps/zipsync/package.json new file mode 100644 index 00000000000..9c9960e0857 --- /dev/null +++ b/apps/zipsync/package.json @@ -0,0 +1,34 @@ +{ + "name": "@rushstack/zipsync", + "version": "0.0.0", + "description": "CLI tool for creating and extracting ZIP archives with intelligent filesystem synchronization", + "repository": { + "type": "git", + "url": "https://github.com/microsoft/rushstack.git", + "directory": "apps/zipsync" + }, + "bin": { + "zipsync": "./bin/zipsync" + }, + "license": "MIT", + "scripts": { + "start": "node lib/start", + "build": "heft build --clean", + "_phase:build": "heft run --only build -- --clean", + "_phase:test": "heft run --only test -- --clean" + }, + "dependencies": { + "@rushstack/node-core-library": "workspace:*", + "@rushstack/terminal": "workspace:*", + "@rushstack/ts-command-line": "workspace:*", + "semver": "~7.5.4", + "typescript": "~5.8.2", + "@rushstack/lookup-by-path": "workspace:*" + }, + "devDependencies": { + "@rushstack/heft": "workspace:*", + "@types/semver": "7.5.0", + "eslint": "~9.25.1", + "local-node-rig": "workspace:*" + } +} diff --git a/apps/zipsync/src/ZipSyncCommandLineParser.ts b/apps/zipsync/src/ZipSyncCommandLineParser.ts new file mode 100644 index 00000000000..66bae3de4e2 --- /dev/null +++ b/apps/zipsync/src/ZipSyncCommandLineParser.ts @@ -0,0 +1,120 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+ +import { CommandLineParser } from '@rushstack/ts-command-line/lib/providers/CommandLineParser'; +import type { + CommandLineFlagParameter, + IRequiredCommandLineStringParameter, + IRequiredCommandLineChoiceParameter, + IRequiredCommandLineStringListParameter, + CommandLineChoiceParameter +} from '@rushstack/ts-command-line/lib/index'; +import { InternalError } from '@rushstack/node-core-library/lib/InternalError'; +import { Colorize } from '@rushstack/terminal/lib/Colorize'; +import type { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider'; +import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; + +import { type IZipMode, zipSync } from './zipSync'; + +export class ZipSyncCommandLineParser extends CommandLineParser { + private readonly _debugParameter: CommandLineFlagParameter; + private readonly _verboseParameter: CommandLineFlagParameter; + private readonly _modeParameter: IRequiredCommandLineChoiceParameter; + private readonly _archivePathParameter: IRequiredCommandLineStringParameter; + private readonly _baseDirParameter: IRequiredCommandLineStringParameter; + private readonly _targetDirectoriesParameter: IRequiredCommandLineStringListParameter; + private readonly _compressionParameter: CommandLineChoiceParameter<'store' | 'deflate' | 'auto'>; + private readonly _terminal: ITerminal; + private readonly _terminalProvider: ConsoleTerminalProvider; + + public constructor(terminalProvider: ConsoleTerminalProvider, terminal: ITerminal) { + super({ + toolFilename: 'zipsync', + toolDescription: '' + }); + + this._terminal = terminal; + this._terminalProvider = terminalProvider; + + this._debugParameter = this.defineFlagParameter({ + parameterLongName: '--debug', + parameterShortName: '-d', + description: 'Show the full call stack if an error occurs while executing the tool' + }); + + this._verboseParameter = this.defineFlagParameter({ + parameterLongName: '--verbose', + parameterShortName: '-v', + description: 'Show verbose output' + }); + + this._modeParameter = this.defineChoiceParameter({ + parameterLongName: '--mode', + parameterShortName: '-m', + description: + 'The mode of operation: "pack" to create a zip archive, or "unpack" to extract files from a zip archive', + alternatives: ['pack', 'unpack'], + required: true + }); + + this._archivePathParameter = this.defineStringParameter({ + parameterLongName: '--archive-path', + parameterShortName: '-a', + description: 'Zip file path', + argumentName: 'ARCHIVE_PATH', + required: true + }); + + this._targetDirectoriesParameter = this.defineStringListParameter({ + parameterLongName: '--target-directory', + parameterShortName: '-t', + description: 'Target directories to pack or unpack', + argumentName: 'TARGET_DIRECTORIES', + required: true + }); + + this._baseDirParameter = this.defineStringParameter({ + parameterLongName: '--base-dir', + parameterShortName: '-b', + description: 'Base directory for relative paths within the archive', + argumentName: 'BASE_DIR', + required: true + }); + + this._compressionParameter = this.defineChoiceParameter<'store' | 'deflate' | 'auto'>({ + parameterLongName: '--compression', + parameterShortName: '-z', + description: + 'Compression strategy when packing. 
"deflate" attempts DEFLATE for every file (keeps only if smaller); "auto" first skips likely-compressed types before attempting; "store" disables compression.', + alternatives: ['store', 'deflate', 'auto'], + required: true + }); + } + + protected override async onExecuteAsync(): Promise { + if (this._debugParameter.value) { + InternalError.breakInDebugger = true; + this._terminalProvider.debugEnabled = true; + this._terminalProvider.verboseEnabled = true; + } + if (this._verboseParameter.value) { + this._terminalProvider.verboseEnabled = true; + } + try { + zipSync({ + terminal: this._terminal, + mode: this._modeParameter.value, + archivePath: this._archivePathParameter.value, + targetDirectories: this._targetDirectoriesParameter.values, + baseDir: this._baseDirParameter.value, + compression: (this._compressionParameter.value as 'store' | 'deflate' | 'auto' | undefined) ?? 'auto' + }); + } catch (error) { + if (this._debugParameter.value) { + console.error('\n' + error.stack); + } else { + console.error('\n' + Colorize.red('ERROR: ' + error.message.trim())); + } + } + } +} diff --git a/apps/zipsync/src/__snapshots__/start.test.ts.snap b/apps/zipsync/src/__snapshots__/start.test.ts.snap new file mode 100644 index 00000000000..2c8b0be1c63 --- /dev/null +++ b/apps/zipsync/src/__snapshots__/start.test.ts.snap @@ -0,0 +1,33 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`CLI Tool Tests should display help for "zipsync --help" 1`] = ` +" +zipsync 0.0.0 - https://rushstack.io + +usage: zipsync [-h] [-d] [-v] -m {pack,unpack} -a ARCHIVE_PATH -t + TARGET_DIRECTORIES -b BASE_DIR -z {store,deflate,auto} + + +Optional arguments: + -h, --help Show this help message and exit. + -d, --debug Show the full call stack if an error occurs while + executing the tool + -v, --verbose Show verbose output + -m {pack,unpack}, --mode {pack,unpack} + The mode of operation: \\"pack\\" to create a zip archive, + or \\"unpack\\" to extract files from a zip archive + -a ARCHIVE_PATH, --archive-path ARCHIVE_PATH + Zip file path + -t TARGET_DIRECTORIES, --target-directory TARGET_DIRECTORIES + Target directories to pack or unpack + -b BASE_DIR, --base-dir BASE_DIR + Base directory for relative paths within the archive + -z {store,deflate,auto}, --compression {store,deflate,auto} + Compression strategy when packing. \\"deflate\\" attempts + DEFLATE for every file (keeps only if smaller); + \\"auto\\" first skips likely-compressed types before + attempting; \\"store\\" disables compression. 
+ +For detailed help about a specific command, use: zipsync -h +" +`; diff --git a/apps/zipsync/src/__snapshots__/zipSync.test.ts.snap b/apps/zipsync/src/__snapshots__/zipSync.test.ts.snap new file mode 100644 index 00000000000..bbf73a8eaeb --- /dev/null +++ b/apps/zipsync/src/__snapshots__/zipSync.test.ts.snap @@ -0,0 +1,187 @@ +// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing + +exports[`zipSync tests basic pack test 1`] = ` +Object { + "filesPacked": 21, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, +} +`; + +exports[`zipSync tests basic pack test 2`] = ` +Object { + "filesDeleted": 0, + "filesExtracted": 20, + "filesSkipped": 0, + "foldersDeleted": 0, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, 
+ }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, + "otherEntriesDeleted": 0, +} +`; diff --git a/apps/zipsync/src/__snapshots__/zipSyncWorkerAsync.test.ts.snap b/apps/zipsync/src/__snapshots__/zipSyncWorkerAsync.test.ts.snap new file mode 100644 index 00000000000..8b94af245d0 --- /dev/null +++ b/apps/zipsync/src/__snapshots__/zipSyncWorkerAsync.test.ts.snap @@ -0,0 +1,187 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`zipSyncWorkerAsync tests basic pack test 1`] = ` +Object { + "filesPacked": 21, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": 
"31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, +} +`; + +exports[`zipSyncWorkerAsync tests basic pack test 2`] = ` +Object { + "filesDeleted": 0, + "filesExtracted": 20, + "filesSkipped": 0, + "foldersDeleted": 0, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": 
"078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, + "otherEntriesDeleted": 0, +} +`; diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts new file mode 100644 index 00000000000..c1123687d1b --- /dev/null +++ b/apps/zipsync/src/benchmark.test.ts @@ -0,0 +1,495 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { execSync } from 'child_process'; +import { tmpdir } from 'os'; +import * as path from 'path'; +import * as fs from 'fs'; +import { createHash, randomUUID } from 'crypto'; +import { zipSync } from './zipSync'; +import { NoOpTerminalProvider, Terminal } from '@rushstack/terminal'; + +// create a tempdir and setup dummy files there for benchmarking +let tempDir: string; +const runId = randomUUID(); +async function setupDemoDataAsync(): Promise { + console.log('Setting up demo data for benchmark...'); + tempDir = path.join(tmpdir(), `zipsync-benchmark-${runId}`); + fs.mkdirSync(tempDir, { recursive: true }); + + const demoSubDir1 = path.join(tempDir, 'demo-data', 'subdir1'); + fs.mkdirSync(demoSubDir1, { recursive: true }); + const demoSubDir2 = path.join(tempDir, 'demo-data', 'subdir2'); + fs.mkdirSync(demoSubDir2, { recursive: true }); + + for (let i = 0; i < 1000; i++) { + const filePath1 = path.join(demoSubDir1, `file${i}.txt`); + fs.writeFileSync(filePath1, `This is file ${i} in subdir1\n`.repeat(1000), { encoding: 'utf-8' }); + const filePath2 = path.join(demoSubDir2, `file${i}.txt`); + fs.writeFileSync(filePath2, `This is file ${i} in subdir2\n`.repeat(1000), { encoding: 'utf-8' }); + } + + console.log(`Demo data setup complete in ${tempDir}`); +} + +async function cleanupDemoDataAsync(): Promise { + if (tempDir && fs.existsSync(tempDir)) { + fs.rmSync(tempDir, { recursive: true, force: true }); + console.log(`Cleaned up temp directory: ${tempDir}`); + } +} + +beforeAll(async () => { + await setupDemoDataAsync(); +}); + +afterAll(async () => { + await cleanupDemoDataAsync(); +}); + +// Collect timings for table output after all tests +interface IMeasurement { + name: string; + kind: string; + phase: 'pack' | 'unpack'; + ms: number; + // Only for pack phase: archive size in bytes and compression ratio (archiveSize / uncompressedSourceSize) + sizeBytes?: number; +} +const measurements: IMeasurement[] = []; +// Allow specifying iterations via env BENCH_ITERATIONS or command arg --iterations N (jest passes args; we scan process.argv) +function detectIterations(): number { + let iter = 1; + const envParsed: number = parseInt(process.env.BENCH_ITERATIONS || '', 10); + if (!isNaN(envParsed) && envParsed > 0) { + iter = envParsed; + } + return iter || 5; +} +const ITERATIONS: number = detectIterations(); + +function measureFn(callback: () => void): number { + const start: 
number = performance.now(); + callback(); + return performance.now() - start; +} + +interface IBenchContext { + archive: string; + demoDir: string; // source demo data directory + unpackDir: string; +} + +interface IBenchCommands { + // Function that performs the packing. Receives archive path and demoDir. + pack: (ctx: IBenchContext) => void; + // Function that performs the unpack. Receives archive and unpackDir. + unpack: (ctx: IBenchContext) => void; + archive: string; + unpackDir: string; + populateUnpackDir?: 'full' | 'partial'; + cleanBeforeUnpack?: boolean; +} + +function bench(kind: string, commands: IBenchCommands): void { + const demoDataPath = path.join(tempDir, 'demo-data'); + const srcDir = demoDataPath; + // Compute total uncompressed source size once per bench invocation + // We intentionally no longer compute total source size for ratio; only archive size is tracked. + function verifyUnpack(unpackDir: string): void { + // Compare file listings and hashes + function buildMap(root: string): Map { + const map = new Map(); + function walk(current: string): void { + for (const entry of fs.readdirSync(current, { withFileTypes: true })) { + const full = path.join(current, entry.name); + if (entry.isDirectory()) { + walk(full); + } else if (entry.isFile()) { + const rel = path.relative(root, full).replace(/\\/g, '/'); + const buf = fs.readFileSync(full); + const hash = createHash('sha256').update(buf).digest('hex'); + map.set(rel, { size: buf.length, hash }); + } + } + } + walk(root); + return map; + } + const srcMap = buildMap(srcDir); + const dstMap = buildMap(unpackDir); + if (srcMap.size !== dstMap.size) { + throw new Error( + `Verification failed (${kind}): file count mismatch src=${srcMap.size} dst=${dstMap.size}` + ); + } + for (const [rel, meta] of srcMap) { + const other = dstMap.get(rel); + if (!other) throw new Error(`Verification failed (${kind}): missing file ${rel}`); + if (other.size !== meta.size || other.hash !== meta.hash) { + throw new Error(`Verification failed (${kind}): content mismatch in ${rel}`); + } + } + } + for (let i = 0; i < ITERATIONS; i++) { + // Ensure previous artifacts removed + if (fs.existsSync(commands.archive)) fs.rmSync(commands.archive, { force: true }); + if (fs.existsSync(commands.unpackDir)) fs.rmSync(commands.unpackDir, { recursive: true, force: true }); + fs.mkdirSync(commands.unpackDir, { recursive: true }); + if (commands.populateUnpackDir === 'full') { + fs.cpSync(srcDir, commands.unpackDir, { recursive: true }); + } else if (commands.populateUnpackDir === 'partial') { + // Copy half the files + for (let j = 0; j < 500; j++) { + const file1 = path.join(srcDir, 'subdir1', `file${j}.txt`); + const file2 = path.join(srcDir, 'subdir2', `file${j}.txt`); + const dest1 = path.join(commands.unpackDir, 'subdir1', `file${j}.txt`); + const dest2 = path.join(commands.unpackDir, 'subdir2', `file${j}.txt`); + fs.mkdirSync(path.dirname(dest1), { recursive: true }); + fs.mkdirSync(path.dirname(dest2), { recursive: true }); + fs.copyFileSync(file1, dest1); + fs.copyFileSync(file2, dest2); + } + } + + let archiveSize: number | undefined; + const packMs: number = measureFn(() => { + commands.pack({ archive: commands.archive, demoDir: demoDataPath, unpackDir: commands.unpackDir }); + try { + const stat = fs.statSync(commands.archive); + archiveSize = stat.size; + } catch { + // ignore if archive not found + } + }); + measurements.push({ + name: `${kind}#${i + 1}`, + kind, + phase: 'pack', + ms: packMs, + sizeBytes: archiveSize + }); + + const unpackMs: 
number = measureFn(() => { + if (commands.cleanBeforeUnpack) { + fs.rmSync(commands.unpackDir, { recursive: true, force: true }); + fs.mkdirSync(commands.unpackDir, { recursive: true }); + } + commands.unpack({ archive: commands.archive, demoDir: demoDataPath, unpackDir: commands.unpackDir }); + }); + measurements.push({ name: `${kind}#${i + 1}`, kind, phase: 'unpack', ms: unpackMs }); + verifyUnpack(commands.unpackDir); + } +} + +function benchZipSyncScenario( + kind: string, + compression: 'store' | 'deflate' | 'auto', + existingFiles: 'all' | 'none' | 'partial' +): void { + if (!tempDir) throw new Error('Temp directory is not set up.'); + const terminal = new Terminal(new NoOpTerminalProvider()); + bench(kind, { + pack: ({ archive, demoDir }) => { + const { filesPacked } = zipSync({ + mode: 'pack', + archivePath: archive, + targetDirectories: ['subdir1', 'subdir2'], + baseDir: demoDir, + compression, + terminal + }); + console.log(`Files packed: ${filesPacked}`); + }, + unpack: ({ archive, unpackDir }) => { + const { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted } = zipSync({ + mode: 'unpack', + archivePath: archive, + targetDirectories: ['subdir1', 'subdir2'], + baseDir: unpackDir, + compression, + terminal + }); + console.log( + `Files extracted: ${filesExtracted}, files skipped: ${filesSkipped}, files deleted: ${filesDeleted}, folders deleted: ${foldersDeleted}, other entries deleted: ${otherEntriesDeleted}` + ); + }, + archive: path.join(tempDir, `archive-zipsync-${compression}.zip`), + unpackDir: path.join(tempDir, `unpacked-zipsync-${compression}-${existingFiles}`), + populateUnpackDir: existingFiles === 'all' ? 'full' : existingFiles === 'partial' ? 'partial' : undefined, + cleanBeforeUnpack: false + }); +} + +describe(`archive benchmarks (iterations=${ITERATIONS})`, () => { + it('tar', () => { + if (!isTarAvailable()) { + console.log('Skipping tar test because tar is not available'); + return; + } + if (!tempDir) throw new Error('Temp directory is not set up.'); + bench('tar', { + pack: ({ archive, demoDir }) => execSync(`tar -cf "${archive}" -C "${demoDir}" .`), + unpack: ({ archive, unpackDir }) => execSync(`tar -xf "${archive}" -C "${unpackDir}"`), + archive: path.join(tempDir, 'archive.tar'), + unpackDir: path.join(tempDir, 'unpacked-tar'), + populateUnpackDir: 'full', + cleanBeforeUnpack: true + }); + }); + it('tar.gz', () => { + if (!isTarAvailable()) { + console.log('Skipping tar test because tar is not available'); + return; + } + if (!tempDir) throw new Error('Temp directory is not set up.'); + bench('tar.gz', { + pack: ({ archive, demoDir }) => execSync(`tar -czf "${archive}" -C "${demoDir}" .`), + unpack: ({ archive, unpackDir }) => execSync(`tar -xzf "${archive}" -C "${unpackDir}"`), + archive: path.join(tempDir, 'archive.tar.gz'), + unpackDir: path.join(tempDir, 'unpacked-tar-gz'), + populateUnpackDir: 'full', + cleanBeforeUnpack: true + }); + }); + it('zip-store', () => { + if (!isZipAvailable()) { + console.log('Skipping zip test because zip is not available'); + return; + } + if (!tempDir) throw new Error('Temp directory is not set up.'); + bench('zip-store', { + pack: ({ archive, demoDir }) => execSync(`zip -r -Z store "${archive}" .`, { cwd: demoDir }), + unpack: ({ archive, unpackDir }) => execSync(`unzip "${archive}" -d "${unpackDir}"`), + archive: path.join(tempDir, 'archive.zip'), + unpackDir: path.join(tempDir, 'unpacked-zip'), + populateUnpackDir: 'full', + cleanBeforeUnpack: true + }); + }); + it('zip-deflate', () => { + 
if (!isZipAvailable()) { + console.log('Skipping zip test because zip is not available'); + return; + } + if (!tempDir) throw new Error('Temp directory is not set up.'); + bench('zip-deflate', { + pack: ({ archive, demoDir }) => execSync(`zip -r -Z deflate -9 "${archive}" .`, { cwd: demoDir }), + unpack: ({ archive, unpackDir }) => execSync(`unzip "${archive}" -d "${unpackDir}"`), + archive: path.join(tempDir, 'archive-deflate.zip'), + unpackDir: path.join(tempDir, 'unpacked-zip-deflate'), + populateUnpackDir: 'full', + cleanBeforeUnpack: true + }); + }); + it('zipsync-store-all-existing', () => { + benchZipSyncScenario('zipsync-store-all-existing', 'store', 'all'); + }); + it('zipsync-store-none-existing', () => { + benchZipSyncScenario('zipsync-store-none-existing', 'store', 'none'); + }); + it('zipsync-store-partial-existing', () => { + benchZipSyncScenario('zipsync-store-partial-existing', 'store', 'partial'); + }); + it('zipsync-deflate-all-existing', () => { + benchZipSyncScenario('zipsync-deflate-all-existing', 'deflate', 'all'); + }); + it('zipsync-deflate-none-existing', () => { + benchZipSyncScenario('zipsync-deflate-none-existing', 'deflate', 'none'); + }); + it('zipsync-deflate-partial-existing', () => { + benchZipSyncScenario('zipsync-deflate-partial-existing', 'deflate', 'partial'); + }); + it('zipsync-auto-all-existing', () => { + benchZipSyncScenario('zipsync-auto-all-existing', 'auto', 'all'); + }); + it('zipsync-auto-none-existing', () => { + benchZipSyncScenario('zipsync-auto-none-existing', 'auto', 'none'); + }); + it('zipsync-auto-partial-existing', () => { + benchZipSyncScenario('zipsync-auto-partial-existing', 'auto', 'partial'); + }); +}); + +afterAll(() => { + if (!measurements.length) return; + interface IStats { + kind: string; + phase: string; + n: number; + min: number; + max: number; + mean: number; + p95: number; + std: number; + sizeMean?: number; // only for pack + } + const groups: Map = new Map(); + for (const m of measurements) { + const key: string = `${m.kind}|${m.phase}`; + let bucket = groups.get(key); + if (!bucket) { + bucket = { times: [], sizes: [] }; + groups.set(key, bucket); + } + bucket.times.push(m.ms); + if (typeof m.sizeBytes === 'number') bucket.sizes.push(m.sizeBytes); + } + const stats: IStats[] = []; + function percentile(sorted: number[], p: number): number { + if (!sorted.length) return 0; + const idx: number = Math.min(sorted.length - 1, Math.ceil((p / 100) * sorted.length) - 1); + return sorted[idx]; + } + for (const [key, bucket] of groups) { + const [kind, phase] = key.split('|'); + bucket.times.sort((a, b) => a - b); + const arr = bucket.times; + const n = arr.length; + const min = arr[0]; + const max = arr[n - 1]; + const sum = arr.reduce((a, b) => a + b, 0); + const mean = sum / n; + const variance = arr.reduce((a, b) => a + (b - mean) * (b - mean), 0) / n; + const std = Math.sqrt(variance); + const p95 = percentile(arr, 95); + const sizeMean = bucket.sizes.length + ? 
bucket.sizes.reduce((a, b) => a + b, 0) / bucket.sizes.length + : undefined; + stats.push({ kind, phase, n, min, max, mean, std, p95, sizeMean }); + } + // Organize into groups + const groupsDef: Array<{ title: string; baseline: string; members: string[] }> = [ + { + title: 'Uncompressed (baseline: tar)', + baseline: 'tar', + members: [ + 'tar', + 'zip-store', + 'zipsync-store-all-existing', + 'zipsync-store-none-existing', + 'zipsync-store-partial-existing' + ] + }, + { + title: 'Compressed (baseline: tar.gz)', + baseline: 'tar.gz', + members: [ + 'tar.gz', + 'zip-deflate', + 'zipsync-deflate-all-existing', + 'zipsync-deflate-none-existing', + 'zipsync-deflate-partial-existing', + 'zipsync-auto-all-existing', + 'zipsync-auto-none-existing', + 'zipsync-auto-partial-existing' + ] + } + ]; + interface ITableRow { + group: string; + isBaseline: boolean; + s: IStats; + deltaMeanPct: number; + } + const tableRows: ITableRow[] = []; + for (const g of groupsDef) { + const baselinePack: IStats | undefined = stats.find((s) => s.kind === g.baseline && s.phase === 'pack'); + const baselineUnpack: IStats | undefined = stats.find( + (s) => s.kind === g.baseline && s.phase === 'unpack' + ); + for (const member of g.members) { + for (const phase of ['pack', 'unpack'] as const) { + const s = stats.find((st) => st.kind === member && st.phase === phase); + if (!s) continue; + const baseline = phase === 'pack' ? baselinePack : baselineUnpack; + const deltaMeanPct = baseline ? ((s.mean - baseline.mean) / baseline.mean) * 100 : 0; + tableRows.push({ group: g.title, isBaseline: member === g.baseline, s, deltaMeanPct }); + } + } + } + + function buildTable(rowsData: ITableRow[], phaseFilter: 'pack' | 'unpack'): string[] { + const headers = + phaseFilter === 'pack' + ? [ + 'Group', + 'Archive', + 'iter', + 'min(ms)', + 'mean(ms)', + 'Δmean%', + 'p95(ms)', + 'max(ms)', + 'std(ms)', + 'size(bytes)' + ] + : ['Group', 'Archive', 'iter', 'min(ms)', 'mean(ms)', 'Δmean%', 'p95(ms)', 'max(ms)', 'std(ms)']; + const rows: string[][] = [headers]; + for (const row of rowsData.filter((r) => r.s.phase === phaseFilter)) { + const baseCols = [ + row.isBaseline ? row.group : '', + row.s.kind + (row.isBaseline ? '*' : ''), + String(row.s.n), + row.s.min.toFixed(2), + row.s.mean.toFixed(2), + (row.deltaMeanPct >= 0 ? '+' : '') + row.deltaMeanPct.toFixed(1), + row.s.p95.toFixed(2), + row.s.max.toFixed(2), + row.s.std.toFixed(2) + ]; + if (phaseFilter === 'pack') { + baseCols.push(row.s.sizeMean !== undefined ? 
Math.round(row.s.sizeMean).toString() : ''); + } + rows.push(baseCols); + } + const colWidths: number[] = headers.map((header, i) => + rows.reduce((w, r) => Math.max(w, r[i].length), 0) + ); + return rows.map((r) => r.map((c, i) => c.padStart(colWidths[i], ' ')).join(' ')); + } + const packTable: string[] = buildTable(tableRows, 'pack'); + const unpackTable: string[] = buildTable(tableRows, 'unpack'); + const outputLines: string[] = []; + outputLines.push('\nBenchmark Results (iterations=' + ITERATIONS + '):'); + outputLines.push('PACK PHASE:'); + outputLines.push(packTable[0]); + outputLines.push('-'.repeat(packTable[0].length)); + for (let i = 1; i < packTable.length; i++) outputLines.push(packTable[i]); + outputLines.push('* baseline (pack)'); + outputLines.push(''); + outputLines.push('UNPACK PHASE:'); + outputLines.push(unpackTable[0]); + outputLines.push('-'.repeat(unpackTable[0].length)); + for (let i = 1; i < unpackTable.length; i++) outputLines.push(unpackTable[i]); + outputLines.push('* baseline (unpack)'); + const resultText = outputLines.join('\n'); + console.log(resultText); + try { + const resultFile = path.join(__dirname, '..', 'temp', `benchmark-results-${runId}.txt`); + fs.writeFileSync(resultFile, resultText, { encoding: 'utf-8' }); + console.log(`Benchmark results written to: ${resultFile}`); + } catch (e) { + console.warn('Failed to write benchmark results file:', (e as Error).message); + } +}); +function isZipAvailable(): boolean { + try { + const checkZip = process.platform === 'win32' ? 'where zip' : 'command -v zip'; + const checkUnzip = process.platform === 'win32' ? 'where unzip' : 'command -v unzip'; + execSync(checkZip, { stdio: 'ignore' }); + execSync(checkUnzip, { stdio: 'ignore' }); + return true; + } catch { + return false; + } +} +function isTarAvailable(): boolean { + try { + const checkTar = process.platform === 'win32' ? 'where tar' : 'command -v tar'; + execSync(checkTar, { stdio: 'ignore' }); + return true; + } catch { + return false; + } +} diff --git a/apps/zipsync/src/compress.ts b/apps/zipsync/src/compress.ts new file mode 100644 index 00000000000..0a11f129020 --- /dev/null +++ b/apps/zipsync/src/compress.ts @@ -0,0 +1,189 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import type { Transform } from 'node:stream'; +import zlib from 'node:zlib'; +import { DISPOSE_SYMBOL } from './disposableFileHandle'; + +type OutputChunkHandler = (chunk: Uint8Array, lengthBytes: number) => void; + +const kError: unique symbol = (() => { + // Create an instance of Deflate so that we can get our hands on the internal error symbol + // It isn't exported. + const reference: zlib.Deflate = zlib.createDeflateRaw(); + const kErrorResult: symbol | undefined = Object.getOwnPropertySymbols(reference).find((x) => + x.toString().includes('kError') + ); + if (kErrorResult === undefined) { + throw new Error('Unable to find the internal error symbol in node:zlib'); + } + reference.close(); + return kErrorResult; + // Casting `symbol` to the exact symbol of this definition +})() as typeof kError; + +/** + * Internal members of all Zlib compressors. + * Needed to + */ +interface IZlibInternals { + /** + * The native binding to Zlib. + */ + _handle: IHandle | undefined; + /** + * The flush flag passed to each call other than the last one for this implementation. + * Varies by compressor. 
+ */ + _defaultFlushFlag: number; + /** + * The flush flag passed to the final call for this implementation. + * Varies by compressor. + */ + _finishFlushFlag: number; + /** + * The number of bytes read from the input and written to the output. + */ + _writeState: [number, number]; + /** + * The internal error state + */ + [kError]: Error | undefined; +} + +type Compressor = Transform & IZlibInternals; + +interface IHandle { + /** + * Closes the handle and releases resources. + * Ensure that this is always invoked. + */ + close(): void; + /** + * Compresses up to `inLen` bytes from `chunk` starting at `inOff`. + * Writes up to `outLen` bytes to `output` starting at `outOff`. + * @param flushFlag - The flush flag to the compressor implementation. Defines the behavior when reaching the end of the input. + * @param chunk - The buffer containing the data to be compressed + * @param inOff - The offset in bytes to start reading from `chunk` + * @param inLen - The maximum number of bytes to read from `chunk` + * @param output - The buffer to write the compressed data to + * @param outOff - The offset in bytes to start writing to `output` at + * @param outLen - The maximum number of bytes to write to `output`. + */ + writeSync( + flushFlag: number, + chunk: Uint8Array, + inOff: number, + inLen: number, + output: Uint8Array, + outOff: number, + outLen: number + ): void; +} + +export type IIncrementalZlib = Disposable & { + update: (inputBuffer: Uint8Array) => void; +}; + +export function createIncrementalZlib( + outputBuffer: Uint8Array, + handleOutputChunk: OutputChunkHandler, + mode: 'deflate' | 'inflate' +): IIncrementalZlib { + // The zlib constructors all allocate a buffer of size chunkSize using Buffer.allocUnsafe + // We want to ensure that that invocation doesn't allocate a buffer. + // Unfortunately the minimum value of `chunkSize` to the constructor is non-zero + + let compressor: Compressor | undefined; + + const savedAllocUnsafe: typeof Buffer.allocUnsafe = Buffer.allocUnsafe; + + try { + //@ts-expect-error + Buffer.allocUnsafe = () => outputBuffer; + if (mode === 'inflate') { + compressor = zlib.createInflateRaw({ + chunkSize: outputBuffer.byteLength + }) as unknown as Transform & IZlibInternals; + } else { + compressor = zlib.createDeflateRaw({ + chunkSize: outputBuffer.byteLength, + level: zlib.constants.Z_BEST_COMPRESSION + }) as unknown as Transform & IZlibInternals; + } + } finally { + Buffer.allocUnsafe = savedAllocUnsafe; + } + + if (!compressor) { + throw new Error('Failed to create zlib instance'); + } + + const handle: IHandle = compressor._handle!; + + return { + [DISPOSE_SYMBOL]: () => { + if (compressor._handle) { + compressor._handle.close(); + compressor._handle = undefined; + } + }, + update: function processInputChunk(inputBuffer: Uint8Array): void { + let error: Error | undefined; + + // Directive to the compressor on reaching the end of the current input buffer + // Default value is to expect more data + let flushFlag: number = compressor._defaultFlushFlag; + + let bytesInInputBuffer: number = inputBuffer.byteLength; + + if (bytesInInputBuffer <= 0) { + // Ensure the value is non-negative + // We will call the compressor one last time with 0 bytes of input + bytesInInputBuffer = 0; + // Tell the compressor to flush anything in its internal buffer and write any needed trailer. 
+ flushFlag = compressor._finishFlushFlag; + } + + let availInBefore: number = bytesInInputBuffer; + let inOff: number = 0; + let availOutAfter: number = 0; + let availInAfter: number | undefined; + + const state: [number, number] = compressor._writeState; + + do { + handle.writeSync( + flushFlag, + inputBuffer, // in + inOff, // in_off + availInBefore, // in_len + outputBuffer, // out + 0, // out_off + outputBuffer.byteLength // out_len + ); + + if (error) { + throw error; + } else if (compressor[kError]) { + throw compressor[kError]; + } + + availOutAfter = state[0]; + availInAfter = state[1]; + + const inDelta: number = availInBefore - availInAfter; + + const have: number = outputBuffer.byteLength - availOutAfter; + if (have > 0) { + handleOutputChunk(outputBuffer, have); + } + + // These values get reset if we have new data, + // so we can update them even if we're done + inOff += inDelta; + availInBefore = availInAfter; + } while (availOutAfter === 0); + } + }; +} diff --git a/apps/zipsync/src/crc32.test.ts b/apps/zipsync/src/crc32.test.ts new file mode 100644 index 00000000000..60a4e76019f --- /dev/null +++ b/apps/zipsync/src/crc32.test.ts @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { fallbackCrc32 } from './crc32'; +import * as zlib from 'zlib'; + +describe('crc32', () => { + it('fallbackCrc32 should match zlib.crc32', () => { + if (!zlib.crc32) { + console.log('Skipping test because zlib.crc32 is not available in this Node.js version'); + return; + } + + const testData = [ + Buffer.from('hello world', 'utf-8'), + Buffer.alloc(0), // empty buffer + Buffer.from('hello crc32', 'utf-8'), + Buffer.from([-1, 2, 3, 4, 5, 255, 0, 128]) + ]; + + let fallbackCrc: number = 0; + let zlibCrc: number = 0; + + for (const data of testData) { + fallbackCrc = fallbackCrc32(data, fallbackCrc); + zlibCrc = zlib.crc32(data, zlibCrc); + } + + fallbackCrc = fallbackCrc >>> 0; + zlibCrc = zlibCrc >>> 0; + + expect(fallbackCrc).toBe(zlibCrc); + }); +}); diff --git a/apps/zipsync/src/crc32.ts b/apps/zipsync/src/crc32.ts new file mode 100644 index 00000000000..bc01a1171d1 --- /dev/null +++ b/apps/zipsync/src/crc32.ts @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import * as zlib from 'zlib'; + +let crcTable: Uint32Array | undefined; + +function initCrcTable(): Uint32Array { + if (crcTable) { + return crcTable; + } + + crcTable = new Uint32Array(256); + for (let i: number = 0; i < 256; i++) { + let crcEntry: number = i; + for (let j: number = 0; j < 8; j++) { + // eslint-disable-next-line no-bitwise + crcEntry = crcEntry & 1 ? 
0xedb88320 ^ (crcEntry >>> 1) : crcEntry >>> 1; + } + crcTable[i] = crcEntry; + } + return crcTable; +} + +export function fallbackCrc32(data: Buffer, value: number = 0): number { + const table: Uint32Array = initCrcTable(); + value = (value ^ 0xffffffff) >>> 0; + + for (let i: number = 0; i < data.length; i++) { + // eslint-disable-next-line no-bitwise + value = table[(value ^ data[i]) & 0xff] ^ (value >>> 8); + } + + value = (value ^ 0xffffffff) >>> 0; + return value; +} + +export function crc32Builder(data: Buffer, value: number = 0): number { + if (zlib.crc32) { + return zlib.crc32(data, value); + } else { + // Fallback implementation for Node.js versions older than 20 + return fallbackCrc32(data, value); + } +} diff --git a/apps/zipsync/src/disposableFileHandle.ts b/apps/zipsync/src/disposableFileHandle.ts new file mode 100644 index 00000000000..d86f6286c47 --- /dev/null +++ b/apps/zipsync/src/disposableFileHandle.ts @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { default as fs, type OpenMode } from 'node:fs'; + +interface IInternalDisposableFileHandle extends Disposable { + fd: number; +} + +export interface IDisposableFileHandle extends IInternalDisposableFileHandle { + readonly fd: number; +} + +export const DISPOSE_SYMBOL: typeof Symbol.dispose = Symbol.dispose ?? Symbol.for('Symbol.dispose'); + +export function getDisposableFileHandle(path: string, openMode: OpenMode): IDisposableFileHandle { + const result: IInternalDisposableFileHandle = { + fd: fs.openSync(path, openMode), + [DISPOSE_SYMBOL]: () => { + if (!isNaN(result.fd)) { + fs.closeSync(result.fd); + result.fd = NaN; + } + } + }; + + return result; +} diff --git a/apps/zipsync/src/perf.ts b/apps/zipsync/src/perf.ts new file mode 100644 index 00000000000..7c3a20f34b2 --- /dev/null +++ b/apps/zipsync/src/perf.ts @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import type { PerformanceEntry } from 'node:perf_hooks'; +import { performance } from 'node:perf_hooks'; +import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; + +export function markStart(name: string): void { + performance.mark(`zipsync:${name}:start`); +} +export function markEnd(name: string): void { + const base: string = `zipsync:${name}`; + performance.mark(`${base}:end`); + performance.measure(base, `${base}:start`, `${base}:end`); +} +export function getDuration(name: string): number { + const measures: PerformanceEntry[] = performance.getEntriesByName( + `zipsync:${name}` + ) as unknown as PerformanceEntry[]; + if (measures.length === 0) return 0; + return measures[measures.length - 1].duration; +} +export function formatDuration(ms: number): string { + return ms >= 1000 ? 
(ms / 1000).toFixed(2) + 's' : ms.toFixed(2) + 'ms'; +} +export function emitSummary(operation: 'pack' | 'unpack', term: ITerminal): void { + const totalName: string = `${operation}.total`; + // Ensure total is measured + markEnd(totalName); + const totalDuration: number = getDuration(totalName); + const prefix: string = `zipsync:${operation}.`; + const measures: PerformanceEntry[] = performance.getEntriesByType( + 'measure' + ) as unknown as PerformanceEntry[]; + const rows: Array<{ name: string; dur: number }> = []; + for (const m of measures) { + if (!m.name.startsWith(prefix)) continue; + if (m.name === `zipsync:${totalName}`) continue; + // Extract segment name (remove prefix) + const segment: string = m.name.substring(prefix.length); + rows.push({ name: segment, dur: m.duration }); + } + rows.sort((a, b) => b.dur - a.dur); + const lines: string[] = rows.map((r) => { + const pct: number = totalDuration ? (r.dur / totalDuration) * 100 : 0; + return ` ${r.name}: ${formatDuration(r.dur)} (${pct.toFixed(1)}%)`; + }); + lines.push(` TOTAL ${operation}.total: ${formatDuration(totalDuration)}`); + term.writeVerboseLine(`Performance summary (${operation}):\n` + lines.join('\n')); + // Cleanup marks/measures to avoid unbounded growth + performance.clearMarks(); + performance.clearMeasures(); +} diff --git a/apps/zipsync/src/start.test.ts b/apps/zipsync/src/start.test.ts new file mode 100644 index 00000000000..ce4cb7fba4d --- /dev/null +++ b/apps/zipsync/src/start.test.ts @@ -0,0 +1,11 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { execSync } from 'child_process'; + +describe('CLI Tool Tests', () => { + it('should display help for "zipsync --help"', () => { + const startOutput = execSync('node lib/start.js --help').toString(); + expect(startOutput).toMatchSnapshot(); + }); +}); diff --git a/apps/zipsync/src/start.ts b/apps/zipsync/src/start.ts new file mode 100644 index 00000000000..33920ce2d7b --- /dev/null +++ b/apps/zipsync/src/start.ts @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { version } from '../package.json'; +import { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider'; +import { Terminal } from '@rushstack/terminal/lib/Terminal'; + +import { ZipSyncCommandLineParser } from './ZipSyncCommandLineParser'; + +const toolVersion: string = version; + +const consoleTerminalProvider: ConsoleTerminalProvider = new ConsoleTerminalProvider(); +const terminal: Terminal = new Terminal(consoleTerminalProvider); + +terminal.writeLine(); +terminal.writeLine(`zipsync ${toolVersion} - https://rushstack.io`); +terminal.writeLine(); + +const commandLine: ZipSyncCommandLineParser = new ZipSyncCommandLineParser(consoleTerminalProvider, terminal); +commandLine.executeAsync().catch((error) => { + terminal.writeError(error); +}); diff --git a/apps/zipsync/src/zipSync.test.ts b/apps/zipsync/src/zipSync.test.ts new file mode 100644 index 00000000000..c8f5bc969e2 --- /dev/null +++ b/apps/zipsync/src/zipSync.test.ts @@ -0,0 +1,95 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+ +import { tmpdir } from 'os'; +import * as path from 'path'; +import * as fs from 'fs'; +import * as crypto from 'crypto'; +import { zipSync } from './zipSync'; +import { NoOpTerminalProvider } from '@rushstack/terminal/lib/NoOpTerminalProvider'; +import { Terminal } from '@rushstack/terminal/lib/Terminal'; + +function getTempDir(): string { + const randomId = crypto.randomUUID(); + const tempDir = path.join(tmpdir(), `zipsync-test-${randomId}`); + fs.mkdirSync(tempDir); + return tempDir; +} + +function getDemoDataDirectoryDisposable(): { + targetDirectories: string[]; + baseDir: string; + [Symbol.dispose](): void; +} { + const baseDir: string = getTempDir(); + + const targetDirectories = ['demo-data-1', 'demo-data-2', 'demo-data-3', 'nested/demo/dir/4'].map( + (folderName) => { + const dataDir: string = path.join(baseDir, folderName); + fs.mkdirSync(dataDir, { recursive: true }); + const subdir: string = path.join(dataDir, 'subdir'); + fs.mkdirSync(subdir); + for (let i: number = 0; i < 5; ++i) { + const filePath: string = path.join(subdir, `file-${i}.txt`); + fs.writeFileSync(filePath, `This is file ${i} in ${folderName}/subdir\n`, { encoding: 'utf-8' }); + } + return folderName; + } + ); + + return { + targetDirectories, + baseDir, + [Symbol.dispose]() { + fs.rmSync(baseDir, { recursive: true, force: true }); + } + }; +} + +describe('zipSync tests', () => { + it('basic pack test', () => { + const demoDataDisposable = getDemoDataDirectoryDisposable(); + const { targetDirectories, baseDir } = demoDataDisposable; + + const terminal = new Terminal(new NoOpTerminalProvider()); + + const archivePath: string = path.join(baseDir, 'archive.zip'); + const packResult = zipSync({ + mode: 'pack', + terminal: terminal, + compression: 'deflate', + baseDir, + targetDirectories, + archivePath + }); + + expect(packResult).toMatchSnapshot(); + + const unpackBaseDir = getTempDir(); + + const unpackResult = zipSync({ + mode: 'unpack', + terminal: terminal, + archivePath, + baseDir: unpackBaseDir, + targetDirectories, + compression: 'deflate' + }); + + expect(unpackResult).toMatchSnapshot(); + + // Verify files were extracted + for (const targetDirectory of targetDirectories) { + const sourceDir: string = path.join(baseDir, targetDirectory); + for (let i: number = 0; i < 5; ++i) { + const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); + const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); + expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( + fs.readFileSync(sourceFile, { encoding: 'utf-8' }) + ); + } + } + + demoDataDisposable[Symbol.dispose](); + }); +}); diff --git a/apps/zipsync/src/zipSync.ts b/apps/zipsync/src/zipSync.ts new file mode 100644 index 00000000000..7fb506deabb --- /dev/null +++ b/apps/zipsync/src/zipSync.ts @@ -0,0 +1,680 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
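+
+// A minimal usage sketch, assuming the caller already has an `@rushstack/terminal` ITerminal;
+// the paths below are illustrative placeholders and the call shape mirrors zipSync.test.ts:
+//
+//   const result = zipSync({
+//     mode: 'pack',
+//     terminal,
+//     compression: 'auto',
+//     baseDir: '/path/to/project',
+//     targetDirectories: ['lib', 'dist'],
+//     archivePath: '/path/to/cache/entry.zip'
+//   });
+//
+// In 'unpack' mode the same option shape is used; files already on disk with a matching size and
+// SHA-1 hash are skipped rather than rewritten.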
+
+import type { ITerminal } from '@rushstack/terminal/lib/ITerminal';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as crypto from 'crypto';
+import * as zlib from 'zlib';
+import { type IReadonlyPathTrieNode, LookupByPath } from '@rushstack/lookup-by-path/lib/LookupByPath';
+import { crc32Builder } from './crc32';
+import { DISPOSE_SYMBOL, getDisposableFileHandle, type IDisposableFileHandle } from './disposableFileHandle';
+import { type IIncrementalZlib, createIncrementalZlib } from './compress';
+import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf';
+import {
+  writeLocalFileHeader,
+  writeDataDescriptor,
+  writeCentralDirectoryHeader,
+  writeEndOfCentralDirectory,
+  findEndOfCentralDirectory,
+  parseCentralDirectoryHeader,
+  getFileFromZip,
+  DEFLATE_COMPRESSION,
+  STORE_COMPRESSION,
+  type ZipMetaCompressionMethod,
+  type IEndOfCentralDirectory,
+  type ICentralDirectoryHeaderParseResult,
+  type IFileEntry
+} from './zipUtils';
+
+const METADATA_FILENAME: string = '__zipsync_metadata__.json';
+const METADATA_VERSION: string = '1.0';
+
+export type IZipMode = 'pack' | 'unpack';
+
+type ZipSyncOptionCompression = 'store' | 'deflate' | 'auto';
+
+/**
+ * @public
+ * Options for zipsync
+ */
+export interface IZipSyncOptions {
+  /**
+   * @rushstack/terminal compatible terminal for logging
+   */
+  terminal: ITerminal;
+  /**
+   * Mode of operation: "pack" to create a zip archive, or "unpack" to extract files from a zip archive
+   */
+  mode: IZipMode;
+  /**
+   * Zip file path
+   */
+  archivePath: string;
+  /**
+   * Target directories to pack or unpack (depending on mode)
+   */
+  targetDirectories: ReadonlyArray<string>;
+  /**
+   * Base directory for relative paths within the archive (defaults to common parent of targetDirectories)
+   */
+  baseDir: string;
+  /**
+   * Compression mode. If set to 'deflate', file data will be compressed using raw DEFLATE (method 8) when this
+   * produces a smaller result; otherwise it will fall back to 'store' per-file.
+   */
+  compression: ZipSyncOptionCompression;
+}
+
+interface IDirQueueItem {
+  dir: string;
+  depth: number;
+  node?: IReadonlyPathTrieNode<boolean> | undefined;
+}
+
+interface IMetadataFileRecord {
+  size: number;
+  sha1Hash: string;
+}
+
+interface IMetadata {
+  version: string;
+  files: Record<string, IMetadataFileRecord>;
+}
+
+interface IPackResult {
+  filesPacked: number;
+  metadata: IMetadata;
+}
+
+interface IUnpackResult {
+  metadata: IMetadata;
+  filesExtracted: number;
+  filesSkipped: number;
+  filesDeleted: number;
+  foldersDeleted: number;
+  otherEntriesDeleted: number;
+}
+
+const LIKELY_COMPRESSED_EXTENSION_REGEX: RegExp =
+  /\.(?:zip|gz|tgz|bz2|xz|7z|rar|jpg|jpeg|png|gif|webp|avif|mp4|m4v|mov|mkv|webm|mp3|ogg|aac|flac|pdf|woff|woff2)$/;
+
+/**
+ * Packs (creates) or unpacks (synchronizes) a ZIP archive.
+ *
+ * @public
+ */
+export function zipSync<T extends IZipSyncOptions>(
+  options: T
+): T['mode'] extends 'pack' ?
IPackResult : IUnpackResult { + const { + terminal, + mode, + archivePath, + targetDirectories: rawTargetDirectories, + baseDir: rawBaseDir + } = options; + const baseDir: string = path.resolve(rawBaseDir); + const targetDirectories: string[] = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); + const compressionMode: ZipSyncOptionCompression = options.compression; + + function calculateSHA1(data: Buffer): string { + return crypto.createHash('sha1').update(data).digest('hex'); + } + + function packZip(): IPackResult { + markStart('pack.total'); + terminal.writeDebugLine('Starting packZip'); + // Pass 1: enumerate + markStart('pack.enumerate'); + + const filePaths: string[] = []; + const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ dir, depth: 0 })); + + while (queue.length) { + const { dir: currentDir, depth } = queue.shift()!; + terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); + + const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); + + let items: fs.Dirent[]; + try { + items = fs.readdirSync(currentDir, { withFileTypes: true }); + } catch (e) { + if ( + e && + ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') + ) { + terminal.writeWarningLine(`Failed to read directory: ${currentDir}. Ignoring.`); + continue; + } else { + throw e; + } + } + + for (const item of items) { + const fullPath: string = path.join(currentDir, item.name); + if (item.isFile()) { + const relativePath: string = path.relative(baseDir, fullPath).replace(/\\/g, '/'); + terminal.writeVerboseLine(`${padding}${item.name}`); + filePaths.push(relativePath); + } else if (item.isDirectory()) { + terminal.writeVerboseLine(`${padding}${item.name}/`); + queue.push({ dir: fullPath, depth: depth + 1 }); + } else { + throw new Error(`Unexpected item (not file or directory): ${fullPath}. Aborting.`); + } + } + } + + terminal.writeLine(`Found ${filePaths.length} files to pack (enumerated)`); + markEnd('pack.enumerate'); + + // Pass 2: read + hash + compress + markStart('pack.prepareEntries'); + const bufferSize: number = 1 << 25; // 32 MiB + const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + + terminal.writeDebugLine(`Opening archive for write: ${archivePath}`); + const zipFile: number = fs.openSync(archivePath, 'w'); + let currentOffset: number = 0; + // Use this function to do any write to the zip file, so that we can track the current offset. + function writeChunkToZip(chunk: Uint8Array, lengthBytes: number = chunk.byteLength): void { + let offset: number = 0; + while (lengthBytes > 0 && offset < chunk.byteLength) { + // In practice this call always writes all data at once, but the spec says it is not an error + // for it to not do so. Possibly that situation comes up when writing to something that is not + // an ordinary file. 
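+        // Retry until the full chunk is consumed: each iteration advances `offset` by the number
+        // of bytes actually written and decrements `lengthBytes` by the same amount.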
+ const written: number = fs.writeSync(zipFile, chunk, offset, lengthBytes); + lengthBytes -= written; + offset += written; + } + currentOffset += offset; + } + + function writeFileEntry(relativePath: string): IFileEntry { + function isLikelyAlreadyCompressed(filename: string): boolean { + return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); + } + const fullPath: string = path.join(baseDir, relativePath); + + const readInputInChunks: (onChunk: (bytesInInputBuffer: number) => void) => void = ( + onChunk: (bytesInInputBuffer: number) => void + ): void => { + using inputDisposable: IDisposableFileHandle = getDisposableFileHandle(fullPath, 'r'); + + let bytesInInputBuffer: number = 0; + // The entire input buffer will be drained in each loop iteration + // So run until EOF + while (!isNaN(inputDisposable.fd)) { + bytesInInputBuffer = fs.readSync(inputDisposable.fd, inputBuffer, 0, inputBuffer.byteLength, -1); + + if (bytesInInputBuffer <= 0) { + // EOF, close the input fd + inputDisposable[DISPOSE_SYMBOL](); + } + + onChunk(bytesInInputBuffer); + } + }; + + let shouldCompress: boolean = false; + if (compressionMode === 'deflate') { + shouldCompress = true; + } else if (compressionMode === 'auto') { + // Heuristic: skip compression for small files or likely-already-compressed files + if (!isLikelyAlreadyCompressed(relativePath)) { + shouldCompress = true; + } else { + terminal.writeVerboseLine( + `Skip compression heuristically (already-compressed) for ${relativePath} (size unknown at this point)` + ); + } + } + + const compressionMethod: ZipMetaCompressionMethod = shouldCompress + ? DEFLATE_COMPRESSION + : STORE_COMPRESSION; + + const entry: IFileEntry = { + filename: relativePath, + size: 0, + compressedSize: 0, + crc32: 0, + sha1Hash: '', + localHeaderOffset: currentOffset, + compressionMethod + }; + + writeChunkToZip(writeLocalFileHeader(entry)); + + const sha1HashBuilder: crypto.Hash = crypto.createHash('sha1'); + let crc32: number = 0; + let uncompressedSize: number = 0; + let compressedSize: number = 0; + + using deflateIncremental: IIncrementalZlib | undefined = shouldCompress + ? createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + writeChunkToZip(chunk, lengthBytes); + compressedSize += lengthBytes; + }, + 'deflate' + ) + : undefined; + + // Also capture content if we might need it (for compression decision or storing raw data). + // We'll accumulate into an array of buffers to avoid repeated concatenations for large files. 
+ readInputInChunks((bytesInInputBuffer: number) => { + const slice: Buffer = inputBuffer.subarray(0, bytesInInputBuffer); + sha1HashBuilder.update(slice); + crc32 = crc32Builder(slice, crc32); + if (deflateIncremental) { + deflateIncremental.update(slice); + } else { + writeChunkToZip(slice, bytesInInputBuffer); + } + uncompressedSize += bytesInInputBuffer; + }); + + // finalize hashes, compression + deflateIncremental?.update(Buffer.alloc(0)); + crc32 = crc32 >>> 0; + const sha1Hash: string = sha1HashBuilder.digest('hex'); + + if (!shouldCompress) { + compressedSize = uncompressedSize; + } + + entry.size = uncompressedSize; + entry.compressedSize = compressedSize; + entry.crc32 = crc32; + entry.sha1Hash = sha1Hash; + + writeChunkToZip(writeDataDescriptor(entry)); + + terminal.writeVerboseLine( + `${relativePath} (sha1=${entry.sha1Hash}, crc32=${entry.crc32.toString(16)}, size=${ + entry.size + }, compressed=${entry.compressedSize}, method=${entry.compressionMethod}, compressed ${( + 100 - + (entry.compressedSize / entry.size) * 100 + ).toFixed(1)}%)` + ); + return entry; + } + + const entries: IFileEntry[] = []; + for (const relativePath of filePaths) { + entries.push(writeFileEntry(relativePath)); + } + + markEnd('pack.prepareEntries'); + terminal.writeLine(`Prepared ${entries.length} file entries`); + + markStart('pack.metadata.build'); + const metadata: IMetadata = { version: METADATA_VERSION, files: {} }; + for (const entry of entries) { + metadata.files[entry.filename] = { size: entry.size, sha1Hash: entry.sha1Hash }; + } + + const metadataContent: string = JSON.stringify(metadata); + const metadataBuffer: Buffer = Buffer.from(metadataContent, 'utf8'); + terminal.writeDebugLine( + `Metadata size=${metadataBuffer.length} bytes, fileCount=${Object.keys(metadata.files).length}` + ); + + let metadataCompressionMethod: ZipMetaCompressionMethod = STORE_COMPRESSION; + let metadataData: Buffer = metadataBuffer; + let metadataCompressedSize: number = metadataBuffer.length; + if ((compressionMode === 'deflate' || compressionMode === 'auto') && metadataBuffer.length > 64) { + const compressed: Buffer = zlib.deflateRawSync(metadataBuffer, { level: 9 }); + if (compressed.length < metadataBuffer.length) { + metadataCompressionMethod = DEFLATE_COMPRESSION; + metadataData = compressed; + metadataCompressedSize = compressed.length; + terminal.writeDebugLine( + `Metadata compressed (orig=${metadataBuffer.length}, compressed=${compressed.length})` + ); + } else { + terminal.writeDebugLine('Metadata compression skipped (not smaller)'); + } + } + + const metadataEntry: IFileEntry = { + filename: METADATA_FILENAME, + size: metadataBuffer.length, + compressedSize: metadataCompressedSize, + crc32: crc32Builder(metadataBuffer), + sha1Hash: calculateSHA1(metadataBuffer), + localHeaderOffset: currentOffset, + compressionMethod: metadataCompressionMethod + }; + + writeChunkToZip(writeLocalFileHeader(metadataEntry)); + writeChunkToZip(metadataData, metadataCompressedSize); + writeChunkToZip(writeDataDescriptor(metadataEntry)); + + entries.push(metadataEntry); + terminal.writeVerboseLine(`Total entries including metadata: ${entries.length}`); + + markEnd('pack.metadata.build'); + + markStart('pack.write.entries'); + const outputDir: string = path.dirname(archivePath); + fs.mkdirSync(outputDir, { recursive: true }); + + try { + markEnd('pack.write.entries'); + + markStart('pack.write.centralDirectory'); + const centralDirOffset: number = currentOffset; + let centralDirSize: number = 0; + + for (const entry of 
entries) { + const centralHeader: Buffer = writeCentralDirectoryHeader(entry); + fs.writeSync(zipFile, centralHeader); + centralDirSize += centralHeader.length; + } + terminal.writeDebugLine( + `Central directory written (offset=${centralDirOffset}, size=${centralDirSize})` + ); + markEnd('pack.write.centralDirectory'); + + // Write end of central directory + markStart('pack.write.eocd'); + const endOfCentralDir: Buffer = writeEndOfCentralDirectory( + centralDirOffset, + centralDirSize, + entries.length + ); + fs.writeSync(zipFile, endOfCentralDir); + terminal.writeDebugLine('EOCD record written'); + markEnd('pack.write.eocd'); + } finally { + fs.closeSync(zipFile); + terminal.writeDebugLine('Archive file closed'); + } + markEnd('pack.total'); + const total: number = getDuration('pack.total'); + emitSummary('pack', terminal); + terminal.writeLine(`Successfully packed ${entries.length} files in ${formatDuration(total)}`); + return { filesPacked: entries.length, metadata }; + } + + function unpackZip(): IUnpackResult { + markStart('unpack.total'); + terminal.writeDebugLine('Starting unpackZip'); + + markStart('unpack.read.archive'); + const zipBuffer: Buffer = fs.readFileSync(archivePath); + terminal.writeDebugLine(`Archive size=${zipBuffer.length} bytes`); + markEnd('unpack.read.archive'); + + markStart('unpack.parse.centralDirectory'); + const zipTree: LookupByPath = new LookupByPath(); + const endOfCentralDir: IEndOfCentralDirectory = findEndOfCentralDirectory(zipBuffer); + + const centralDirBuffer: Buffer = zipBuffer.subarray( + endOfCentralDir.centralDirOffset, + endOfCentralDir.centralDirOffset + endOfCentralDir.centralDirSize + ); + terminal.writeDebugLine( + `Central directory slice size=${centralDirBuffer.length} (expected=${endOfCentralDir.centralDirSize})` + ); + + let metadataEntry: ICentralDirectoryHeaderParseResult | undefined; + const entries: Array = []; + let offset: number = 0; + + for (let i: number = 0; i < endOfCentralDir.totalCentralDirRecords; i++) { + const result: ICentralDirectoryHeaderParseResult = parseCentralDirectoryHeader( + centralDirBuffer, + offset + ); + zipTree.setItem(result.filename, true); + + if (result.filename === METADATA_FILENAME) { + if (metadataEntry) { + throw new Error('Multiple metadata entries found in archive'); + } + metadataEntry = result; + } + + entries.push(result); + offset = result.nextOffset; + terminal.writeDebugLine( + `Parsed central entry ${result.filename} (method=${result.header.compressionMethod}, compSize=${result.header.compressedSize})` + ); + } + markEnd('unpack.parse.centralDirectory'); + + if (!metadataEntry) { + throw new Error(`Metadata entry not found in archive`); + } + + markStart('unpack.read.metadata'); + terminal.writeDebugLine('Metadata entry found, reading'); + const metadataZipBuffer: Buffer = getFileFromZip(zipBuffer, metadataEntry); + + let metadataBuffer: Buffer; + if (metadataEntry.header.compressionMethod === STORE_COMPRESSION) { + metadataBuffer = metadataZipBuffer; + } else if (metadataEntry.header.compressionMethod === DEFLATE_COMPRESSION) { + metadataBuffer = zlib.inflateRawSync(metadataZipBuffer); + if (metadataBuffer.length !== metadataEntry.header.uncompressedSize) { + throw new Error( + `Metadata size mismatch (expected ${metadataEntry.header.uncompressedSize}, got ${metadataBuffer.length})` + ); + } + } else { + throw new Error( + `Unsupported compression method for metadata: ${metadataEntry.header.compressionMethod}` + ); + } + + const metadata: IMetadata = 
JSON.parse(metadataBuffer.toString('utf8')) as IMetadata; + + if (metadata.version !== METADATA_VERSION) { + throw new Error(`Unsupported metadata version: ${metadata.version}`); + } + + terminal.writeDebugLine( + `Metadata (version=${metadata.version}) parsed (fileCount=${Object.keys(metadata.files).length}, rawSize=${metadataBuffer.length})` + ); + markEnd('unpack.read.metadata'); + + terminal.writeLine(`Found ${entries.length} files in archive`); + + for (const targetDirectory of targetDirectories) { + fs.mkdirSync(targetDirectory, { recursive: true }); + terminal.writeDebugLine(`Ensured target directory: ${targetDirectory}`); + } + + let extractedCount: number = 0; + let skippedCount: number = 0; + let deletedFilesCount: number = 0; + let deletedOtherCount: number = 0; + let deletedFoldersCount: number = 0; + let scanCount: number = 0; + + const dirsToCleanup: string[] = []; + + markStart('unpack.scan.existing'); + const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ + dir, + depth: 0, + node: zipTree.getNodeAtPrefix(path.relative(baseDir, dir)) + })); + + while (queue.length) { + const { dir: currentDir, depth, node } = queue.shift()!; + terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); + + const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); + + let items: fs.Dirent[]; + try { + items = fs.readdirSync(currentDir, { withFileTypes: true }); + } catch (e) { + terminal.writeWarningLine(`Failed to read directory: ${currentDir}`); + continue; + } + + for (const item of items) { + scanCount++; + // check if exists in zipTree, if not delete + const relativePath: string = path + .relative(baseDir, path.join(currentDir, item.name)) + .replace(/\\/g, '/'); + + const childNode: IReadonlyPathTrieNode | undefined = node?.children?.get(item.name); + + if (item.isFile()) { + terminal.writeVerboseLine(`${padding}${item.name}`); + if (!childNode?.value) { + terminal.writeDebugLine(`Deleting file: ${relativePath}`); + fs.unlinkSync(relativePath); + deletedFilesCount++; + } + } else if (item.isDirectory()) { + terminal.writeVerboseLine(`${padding}${item.name}/`); + queue.push({ dir: relativePath, depth: depth + 1, node: childNode }); + if (!childNode || childNode.value) { + dirsToCleanup.push(relativePath); + } + } else { + terminal.writeVerboseLine(`${padding}${item.name} (not file or directory, deleting)`); + fs.unlinkSync(relativePath); + deletedOtherCount++; + } + } + } + + for (const dir of dirsToCleanup) { + // Try to remove the directory. If it is not empty, this will throw and we can ignore the error. 
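+      // Note: directories were discovered breadth-first (parents before children), so a directory
+      // that only becomes empty after its children are removed later in this list stays in place.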
+ try { + fs.rmdirSync(dir); + terminal.writeDebugLine(`Deleted empty directory: ${dir}`); + deletedFoldersCount++; + } catch (e) { + // Probably not empty + terminal.writeDebugLine(`Directory not empty, skipping: ${dir}`); + } + } + + terminal.writeDebugLine(`Existing entries tracked: ${scanCount}`); + markEnd('unpack.scan.existing'); + + markStart('unpack.extract.loop'); + const bufferSize: number = 1 << 25; // 32 MiB + const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + + const dirsCreated: Set = new Set(); + + for (const entry of entries) { + if (entry.filename === METADATA_FILENAME) { + continue; + } + + const targetPath: string = path.join(baseDir, entry.filename); + const targetDir: string = path.dirname(targetPath); + if (!dirsCreated.has(targetDir)) { + fs.mkdirSync(targetDir, { recursive: true }); + dirsCreated.add(targetDir); + } + + let shouldExtract: boolean = true; + if (metadata) { + const stats: fs.Stats | undefined = fs.statSync(targetPath, { throwIfNoEntry: false }); + if (!stats) { + terminal.writeDebugLine(`File does not exist and will be extracted: ${entry.filename}`); + } else { + const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; + + if (metadataFile && stats.size === metadataFile.size) { + const existingData: Buffer = fs.readFileSync(targetPath); + const existingHash: string = calculateSHA1(existingData); + + if (existingHash === metadataFile.sha1Hash) { + shouldExtract = false; + skippedCount++; + terminal.writeDebugLine(`Skip unchanged file: ${entry.filename}`); + } + } + } + } + + if (shouldExtract) { + terminal.writeDebugLine(`Extracting file: ${entry.filename}`); + const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); + let fileData: Buffer; + using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); + if (entry.header.compressionMethod === STORE_COMPRESSION) { + fileData = fileZipBuffer; + let writeOffset: number = 0; + while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { + const written: number = fs.writeSync( + fileHandle.fd, + fileData, + writeOffset, + fileData.length - writeOffset + ); + writeOffset += written; + } + } else if (entry.header.compressionMethod === DEFLATE_COMPRESSION) { + using inflateIncremental: IIncrementalZlib = createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + let writeOffset: number = 0; + while (lengthBytes > 0 && writeOffset < chunk.byteLength) { + const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); + lengthBytes -= written; + writeOffset += written; + } + }, + 'inflate' + ); + inflateIncremental.update(fileZipBuffer); + inflateIncremental.update(Buffer.alloc(0)); + } else { + throw new Error( + `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` + ); + } + + // If data descriptor was used we rely on central directory values already consumed. 
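+        // (zipsync-produced archives set general-purpose flag bit 3, so the local header's CRC-32
+        // and size fields are zero; the compressed size used to slice this entry's data came from
+        // the central directory record parsed earlier.)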
+ extractedCount++; + } + } + markEnd('unpack.extract.loop'); + + markEnd('unpack.total'); + const unpackTotal: number = getDuration('unpack.total'); + terminal.writeLine( + `Extraction complete: ${extractedCount} extracted, ${skippedCount} skipped, ${deletedFilesCount} deleted, ${deletedFoldersCount} folders deleted, ${deletedOtherCount} other entries deleted in ${formatDuration( + unpackTotal + )}` + ); + emitSummary('unpack', terminal); + terminal.writeDebugLine('unpackZip finished'); + return { + metadata, + filesExtracted: extractedCount, + filesSkipped: skippedCount, + filesDeleted: deletedFilesCount, + foldersDeleted: deletedFoldersCount, + otherEntriesDeleted: deletedOtherCount + }; + } + + if (mode === 'pack') { + terminal.writeLine(`Packing to ${archivePath} from ${rawTargetDirectories.join(', ')}`); + return packZip() as T['mode'] extends 'pack' ? IPackResult : IUnpackResult; + } else { + terminal.writeLine(`Unpacking to ${rawTargetDirectories.join(', ')} from ${archivePath}`); + return unpackZip() as T['mode'] extends 'pack' ? IPackResult : IUnpackResult; + } +} diff --git a/apps/zipsync/src/zipSyncWorker.ts b/apps/zipsync/src/zipSyncWorker.ts new file mode 100644 index 00000000000..9a1c1c6e7f9 --- /dev/null +++ b/apps/zipsync/src/zipSyncWorker.ts @@ -0,0 +1,84 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { parentPort as rawParentPort, type MessagePort } from 'node:worker_threads'; +import { type IZipSyncOptions, zipSync } from './zipSync'; +import { Terminal } from '@rushstack/terminal/lib/Terminal'; +import { StringBufferTerminalProvider } from '@rushstack/terminal/lib/StringBufferTerminalProvider'; + +export interface IHashWorkerData { + basePath: string; +} + +export interface IZipSyncCommandMessage { + type: 'zipsync'; + id: number; + options: Omit; +} + +interface IZipSyncSuccessMessage { + id: number; + type: 'zipsync'; + result: { + zipSyncReturn: ReturnType; + zipSyncLogs: string; + }; +} + +export interface IErrorMessage { + type: 'error'; + id: number; + args: { + message: string; + stack: string; + }; +} + +export type IHostToWorkerMessage = IZipSyncCommandMessage; +export type IWorkerToHostMessage = IZipSyncSuccessMessage | IErrorMessage; + +if (!rawParentPort) { + throw new Error('This module must be run in a worker thread.'); +} +const parentPort: MessagePort = rawParentPort; + +function handleMessage(message: IHostToWorkerMessage | false): void { + if (message === false) { + parentPort.removeAllListeners(); + parentPort.close(); + return; + } + + try { + switch (message.type) { + case 'zipsync': { + const { options } = message; + + const terminalProvider: StringBufferTerminalProvider = new StringBufferTerminalProvider(); + const terminal: Terminal = new Terminal(terminalProvider); + + const successMessage: IZipSyncSuccessMessage = { + type: message.type, + id: message.id, + result: { + zipSyncReturn: zipSync({ ...options, terminal }), + zipSyncLogs: terminalProvider.getOutput() + } + }; + return parentPort.postMessage(successMessage); + } + } + } catch (err) { + const errorMessage: IErrorMessage = { + type: 'error', + id: message.id, + args: { + message: (err as Error).message, + stack: (err as Error).stack || '' + } + }; + parentPort.postMessage(errorMessage); + } +} + +parentPort.on('message', handleMessage); diff --git a/apps/zipsync/src/zipSyncWorkerAsync.test.ts b/apps/zipsync/src/zipSyncWorkerAsync.test.ts new file mode 100644 index 
00000000000..ea5c9148eb4 --- /dev/null +++ b/apps/zipsync/src/zipSyncWorkerAsync.test.ts @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { tmpdir } from 'os'; +import * as path from 'path'; +import * as fs from 'fs'; +import * as crypto from 'crypto'; +import { zipSyncWorkerAsync } from './zipSyncWorkerAsync'; + +function getTempDir(): string { + const randomId = crypto.randomUUID(); + const tempDir = path.join(tmpdir(), `zipsync-test-${randomId}`); + fs.mkdirSync(tempDir); + return tempDir; +} + +function getDemoDataDirectoryDisposable(): { + targetDirectories: string[]; + baseDir: string; + [Symbol.dispose](): void; +} { + const baseDir: string = getTempDir(); + + const targetDirectories = ['demo-data-1', 'demo-data-2', 'demo-data-3', 'nested/demo/dir/4'].map( + (folderName) => { + const dataDir: string = path.join(baseDir, folderName); + fs.mkdirSync(dataDir, { recursive: true }); + const subdir: string = path.join(dataDir, 'subdir'); + fs.mkdirSync(subdir); + for (let i: number = 0; i < 5; ++i) { + const filePath: string = path.join(subdir, `file-${i}.txt`); + fs.writeFileSync(filePath, `This is file ${i} in ${folderName}/subdir\n`, { encoding: 'utf-8' }); + } + return folderName; + } + ); + + return { + targetDirectories, + baseDir, + [Symbol.dispose]() { + fs.rmSync(baseDir, { recursive: true, force: true }); + } + }; +} + +describe('zipSyncWorkerAsync tests', () => { + it('basic pack test', async () => { + const demoDataDisposable = getDemoDataDirectoryDisposable(); + const { targetDirectories, baseDir } = demoDataDisposable; + + const archivePath: string = path.join(baseDir, 'archive.zip'); + const { zipSyncReturn: packResult } = await zipSyncWorkerAsync({ + mode: 'pack', + compression: 'deflate', + baseDir, + targetDirectories, + archivePath + }); + + expect(packResult).toMatchSnapshot(); + + const unpackBaseDir = getTempDir(); + + const { zipSyncReturn: unpackResult } = await zipSyncWorkerAsync({ + mode: 'unpack', + archivePath, + baseDir: unpackBaseDir, + targetDirectories, + compression: 'deflate' + }); + + expect(unpackResult).toMatchSnapshot(); + + // Verify files were extracted + for (const targetDirectory of targetDirectories) { + const sourceDir: string = path.join(baseDir, targetDirectory); + for (let i: number = 0; i < 5; ++i) { + const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); + const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); + expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( + fs.readFileSync(sourceFile, { encoding: 'utf-8' }) + ); + } + } + + demoDataDisposable[Symbol.dispose](); + }); +}); diff --git a/apps/zipsync/src/zipSyncWorkerAsync.ts b/apps/zipsync/src/zipSyncWorkerAsync.ts new file mode 100644 index 00000000000..34eb7657e7c --- /dev/null +++ b/apps/zipsync/src/zipSyncWorkerAsync.ts @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
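+
+// A minimal usage sketch (paths are illustrative placeholders); the call shape mirrors
+// zipSyncWorkerAsync.test.ts. The `terminal` option is omitted because the worker thread creates
+// its own StringBufferTerminalProvider and returns the captured output as `zipSyncLogs`:
+//
+//   const { zipSyncReturn, zipSyncLogs } = await zipSyncWorkerAsync({
+//     mode: 'unpack',
+//     compression: 'deflate',
+//     archivePath: '/path/to/cache/entry.zip',
+//     baseDir: '/path/to/project',
+//     targetDirectories: ['lib', 'dist']
+//   });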
+ +import type { Worker } from 'node:worker_threads'; +import type { IZipSyncOptions } from './zipSync'; +import type { IWorkerToHostMessage, IZipSyncCommandMessage } from './zipSyncWorker'; + +type IZipSyncResult = ReturnType; + +export async function zipSyncWorkerAsync( + options: Omit +): Promise { + const { Worker } = await import('node:worker_threads'); + + const worker: Worker = new Worker(require.resolve('./zipSyncWorker')); + + return new Promise((resolve, reject) => { + worker.on('message', (message: IWorkerToHostMessage) => { + switch (message.type) { + case 'zipsync': { + resolve(message.result); + break; + } + case 'error': { + const error: Error = new Error(message.args.message); + error.stack = message.args.stack; + reject(error); + break; + } + default: { + const exhaustiveCheck: never = message; + throw new Error(`Unexpected message type: ${JSON.stringify(exhaustiveCheck)}`); + } + } + }); + + worker.on('error', (err) => { + reject(err); + }); + + worker.on('exit', (code) => { + if (code !== 0) { + reject(new Error(`Worker stopped with exit code ${code}`)); + } + }); + + const commandMessage: IZipSyncCommandMessage = { + type: 'zipsync', + id: 0, + options + }; + worker.postMessage(commandMessage); + }).finally(() => { + worker.postMessage(false); + }); +} diff --git a/apps/zipsync/src/zipUtils.ts b/apps/zipsync/src/zipUtils.ts new file mode 100644 index 00000000000..2bdad1975de --- /dev/null +++ b/apps/zipsync/src/zipUtils.ts @@ -0,0 +1,356 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +const LOCAL_FILE_HEADER_SIGNATURE: number = 0x04034b50; +const CENTRAL_DIR_HEADER_SIGNATURE: number = 0x02014b50; +const END_OF_CENTRAL_DIR_SIGNATURE: number = 0x06054b50; +const DATA_DESCRIPTOR_SIGNATURE: number = 0x08074b50; + +export const STORE_COMPRESSION: 0 = 0; +export const DEFLATE_COMPRESSION: 8 = 8; +export type ZipMetaCompressionMethod = typeof STORE_COMPRESSION | typeof DEFLATE_COMPRESSION; + +export interface IFileEntry { + filename: string; + size: number; + compressedSize: number; + crc32: number; + sha1Hash: string; + localHeaderOffset: number; + compressionMethod: ZipMetaCompressionMethod; +} + +export interface ILocalFileHeader { + signature: number; + versionNeeded: number; + flags: number; + compressionMethod: number; + lastModTime: number; + lastModDate: number; + crc32: number; + compressedSize: number; + uncompressedSize: number; + filenameLength: number; + extraFieldLength: number; +} + +export interface ICentralDirectoryHeader { + signature: number; + versionMadeBy: number; + versionNeeded: number; + flags: number; + compressionMethod: number; + lastModTime: number; + lastModDate: number; + crc32: number; + compressedSize: number; + uncompressedSize: number; + filenameLength: number; + extraFieldLength: number; + commentLength: number; + diskNumberStart: number; + internalFileAttributes: number; + externalFileAttributes: number; + localHeaderOffset: number; +} + +export interface IEndOfCentralDirectory { + signature: number; + diskNumber: number; + centralDirStartDisk: number; + centralDirRecordsOnDisk: number; + totalCentralDirRecords: number; + centralDirSize: number; + centralDirOffset: number; + commentLength: number; +} + +function writeUInt32LE(buffer: Buffer, value: number, offset: number): void { + buffer.writeUInt32LE(value, offset); +} + +function writeUInt16LE(buffer: Buffer, value: number, offset: number): void { + buffer.writeUInt16LE(value, 
offset); +} + +function readUInt32LE(buffer: Buffer, offset: number): number { + return buffer.readUInt32LE(offset); +} + +function readUInt16LE(buffer: Buffer, offset: number): number { + return buffer.readUInt16LE(offset); +} + +function dosDateTime(date: Date): { time: number; date: number } { + const time: number = + // eslint-disable-next-line no-bitwise + ((date.getHours() & 0x1f) << 11) | + // eslint-disable-next-line no-bitwise + ((date.getMinutes() & 0x3f) << 5) | + // eslint-disable-next-line no-bitwise + ((date.getSeconds() / 2) & 0x1f); + + const dateVal: number = + // eslint-disable-next-line no-bitwise + (((date.getFullYear() - 1980) & 0x7f) << 9) | + // eslint-disable-next-line no-bitwise + (((date.getMonth() + 1) & 0xf) << 5) | + // eslint-disable-next-line no-bitwise + (date.getDate() & 0x1f); + + return { time, date: dateVal }; +} + +export function writeLocalFileHeader(entry: IFileEntry): Buffer { + const filenameBuffer: Buffer = Buffer.from(entry.filename, 'utf8'); + + const headerSize: number = 30 + filenameBuffer.length; + const header: Buffer = Buffer.allocUnsafeSlow(headerSize); + const now: Date = new Date(); + const { time, date } = dosDateTime(now); + + let offset: number = 0; + writeUInt32LE(header, LOCAL_FILE_HEADER_SIGNATURE, offset); + offset += 4; + writeUInt16LE(header, 20, offset); // version needed + offset += 2; + // General purpose bit flag: set bit 3 (0x0008) to indicate presence of data descriptor + // Per APPNOTE: when bit 3 is set, CRC-32 and sizes in local header are set to zero and + // the actual values are stored in the data descriptor that follows the file data. + writeUInt16LE(header, 0x0008, offset); // flags (data descriptor) + offset += 2; + writeUInt16LE(header, entry.compressionMethod, offset); // compression method (0=store,8=deflate) + offset += 2; + writeUInt16LE(header, time, offset); // last mod time + offset += 2; + writeUInt16LE(header, date, offset); // last mod date + offset += 2; + // With bit 3 set, these three fields MUST be zero in the local header + writeUInt32LE(header, 0, offset); // crc32 (placeholder, real value in data descriptor) + offset += 4; + writeUInt32LE(header, 0, offset); // compressed size (placeholder) + offset += 4; + writeUInt32LE(header, 0, offset); // uncompressed size (placeholder) + offset += 4; + writeUInt16LE(header, filenameBuffer.length, offset); // filename length + offset += 2; + writeUInt16LE(header, 0, offset); // extra field length + offset += 2; + + filenameBuffer.copy(header, offset); + offset += filenameBuffer.length; + + return header; +} + +export function writeCentralDirectoryHeader(entry: IFileEntry): Buffer { + const filenameBuffer: Buffer = Buffer.from(entry.filename, 'utf8'); + + const headerSize: number = 46 + filenameBuffer.length; + const header: Buffer = Buffer.alloc(headerSize); + const now: Date = new Date(); + const { time, date } = dosDateTime(now); + + let offset: number = 0; + writeUInt32LE(header, CENTRAL_DIR_HEADER_SIGNATURE, offset); + offset += 4; + writeUInt16LE(header, 20, offset); // version made by + offset += 2; + writeUInt16LE(header, 20, offset); // version needed + offset += 2; + // Mirror flags used in local header (bit 3 set to indicate data descriptor was used) + writeUInt16LE(header, 0x0008, offset); // flags + offset += 2; + writeUInt16LE(header, entry.compressionMethod, offset); // compression method + offset += 2; + writeUInt16LE(header, time, offset); // last mod time + offset += 2; + writeUInt16LE(header, date, offset); // last mod date + offset += 2; + 
writeUInt32LE(header, entry.crc32, offset); // crc32 + offset += 4; + writeUInt32LE(header, entry.compressedSize, offset); // compressed size + offset += 4; + writeUInt32LE(header, entry.size, offset); // uncompressed size + offset += 4; + writeUInt16LE(header, filenameBuffer.length, offset); // filename length + offset += 2; + writeUInt16LE(header, 0, offset); // extra field length + offset += 2; + writeUInt16LE(header, 0, offset); // comment length + offset += 2; + writeUInt16LE(header, 0, offset); // disk number start + offset += 2; + writeUInt16LE(header, 0, offset); // internal file attributes + offset += 2; + writeUInt32LE(header, 0, offset); // external file attributes + offset += 4; + writeUInt32LE(header, entry.localHeaderOffset, offset); // local header offset + offset += 4; + + filenameBuffer.copy(header, offset); + offset += filenameBuffer.length; + + return header; +} + +export function writeDataDescriptor(entry: IFileEntry): Buffer { + // We include the optional signature for robustness (APPNOTE allows it) + const descriptor: Buffer = Buffer.alloc(16); + let offset: number = 0; + writeUInt32LE(descriptor, DATA_DESCRIPTOR_SIGNATURE, offset); // signature PK\x07\x08 + offset += 4; + writeUInt32LE(descriptor, entry.crc32, offset); // crc32 + offset += 4; + writeUInt32LE(descriptor, entry.compressedSize, offset); // compressed size + offset += 4; + writeUInt32LE(descriptor, entry.size, offset); // uncompressed size + return descriptor; +} + +export function writeEndOfCentralDirectory( + centralDirOffset: number, + centralDirSize: number, + entryCount: number +): Buffer { + const header: Buffer = Buffer.alloc(22); + + let offset: number = 0; + writeUInt32LE(header, END_OF_CENTRAL_DIR_SIGNATURE, offset); + offset += 4; + writeUInt16LE(header, 0, offset); // disk number + offset += 2; + writeUInt16LE(header, 0, offset); // central dir start disk + offset += 2; + writeUInt16LE(header, entryCount, offset); // central dir records on disk + offset += 2; + writeUInt16LE(header, entryCount, offset); // total central dir records + offset += 2; + writeUInt32LE(header, centralDirSize, offset); // central dir size + offset += 4; + writeUInt32LE(header, centralDirOffset, offset); // central dir offset + offset += 4; + writeUInt16LE(header, 0, offset); // comment length + + return header; +} + +interface ILocalFileHeaderParseResult { + header: ILocalFileHeader; + nextOffset: number; +} + +export function parseLocalFileHeader(buffer: Buffer, offset: number): ILocalFileHeaderParseResult { + const signature: number = readUInt32LE(buffer, offset); + if (signature !== LOCAL_FILE_HEADER_SIGNATURE) { + throw new Error( + `Unexpected local file header signature at offset ${offset.toString(16)}: ${signature.toString(16)}` + ); + } + const header: ILocalFileHeader = { + signature, + versionNeeded: readUInt16LE(buffer, offset + 4), + flags: readUInt16LE(buffer, offset + 6), + compressionMethod: readUInt16LE(buffer, offset + 8), + lastModTime: readUInt16LE(buffer, offset + 10), + lastModDate: readUInt16LE(buffer, offset + 12), + crc32: readUInt32LE(buffer, offset + 14), + compressedSize: readUInt32LE(buffer, offset + 18), + uncompressedSize: readUInt32LE(buffer, offset + 22), + filenameLength: readUInt16LE(buffer, offset + 26), + extraFieldLength: readUInt16LE(buffer, offset + 28) + }; + + return { + header, + nextOffset: offset + 30 + header.filenameLength + header.extraFieldLength + }; +} + +export interface ICentralDirectoryHeaderParseResult { + header: ICentralDirectoryHeader; + filename: string; + 
sha1Hash: string; + nextOffset: number; +} + +export function parseCentralDirectoryHeader( + buffer: Buffer, + offset: number +): ICentralDirectoryHeaderParseResult { + const signature: number = readUInt32LE(buffer, offset); + if (signature !== CENTRAL_DIR_HEADER_SIGNATURE) { + throw new Error( + `Unexpected central directory signature at offset ${offset.toString(16)}: ${signature.toString(16)}` + ); + } + const header: ICentralDirectoryHeader = { + signature, + versionMadeBy: readUInt16LE(buffer, offset + 4), + versionNeeded: readUInt16LE(buffer, offset + 6), + flags: readUInt16LE(buffer, offset + 8), + compressionMethod: readUInt16LE(buffer, offset + 10), + lastModTime: readUInt16LE(buffer, offset + 12), + lastModDate: readUInt16LE(buffer, offset + 14), + crc32: readUInt32LE(buffer, offset + 16), + compressedSize: readUInt32LE(buffer, offset + 20), + uncompressedSize: readUInt32LE(buffer, offset + 24), + filenameLength: readUInt16LE(buffer, offset + 28), + extraFieldLength: readUInt16LE(buffer, offset + 30), + commentLength: readUInt16LE(buffer, offset + 32), + diskNumberStart: readUInt16LE(buffer, offset + 34), + internalFileAttributes: readUInt16LE(buffer, offset + 36), + externalFileAttributes: readUInt32LE(buffer, offset + 38), + localHeaderOffset: readUInt32LE(buffer, offset + 42) + }; + + const filename: string = buffer.slice(offset + 46, offset + 46 + header.filenameLength).toString('utf8'); + + let sha1Hash: string = ''; + if (header.extraFieldLength > 0) { + const extraFieldOffset: number = offset + 46 + header.filenameLength; + const extraFieldId: number = readUInt16LE(buffer, extraFieldOffset); + const extraFieldSize: number = readUInt16LE(buffer, extraFieldOffset + 2); + + if (extraFieldId === 0x0001 && extraFieldSize === 20) { + sha1Hash = buffer.slice(extraFieldOffset + 4, extraFieldOffset + 4 + extraFieldSize).toString('hex'); + } + } + + return { + header, + filename, + sha1Hash, + nextOffset: offset + 46 + header.filenameLength + header.extraFieldLength + header.commentLength + }; +} + +export function findEndOfCentralDirectory(buffer: Buffer): IEndOfCentralDirectory { + for (let i: number = buffer.length - 22; i >= 0; i--) { + if (readUInt32LE(buffer, i) === END_OF_CENTRAL_DIR_SIGNATURE) { + return { + signature: readUInt32LE(buffer, i), + diskNumber: readUInt16LE(buffer, i + 4), + centralDirStartDisk: readUInt16LE(buffer, i + 6), + centralDirRecordsOnDisk: readUInt16LE(buffer, i + 8), + totalCentralDirRecords: readUInt16LE(buffer, i + 10), + centralDirSize: readUInt32LE(buffer, i + 12), + centralDirOffset: readUInt32LE(buffer, i + 16), + commentLength: readUInt16LE(buffer, i + 20) + }; + } + } + + throw new Error('End of central directory not found'); +} + +export function getFileFromZip(zipBuffer: Buffer, entry: ICentralDirectoryHeaderParseResult): Buffer { + const { header: localFileHeader } = parseLocalFileHeader(zipBuffer, entry.header.localHeaderOffset); + const localDataOffset: number = + entry.header.localHeaderOffset + 30 + localFileHeader.filenameLength + localFileHeader.extraFieldLength; + const fileZipBuffer: Buffer = zipBuffer.subarray( + localDataOffset, + localDataOffset + entry.header.compressedSize + ); + return fileZipBuffer; +} diff --git a/apps/zipsync/tsconfig.json b/apps/zipsync/tsconfig.json new file mode 100644 index 00000000000..dac21d04081 --- /dev/null +++ b/apps/zipsync/tsconfig.json @@ -0,0 +1,3 @@ +{ + "extends": "./node_modules/local-node-rig/profiles/default/tsconfig-base.json" +} diff --git 
a/common/config/rush/browser-approved-packages.json b/common/config/rush/browser-approved-packages.json index 23c585145b8..95d8a2e05c5 100644 --- a/common/config/rush/browser-approved-packages.json +++ b/common/config/rush/browser-approved-packages.json @@ -50,6 +50,10 @@ "name": "@rushstack/rush-vscode-command-webview", "allowedCategories": [ "vscode-extensions" ] }, + { + "name": "@rushstack/zipsync", + "allowedCategories": [ "libraries" ] + }, { "name": "@ungap/structured-clone", "allowedCategories": [ "libraries" ] diff --git a/common/config/subspaces/build-tests-subspace/pnpm-lock.yaml b/common/config/subspaces/build-tests-subspace/pnpm-lock.yaml index 120705e8971..44c87b07487 100644 --- a/common/config/subspaces/build-tests-subspace/pnpm-lock.yaml +++ b/common/config/subspaces/build-tests-subspace/pnpm-lock.yaml @@ -6345,7 +6345,6 @@ packages: resolution: {integrity: sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==} engines: {node: '>=14.17'} hasBin: true - dev: true /unbox-primitive@1.0.2: resolution: {integrity: sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw==} @@ -6725,6 +6724,21 @@ packages: - '@types/node' dev: true + file:../../../apps/zipsync(@types/node@20.17.19): + resolution: {directory: ../../../apps/zipsync, type: directory} + id: file:../../../apps/zipsync + name: '@rushstack/zipsync' + hasBin: true + dependencies: + '@rushstack/lookup-by-path': file:../../../libraries/lookup-by-path(@types/node@20.17.19) + '@rushstack/node-core-library': file:../../../libraries/node-core-library(@types/node@20.17.19) + '@rushstack/terminal': file:../../../libraries/terminal(@types/node@20.17.19) + '@rushstack/ts-command-line': file:../../../libraries/ts-command-line(@types/node@20.17.19) + semver: 7.5.4 + typescript: 5.8.2 + transitivePeerDependencies: + - '@types/node' + file:../../../eslint/eslint-config(eslint@9.25.1)(typescript@4.9.5): resolution: {directory: ../../../eslint/eslint-config, type: directory} id: file:../../../eslint/eslint-config @@ -7114,6 +7128,7 @@ packages: '@rushstack/stream-collator': file:../../../libraries/stream-collator(@types/node@20.17.19) '@rushstack/terminal': file:../../../libraries/terminal(@types/node@20.17.19) '@rushstack/ts-command-line': file:../../../libraries/ts-command-line(@types/node@20.17.19) + '@rushstack/zipsync': file:../../../apps/zipsync(@types/node@20.17.19) '@yarnpkg/lockfile': 1.0.2 builtin-modules: 3.1.0 cli-table: 0.3.11 diff --git a/common/config/subspaces/build-tests-subspace/repo-state.json b/common/config/subspaces/build-tests-subspace/repo-state.json index 078bc879c4a..1132889f823 100644 --- a/common/config/subspaces/build-tests-subspace/repo-state.json +++ b/common/config/subspaces/build-tests-subspace/repo-state.json @@ -1,6 +1,6 @@ // DO NOT MODIFY THIS FILE MANUALLY BUT DO COMMIT IT. It is generated and used by Rush. 
{ - "pnpmShrinkwrapHash": "a4362af2793dd557efe7e9f005f3e2f376eb2eda", + "pnpmShrinkwrapHash": "2ac01ba33e09661dc0e7d7faa36d215bb3d3b91e", "preferredVersionsHash": "550b4cee0bef4e97db6c6aad726df5149d20e7d9", - "packageJsonInjectedDependenciesHash": "79ac135cb61506457e8d49c7ec1342d419bde3e2" + "packageJsonInjectedDependenciesHash": "b4fb0c03638f2decf92e0951d76b292ee931e138" } diff --git a/common/config/subspaces/default/pnpm-lock.yaml b/common/config/subspaces/default/pnpm-lock.yaml index f0ab142cd09..f9f41386122 100644 --- a/common/config/subspaces/default/pnpm-lock.yaml +++ b/common/config/subspaces/default/pnpm-lock.yaml @@ -449,6 +449,40 @@ importers: specifier: workspace:* version: link:../../rigs/local-node-rig + ../../../apps/zipsync: + dependencies: + '@rushstack/lookup-by-path': + specifier: workspace:* + version: link:../../libraries/lookup-by-path + '@rushstack/node-core-library': + specifier: workspace:* + version: link:../../libraries/node-core-library + '@rushstack/terminal': + specifier: workspace:* + version: link:../../libraries/terminal + '@rushstack/ts-command-line': + specifier: workspace:* + version: link:../../libraries/ts-command-line + semver: + specifier: ~7.5.4 + version: 7.5.4 + typescript: + specifier: ~5.8.2 + version: 5.8.2 + devDependencies: + '@rushstack/heft': + specifier: workspace:* + version: link:../heft + '@types/semver': + specifier: 7.5.0 + version: 7.5.0 + eslint: + specifier: ~9.25.1 + version: 9.25.1(supports-color@8.1.1) + local-node-rig: + specifier: workspace:* + version: link:../../rigs/local-node-rig + ../../../build-tests-samples/heft-node-basic-tutorial: devDependencies: '@rushstack/heft': @@ -3614,6 +3648,9 @@ importers: '@rushstack/ts-command-line': specifier: workspace:* version: link:../ts-command-line + '@rushstack/zipsync': + specifier: workspace:* + version: link:../../apps/zipsync '@yarnpkg/lockfile': specifier: ~1.0.2 version: 1.0.2 @@ -10468,7 +10505,7 @@ packages: dependencies: '@pnpm/crypto.base32-hash': 3.0.1 '@pnpm/types': 12.2.0 - semver: 7.6.3 + semver: 7.7.2 dev: false /@pnpm/error@1.4.0: @@ -14600,7 +14637,7 @@ packages: eslint: 9.25.1(supports-color@8.1.1) json-stable-stringify-without-jsonify: 1.0.1 lodash.merge: 4.6.2 - semver: 7.6.3 + semver: 7.7.2 transitivePeerDependencies: - supports-color - typescript @@ -14809,7 +14846,7 @@ packages: fast-glob: 3.3.2 is-glob: 4.0.3 minimatch: 9.0.5 - semver: 7.6.3 + semver: 7.7.2 ts-api-utils: 2.0.1(typescript@4.9.5) typescript: 4.9.5 transitivePeerDependencies: @@ -14828,7 +14865,7 @@ packages: fast-glob: 3.3.2 is-glob: 4.0.3 minimatch: 9.0.5 - semver: 7.6.3 + semver: 7.7.2 ts-api-utils: 2.0.1(typescript@5.8.2) typescript: 5.8.2 transitivePeerDependencies: @@ -19319,7 +19356,7 @@ packages: espree: 10.3.0 esquery: 1.6.0 parse-imports-exports: 0.2.4 - semver: 7.6.3 + semver: 7.7.2 spdx-expression-parse: 4.0.0 transitivePeerDependencies: - supports-color @@ -27813,16 +27850,10 @@ packages: dependencies: lru-cache: 6.0.0 - /semver@7.6.3: - resolution: {integrity: sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==} - engines: {node: '>=10'} - hasBin: true - /semver@7.7.2: resolution: {integrity: sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==} engines: {node: '>=10'} hasBin: true - dev: false /send@0.17.2: resolution: {integrity: sha512-UJYB6wFSJE3G00nEivR5rgWp8c2xXvJ3OPWPhmuteU0IKj8nKbG3DrjiOmLwpnHGYWAVwA69zmTm++YG0Hmwww==} diff --git a/common/config/subspaces/default/repo-state.json 
b/common/config/subspaces/default/repo-state.json index 711e482da42..bf2c1a8ba42 100644 --- a/common/config/subspaces/default/repo-state.json +++ b/common/config/subspaces/default/repo-state.json @@ -1,5 +1,5 @@ // DO NOT MODIFY THIS FILE MANUALLY BUT DO COMMIT IT. It is generated and used by Rush. { - "pnpmShrinkwrapHash": "b3b0018c5869d606a645e2b69ef6c53f9d2bf483", + "pnpmShrinkwrapHash": "99186b016ffe5874093a1b9fb71c52c903b86978", "preferredVersionsHash": "61cd419c533464b580f653eb5f5a7e27fe7055ca" } diff --git a/common/reviews/api/lookup-by-path.api.md b/common/reviews/api/lookup-by-path.api.md index 6c69844f179..66336af9e63 100644 --- a/common/reviews/api/lookup-by-path.api.md +++ b/common/reviews/api/lookup-by-path.api.md @@ -31,6 +31,7 @@ export interface IReadonlyLookupByPath extends Iterable<[strin findChildPathFromSegments(childPathSegments: Iterable): TItem | undefined; findLongestPrefixMatch(query: string, delimiter?: string): IPrefixMatch | undefined; get(query: string, delimiter?: string): TItem | undefined; + getNodeAtPrefix(query: string, delimiter?: string): IReadonlyPathTrieNode | undefined; groupByChild(infoByPath: Map, delimiter?: string): Map>; has(query: string, delimiter?: string): boolean; get size(): number; @@ -57,6 +58,7 @@ export class LookupByPath implements IReadonlyLookupByPath): TItem | undefined; findLongestPrefixMatch(query: string, delimiter?: string): IPrefixMatch | undefined; get(key: string, delimiter?: string): TItem | undefined; + getNodeAtPrefix(query: string, delimiter?: string): IReadonlyPathTrieNode | undefined; groupByChild(infoByPath: Map, delimiter?: string): Map>; has(key: string, delimiter?: string): boolean; static iteratePathSegments(serializedPath: string, delimiter?: string): Iterable; diff --git a/common/reviews/api/rush-lib.api.md b/common/reviews/api/rush-lib.api.md index 58ddeefaf83..b0ca862444f 100644 --- a/common/reviews/api/rush-lib.api.md +++ b/common/reviews/api/rush-lib.api.md @@ -1424,7 +1424,7 @@ export class RushConstants { static readonly artifactoryFilename: 'artifactory.json'; static readonly browserApprovedPackagesFilename: 'browser-approved-packages.json'; static readonly buildCacheFilename: 'build-cache.json'; - static readonly buildCacheVersion: 1; + static readonly buildCacheVersion: 2; static readonly buildCommandName: 'build'; static readonly bulkCommandKind: 'bulk'; static readonly bypassPolicyFlagLongName: '--bypass-policy'; diff --git a/common/reviews/api/ts-command-line.api.md b/common/reviews/api/ts-command-line.api.md index 3541c82bb73..dd85f73df4b 100644 --- a/common/reviews/api/ts-command-line.api.md +++ b/common/reviews/api/ts-command-line.api.md @@ -425,6 +425,12 @@ export interface IRequiredCommandLineIntegerParameter extends CommandLineInteger readonly value: number; } +// @public +export interface IRequiredCommandLineStringListParameter extends CommandLineStringListParameter { + // (undocumented) + values: ReadonlyArray; +} + // @public export interface IRequiredCommandLineStringParameter extends CommandLineStringParameter { // (undocumented) diff --git a/libraries/lookup-by-path/src/LookupByPath.ts b/libraries/lookup-by-path/src/LookupByPath.ts index b1ac287960d..a6b3353371e 100644 --- a/libraries/lookup-by-path/src/LookupByPath.ts +++ b/libraries/lookup-by-path/src/LookupByPath.ts @@ -186,6 +186,15 @@ export interface IReadonlyLookupByPath extends Iterable<[strin * @param infoByPath - The info to be grouped, keyed by path */ groupByChild(infoByPath: Map, delimiter?: string): Map>; + + /** + * 
Retrieves the trie node at the specified prefix, if it exists. + * + * @param query - The prefix to check for + * @param delimiter - The path delimiter + * @returns The trie node at the specified prefix, or `undefined` if no node was found + */ + getNodeAtPrefix(query: string, delimiter?: string): IReadonlyPathTrieNode | undefined; } /** @@ -526,6 +535,16 @@ export class LookupByPath implements IReadonlyLookupByPath | undefined { + return this._findNodeAtPrefix(query, delimiter); + } + /** * Iterates through progressively longer prefixes of a given string and returns as soon * as the number of candidate items that match the prefix are 1 or 0. diff --git a/libraries/rush-lib/package.json b/libraries/rush-lib/package.json index 0ce5842b6a6..7d3f55556be 100644 --- a/libraries/rush-lib/package.json +++ b/libraries/rush-lib/package.json @@ -42,6 +42,7 @@ "@rushstack/stream-collator": "workspace:*", "@rushstack/terminal": "workspace:*", "@rushstack/ts-command-line": "workspace:*", + "@rushstack/zipsync": "workspace:*", "@yarnpkg/lockfile": "~1.0.2", "builtin-modules": "~3.1.0", "cli-table": "~0.3.1", diff --git a/libraries/rush-lib/src/logic/RushConstants.ts b/libraries/rush-lib/src/logic/RushConstants.ts index e04c03d2f2e..75eea19bd32 100644 --- a/libraries/rush-lib/src/logic/RushConstants.ts +++ b/libraries/rush-lib/src/logic/RushConstants.ts @@ -226,7 +226,7 @@ export class RushConstants { * Build cache version number, incremented when the logic to create cache entries changes. * Changing this ensures that cache entries generated by an old version will no longer register as a cache hit. */ - public static readonly buildCacheVersion: 1 = 1; + public static readonly buildCacheVersion: 2 = 2; /** * Cobuild configuration file. diff --git a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts index d603c094547..759df8b7e7b 100644 --- a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts +++ b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts @@ -1,18 +1,16 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. 
-import * as path from 'path'; import * as crypto from 'crypto'; import { FileSystem, type FolderItem, InternalError, Async } from '@rushstack/node-core-library'; import type { ITerminal } from '@rushstack/terminal'; +import { zipSyncWorkerAsync } from '@rushstack/zipsync/lib/zipSyncWorkerAsync'; import type { RushConfigurationProject } from '../../api/RushConfigurationProject'; import type { BuildCacheConfiguration } from '../../api/BuildCacheConfiguration'; import type { ICloudBuildCacheProvider } from './ICloudBuildCacheProvider'; import type { FileSystemBuildCacheProvider } from './FileSystemBuildCacheProvider'; -import { TarExecutable } from '../../utilities/TarExecutable'; -import { EnvironmentVariableNames } from '../../api/EnvironmentConfiguration'; import type { IOperationExecutionResult } from '../operations/IOperationExecutionResult'; /** @@ -60,8 +58,6 @@ interface IPathsToCache { * @internal */ export class OperationBuildCache { - private static _tarUtilityPromise: Promise | undefined; - private readonly _project: RushConfigurationProject; private readonly _localBuildCacheProvider: FileSystemBuildCacheProvider; private readonly _cloudBuildCacheProvider: ICloudBuildCacheProvider | undefined; @@ -90,14 +86,6 @@ export class OperationBuildCache { this._cacheId = cacheId; } - private static _tryGetTarUtility(terminal: ITerminal): Promise { - if (!OperationBuildCache._tarUtilityPromise) { - OperationBuildCache._tarUtilityPromise = TarExecutable.tryInitializeAsync(terminal); - } - - return OperationBuildCache._tarUtilityPromise; - } - public get cacheId(): string | undefined { return this._cacheId; } @@ -176,32 +164,40 @@ export class OperationBuildCache { const projectFolderPath: string = this._project.projectFolder; - // Purge output folders - terminal.writeVerboseLine(`Clearing cached folders: ${this._projectOutputFolderNames.join(', ')}`); - await Promise.all( - this._projectOutputFolderNames.map((outputFolderName: string) => - FileSystem.deleteFolderAsync(`${projectFolderPath}/${outputFolderName}`) - ) - ); - - const tarUtility: TarExecutable | undefined = await OperationBuildCache._tryGetTarUtility(terminal); let restoreSuccess: boolean = false; - if (tarUtility && localCacheEntryPath) { - const logFilePath: string = this._getTarLogFilePath(cacheId, 'untar'); - const tarExitCode: number = await tarUtility.tryUntarAsync({ - archivePath: localCacheEntryPath, - outputFolderPath: projectFolderPath, - logFilePath + terminal.writeVerboseLine(`Using zipsync to restore cached folders.`); + try { + const { + zipSyncReturn: { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted } + } = await zipSyncWorkerAsync({ + mode: 'unpack', + compression: 'deflate', + archivePath: localCacheEntryPath!, + targetDirectories: this._projectOutputFolderNames, + baseDir: projectFolderPath }); - if (tarExitCode === 0) { - restoreSuccess = true; - terminal.writeLine('Successfully restored output from the build cache.'); - } else { - terminal.writeWarningLine( - 'Unable to restore output from the build cache. 
' + - `See "${logFilePath}" for logs from the tar process.` + terminal.writeVerboseLine(`Restored ${filesExtracted + filesSkipped} files from cache.`); + if (filesExtracted > 0) { + terminal.writeVerboseLine(`Extracted ${filesExtracted} files to target folders.`); + } + if (filesSkipped > 0) { + terminal.writeVerboseLine(`Skipped ${filesSkipped} files that were already up to date.`); + } + if (filesDeleted > 0) { + terminal.writeVerboseLine(`Deleted ${filesDeleted} files from target folders.`); + } + if (foldersDeleted > 0) { + terminal.writeVerboseLine(`Deleted ${foldersDeleted} empty folders from target folders.`); + } + if (otherEntriesDeleted > 0) { + terminal.writeVerboseLine( + `Deleted ${otherEntriesDeleted} items (e.g. symbolic links) from target folders.` ); } + restoreSuccess = true; + terminal.writeLine('Successfully restored output from the build cache.'); + } catch (e) { + terminal.writeWarningLine(`Unable to restore output from the build cache: ${e}`); } if (updateLocalCacheSuccess === false) { @@ -234,59 +230,55 @@ export class OperationBuildCache { let localCacheEntryPath: string | undefined; - const tarUtility: TarExecutable | undefined = await OperationBuildCache._tryGetTarUtility(terminal); - if (tarUtility) { - const finalLocalCacheEntryPath: string = this._localBuildCacheProvider.getCacheEntryPath(cacheId); - - // Derive the temp file from the destination path to ensure they are on the same volume - // In the case of a shared network drive containing the build cache, we also need to make - // sure the the temp path won't be shared by two parallel rush builds. - const randomSuffix: string = crypto.randomBytes(8).toString('hex'); - const tempLocalCacheEntryPath: string = `${finalLocalCacheEntryPath}-${randomSuffix}.temp`; - - const logFilePath: string = this._getTarLogFilePath(cacheId, 'tar'); - const tarExitCode: number = await tarUtility.tryCreateArchiveFromProjectPathsAsync({ + const finalLocalCacheEntryPath: string = this._localBuildCacheProvider.getCacheEntryPath(cacheId); + + // Derive the temp file from the destination path to ensure they are on the same volume + // In the case of a shared network drive containing the build cache, we also need to make + // sure the the temp path won't be shared by two parallel rush builds. 
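For reference, a condensed sketch of the zipSyncWorkerAsync call shape that replaces the external "tar" invocation in this file. The import path, option names, and returned counters are taken from the hunks in this patch; the concrete paths and folder names are illustrative only.

import { zipSyncWorkerAsync } from '@rushstack/zipsync/lib/zipSyncWorkerAsync';

// Restore ("unpack") cached build outputs into the project folder:
const {
  zipSyncReturn: { filesExtracted, filesSkipped, filesDeleted, foldersDeleted, otherEntriesDeleted }
} = await zipSyncWorkerAsync({
  mode: 'unpack',
  compression: 'deflate',
  archivePath: '/rush-cache/entry.zip', // illustrative
  targetDirectories: ['lib', 'dist'], // illustrative output folder names
  baseDir: '/repo/apps/example' // illustrative project folder
});

// Create ("pack") a cache entry from the same output folders:
const {
  zipSyncReturn: { filesPacked }
} = await zipSyncWorkerAsync({
  mode: 'pack',
  compression: 'deflate',
  archivePath: '/rush-cache/entry.zip-0123abcd.temp', // illustrative temp path
  targetDirectories: ['lib', 'dist'],
  baseDir: '/repo/apps/example'
});

In the pack case the archive is written to a temp path and then renamed over the final cache entry path (with retries), as the surrounding code shows.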
+ const randomSuffix: string = crypto.randomBytes(8).toString('hex'); + const tempLocalCacheEntryPath: string = `${finalLocalCacheEntryPath}-${randomSuffix}.temp`; + + terminal.writeVerboseLine(`Using zipsync to create cache archive.`); + try { + const { + zipSyncReturn: { filesPacked } + } = await zipSyncWorkerAsync({ + mode: 'pack', + compression: 'deflate', archivePath: tempLocalCacheEntryPath, - paths: filesToCache.outputFilePaths, - project: this._project, - logFilePath + targetDirectories: this._projectOutputFolderNames, + baseDir: this._project.projectFolder }); + terminal.writeVerboseLine(`Packed ${filesPacked} files for caching.`); - if (tarExitCode === 0) { - // Move after the archive is finished so that if the process is interrupted we aren't left with an invalid file + // Move after the archive is finished so that if the process is interrupted we aren't left with an invalid file + try { + await Async.runWithRetriesAsync({ + action: () => + FileSystem.moveAsync({ + sourcePath: tempLocalCacheEntryPath, + destinationPath: finalLocalCacheEntryPath, + overwrite: true + }), + maxRetries: 2, + retryDelayMs: 500 + }); + } catch (moveError) { try { - await Async.runWithRetriesAsync({ - action: () => - FileSystem.moveAsync({ - sourcePath: tempLocalCacheEntryPath, - destinationPath: finalLocalCacheEntryPath, - overwrite: true - }), - maxRetries: 2, - retryDelayMs: 500 - }); - } catch (moveError) { - try { - await FileSystem.deleteFileAsync(tempLocalCacheEntryPath); - } catch (deleteError) { - // Ignored - } - throw moveError; + await FileSystem.deleteFileAsync(tempLocalCacheEntryPath); + } catch (deleteError) { + // Ignored } - localCacheEntryPath = finalLocalCacheEntryPath; - } else { - terminal.writeWarningLine( - `"tar" exited with code ${tarExitCode} while attempting to create the cache entry. ` + - `See "${logFilePath}" for logs from the tar process.` - ); - return false; + throw moveError; } - } else { - terminal.writeWarningLine( - `Unable to locate "tar". 
Please ensure that "tar" is on your PATH environment variable, or set the ` + - `${EnvironmentVariableNames.RUSH_TAR_BINARY_PATH} environment variable to the full path to the "tar" binary.` - ); - return false; + localCacheEntryPath = finalLocalCacheEntryPath; + } catch (e) { + try { + await FileSystem.deleteFileAsync(tempLocalCacheEntryPath); + } catch (deleteError) { + // Ignored + } + throw e; } let cacheEntryBuffer: Buffer | undefined; @@ -395,10 +387,6 @@ export class OperationBuildCache { }; } - private _getTarLogFilePath(cacheId: string, mode: 'tar' | 'untar'): string { - return path.join(this._project.projectRushTempFolder, `${cacheId}.${mode}.log`); - } - private static _getCacheId(options: IProjectBuildCacheOptions): string | undefined { const { buildCacheConfiguration, diff --git a/libraries/ts-command-line/src/index.ts b/libraries/ts-command-line/src/index.ts index dc4b17d60e9..f88f6c55c49 100644 --- a/libraries/ts-command-line/src/index.ts +++ b/libraries/ts-command-line/src/index.ts @@ -40,7 +40,10 @@ export { CommandLineStringParameter, type IRequiredCommandLineStringParameter } from './parameters/CommandLineStringParameter'; -export { CommandLineStringListParameter } from './parameters/CommandLineStringListParameter'; +export { + CommandLineStringListParameter, + type IRequiredCommandLineStringListParameter +} from './parameters/CommandLineStringListParameter'; export { CommandLineIntegerParameter, type IRequiredCommandLineIntegerParameter diff --git a/libraries/ts-command-line/src/parameters/CommandLineStringListParameter.ts b/libraries/ts-command-line/src/parameters/CommandLineStringListParameter.ts index c62eed59971..ade590da365 100644 --- a/libraries/ts-command-line/src/parameters/CommandLineStringListParameter.ts +++ b/libraries/ts-command-line/src/parameters/CommandLineStringListParameter.ts @@ -5,6 +5,14 @@ import type { ICommandLineStringListDefinition } from './CommandLineDefinition'; import { CommandLineParameterWithArgument, CommandLineParameterKind } from './BaseClasses'; import { EnvironmentVariableParser } from './EnvironmentVariableParser'; +/** + * The data type returned by {@link CommandLineParameterProvider.(defineStringParameter:2)}. + * @public + */ +export interface IRequiredCommandLineStringListParameter extends CommandLineStringListParameter { + values: ReadonlyArray; +} + /** * The data type returned by {@link CommandLineParameterProvider.defineStringListParameter}. * @public diff --git a/libraries/ts-command-line/src/providers/CommandLineParser.ts b/libraries/ts-command-line/src/providers/CommandLineParser.ts index 7855cd5a301..a72ae6d4f7d 100644 --- a/libraries/ts-command-line/src/providers/CommandLineParser.ts +++ b/libraries/ts-command-line/src/providers/CommandLineParser.ts @@ -2,7 +2,7 @@ // See LICENSE in the project root for license information. import type * as argparse from 'argparse'; -import { Colorize } from '@rushstack/terminal'; +import { Colorize } from '@rushstack/terminal/lib/Colorize'; import type { CommandLineAction } from './CommandLineAction'; import type { AliasCommandLineAction } from './AliasCommandLineAction'; diff --git a/libraries/ts-command-line/src/test/ScopedCommandLineAction.test.ts b/libraries/ts-command-line/src/test/ScopedCommandLineAction.test.ts index a535c1e77cc..d72c558c34f 100644 --- a/libraries/ts-command-line/src/test/ScopedCommandLineAction.test.ts +++ b/libraries/ts-command-line/src/test/ScopedCommandLineAction.test.ts @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT license. // See LICENSE in the project root for license information. -import { AnsiEscape } from '@rushstack/terminal'; +import { AnsiEscape } from '@rushstack/terminal/lib/AnsiEscape'; import { ScopedCommandLineAction } from '../providers/ScopedCommandLineAction'; import type { CommandLineStringParameter } from '../parameters/CommandLineStringParameter'; diff --git a/rush.json b/rush.json index 80a47febb84..c4ef84b429a 100644 --- a/rush.json +++ b/rush.json @@ -489,6 +489,12 @@ "reviewCategory": "libraries", "shouldPublish": true }, + { + "packageName": "@rushstack/zipsync", + "projectFolder": "apps/zipsync", + "reviewCategory": "libraries", + "shouldPublish": true + }, // "build-tests" folder (alphabetical order) { From 5b3086182ee4485eb131ba732e9345c7bb270355 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Mon, 15 Sep 2025 20:36:48 -0700 Subject: [PATCH 02/20] update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6c88b571601..7966c03d7dd 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ These GitHub repositories provide supplementary resources for Rush Stack: | [/apps/rush](./apps/rush/) | [![npm version](https://badge.fury.io/js/%40microsoft%2Frush.svg)](https://badge.fury.io/js/%40microsoft%2Frush) | [changelog](./apps/rush/CHANGELOG.md) | [@microsoft/rush](https://www.npmjs.com/package/@microsoft/rush) | | [/apps/rush-mcp-server](./apps/rush-mcp-server/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Fmcp-server.svg)](https://badge.fury.io/js/%40rushstack%2Fmcp-server) | [changelog](./apps/rush-mcp-server/CHANGELOG.md) | [@rushstack/mcp-server](https://www.npmjs.com/package/@rushstack/mcp-server) | | [/apps/trace-import](./apps/trace-import/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Ftrace-import.svg)](https://badge.fury.io/js/%40rushstack%2Ftrace-import) | [changelog](./apps/trace-import/CHANGELOG.md) | [@rushstack/trace-import](https://www.npmjs.com/package/@rushstack/trace-import) | +| [/apps/zipsync](./apps/zipsync/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Fzipsync.svg)](https://badge.fury.io/js/%40rushstack%2Fzipsync) | [changelog](./apps/zipsync/CHANGELOG.md) | [@rushstack/zipsync](https://www.npmjs.com/package/@rushstack/zipsync) | | [/eslint/eslint-bulk](./eslint/eslint-bulk/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Feslint-bulk.svg)](https://badge.fury.io/js/%40rushstack%2Feslint-bulk) | [changelog](./eslint/eslint-bulk/CHANGELOG.md) | [@rushstack/eslint-bulk](https://www.npmjs.com/package/@rushstack/eslint-bulk) | | [/eslint/eslint-config](./eslint/eslint-config/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Feslint-config.svg)](https://badge.fury.io/js/%40rushstack%2Feslint-config) | [changelog](./eslint/eslint-config/CHANGELOG.md) | [@rushstack/eslint-config](https://www.npmjs.com/package/@rushstack/eslint-config) | | [/eslint/eslint-patch](./eslint/eslint-patch/) | [![npm version](https://badge.fury.io/js/%40rushstack%2Feslint-patch.svg)](https://badge.fury.io/js/%40rushstack%2Feslint-patch) | [changelog](./eslint/eslint-patch/CHANGELOG.md) | [@rushstack/eslint-patch](https://www.npmjs.com/package/@rushstack/eslint-patch) | From 118be3d90d0771a6243c73d0cf0e66a42c9aaad6 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Mon, 15 Sep 2025 17:42:28 -0700 Subject: [PATCH 03/20] rush change --- 
.../rush/bmiddha-zipsync-3_2025-09-16-00-42.json | 10 ++++++++++ .../bmiddha-zipsync-3_2025-09-16-00-42.json | 10 ++++++++++ .../bmiddha-zipsync-3_2025-09-16-00-42.json | 10 ++++++++++ .../zipsync/bmiddha-zipsync-3_2025-09-16-00-42.json | 10 ++++++++++ 4 files changed, 40 insertions(+) create mode 100644 common/changes/@microsoft/rush/bmiddha-zipsync-3_2025-09-16-00-42.json create mode 100644 common/changes/@rushstack/lookup-by-path/bmiddha-zipsync-3_2025-09-16-00-42.json create mode 100644 common/changes/@rushstack/rig-package/bmiddha-zipsync-3_2025-09-16-00-42.json create mode 100644 common/changes/@rushstack/zipsync/bmiddha-zipsync-3_2025-09-16-00-42.json diff --git a/common/changes/@microsoft/rush/bmiddha-zipsync-3_2025-09-16-00-42.json b/common/changes/@microsoft/rush/bmiddha-zipsync-3_2025-09-16-00-42.json new file mode 100644 index 00000000000..1dea9e02004 --- /dev/null +++ b/common/changes/@microsoft/rush/bmiddha-zipsync-3_2025-09-16-00-42.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@microsoft/rush", + "comment": "Add support for zipsync for build cache packing and unpacking.", + "type": "none" + } + ], + "packageName": "@microsoft/rush" +} \ No newline at end of file diff --git a/common/changes/@rushstack/lookup-by-path/bmiddha-zipsync-3_2025-09-16-00-42.json b/common/changes/@rushstack/lookup-by-path/bmiddha-zipsync-3_2025-09-16-00-42.json new file mode 100644 index 00000000000..51759d89a49 --- /dev/null +++ b/common/changes/@rushstack/lookup-by-path/bmiddha-zipsync-3_2025-09-16-00-42.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@rushstack/lookup-by-path", + "comment": "Expose getNodeAtPrefix API to allow getting nodes with undefined values.", + "type": "patch" + } + ], + "packageName": "@rushstack/lookup-by-path" +} \ No newline at end of file diff --git a/common/changes/@rushstack/rig-package/bmiddha-zipsync-3_2025-09-16-00-42.json b/common/changes/@rushstack/rig-package/bmiddha-zipsync-3_2025-09-16-00-42.json new file mode 100644 index 00000000000..211ddf456cc --- /dev/null +++ b/common/changes/@rushstack/rig-package/bmiddha-zipsync-3_2025-09-16-00-42.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@rushstack/ts-command-line", + "comment": "Add IRequiredCommandLineStringListParameter.", + "type": "patch" + } + ], + "packageName": "@rushstack/ts-command-line" +} \ No newline at end of file diff --git a/common/changes/@rushstack/zipsync/bmiddha-zipsync-3_2025-09-16-00-42.json b/common/changes/@rushstack/zipsync/bmiddha-zipsync-3_2025-09-16-00-42.json new file mode 100644 index 00000000000..327c9aae6dd --- /dev/null +++ b/common/changes/@rushstack/zipsync/bmiddha-zipsync-3_2025-09-16-00-42.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@rushstack/zipsync", + "comment": "Add zipsync tool to pack and unpack build cache entries.", + "type": "patch" + } + ], + "packageName": "@rushstack/zipsync" +} \ No newline at end of file From 0acedae8abe82ebb2f1b185aa3052f05a97da616 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Tue, 16 Sep 2025 15:45:41 -0700 Subject: [PATCH 04/20] address pr feedback --- apps/zipsync/eslint.config.js | 3 - apps/zipsync/package.json | 2 - apps/zipsync/src/ZipSyncCommandLineParser.ts | 7 +- apps/zipsync/src/benchmark.test.ts | 1 + apps/zipsync/src/crc32.test.ts | 1 + apps/zipsync/src/crc32.ts | 12 +- apps/zipsync/src/start.ts | 2 +- apps/zipsync/src/zipSync.ts | 44 +++--- apps/zipsync/src/zipUtils.ts | 149 ++++++++---------- 
.../rush/browser-approved-packages.json | 4 - .../rush/nonbrowser-approved-packages.json | 4 + .../build-tests-subspace/repo-state.json | 2 +- .../config/subspaces/default/pnpm-lock.yaml | 6 - .../logic/buildCache/OperationBuildCache.ts | 25 +-- 14 files changed, 110 insertions(+), 152 deletions(-) diff --git a/apps/zipsync/eslint.config.js b/apps/zipsync/eslint.config.js index ceb5a1bee40..c15e6077310 100644 --- a/apps/zipsync/eslint.config.js +++ b/apps/zipsync/eslint.config.js @@ -13,9 +13,6 @@ module.exports = [ parserOptions: { tsconfigRootDir: __dirname } - }, - rules: { - 'no-console': 'off' } } ]; diff --git a/apps/zipsync/package.json b/apps/zipsync/package.json index 9c9960e0857..47dd8a2455d 100644 --- a/apps/zipsync/package.json +++ b/apps/zipsync/package.json @@ -21,13 +21,11 @@ "@rushstack/node-core-library": "workspace:*", "@rushstack/terminal": "workspace:*", "@rushstack/ts-command-line": "workspace:*", - "semver": "~7.5.4", "typescript": "~5.8.2", "@rushstack/lookup-by-path": "workspace:*" }, "devDependencies": { "@rushstack/heft": "workspace:*", - "@types/semver": "7.5.0", "eslint": "~9.25.1", "local-node-rig": "workspace:*" } diff --git a/apps/zipsync/src/ZipSyncCommandLineParser.ts b/apps/zipsync/src/ZipSyncCommandLineParser.ts index 66bae3de4e2..08ecf89ed03 100644 --- a/apps/zipsync/src/ZipSyncCommandLineParser.ts +++ b/apps/zipsync/src/ZipSyncCommandLineParser.ts @@ -10,7 +10,6 @@ import type { CommandLineChoiceParameter } from '@rushstack/ts-command-line/lib/index'; import { InternalError } from '@rushstack/node-core-library/lib/InternalError'; -import { Colorize } from '@rushstack/terminal/lib/Colorize'; import type { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider'; import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; @@ -110,11 +109,7 @@ export class ZipSyncCommandLineParser extends CommandLineParser { compression: (this._compressionParameter.value as 'store' | 'deflate' | 'auto' | undefined) ?? 'auto' }); } catch (error) { - if (this._debugParameter.value) { - console.error('\n' + error.stack); - } else { - console.error('\n' + Colorize.red('ERROR: ' + error.message.trim())); - } + this._terminal.writeErrorLine('\n' + error.stack); } } } diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index c1123687d1b..09bd37a2cbb 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -1,5 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. +/* eslint-disable no-console */ import { execSync } from 'child_process'; import { tmpdir } from 'os'; diff --git a/apps/zipsync/src/crc32.test.ts b/apps/zipsync/src/crc32.test.ts index 60a4e76019f..59682bca311 100644 --- a/apps/zipsync/src/crc32.test.ts +++ b/apps/zipsync/src/crc32.test.ts @@ -7,6 +7,7 @@ import * as zlib from 'zlib'; describe('crc32', () => { it('fallbackCrc32 should match zlib.crc32', () => { if (!zlib.crc32) { + // eslint-disable-next-line no-console console.log('Skipping test because zlib.crc32 is not available in this Node.js version'); return; } diff --git a/apps/zipsync/src/crc32.ts b/apps/zipsync/src/crc32.ts index bc01a1171d1..18017c58852 100644 --- a/apps/zipsync/src/crc32.ts +++ b/apps/zipsync/src/crc32.ts @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. 
-import * as zlib from 'zlib'; +import * as zlib from 'node:zlib'; let crcTable: Uint32Array | undefined; @@ -35,11 +35,5 @@ export function fallbackCrc32(data: Buffer, value: number = 0): return value; } -export function crc32Builder(data: Buffer, value: number = 0): number { - if (zlib.crc32) { - return zlib.crc32(data, value); - } else { - // Fallback implementation for Node.js versions older than 20 - return fallbackCrc32(data, value); - } -} +export const crc32Builder: (data: Buffer, value?: number) => number = + zlib.crc32 ?? fallbackCrc32; diff --git a/apps/zipsync/src/start.ts b/apps/zipsync/src/start.ts index 33920ce2d7b..f1bf39e55c0 100644 --- a/apps/zipsync/src/start.ts +++ b/apps/zipsync/src/start.ts @@ -1,10 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. -import { version } from '../package.json'; import { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider'; import { Terminal } from '@rushstack/terminal/lib/Terminal'; +import { version } from '../package.json'; import { ZipSyncCommandLineParser } from './ZipSyncCommandLineParser'; const toolVersion: string = version; diff --git a/apps/zipsync/src/zipSync.ts b/apps/zipsync/src/zipSync.ts index 7fb506deabb..a76ee08323c 100644 --- a/apps/zipsync/src/zipSync.ts +++ b/apps/zipsync/src/zipSync.ts @@ -1,11 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import * as crypto from 'node:crypto'; +import * as zlib from 'node:zlib'; import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; -import * as fs from 'fs'; -import * as path from 'path'; -import * as crypto from 'crypto'; -import * as zlib from 'zlib'; import { type IReadonlyPathTrieNode, LookupByPath } from '@rushstack/lookup-by-path/lib/LookupByPath'; import { crc32Builder } from './crc32'; import { DISPOSE_SYMBOL, getDisposableFileHandle, type IDisposableFileHandle } from './disposableFileHandle'; @@ -24,7 +24,8 @@ import { type ZipMetaCompressionMethod, type IEndOfCentralDirectory, type ICentralDirectoryHeaderParseResult, - type IFileEntry + type IFileEntry, + dosDateTime } from './zipUtils'; const METADATA_FILENAME: string = '__zipsync_metadata__.json'; @@ -192,7 +193,13 @@ export function zipSync( } currentOffset += offset; } + function writeChunksToZip(chunks: Uint8Array[]): void { + for (const chunk of chunks) { + writeChunkToZip(chunk); + } + } + const dosDateTimeNow: { time: number; date: number } = dosDateTime(new Date()); function writeFileEntry(relativePath: string): IFileEntry { function isLikelyAlreadyCompressed(filename: string): boolean { return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); @@ -244,10 +251,11 @@ export function zipSync( crc32: 0, sha1Hash: '', localHeaderOffset: currentOffset, - compressionMethod + compressionMethod, + dosDateTime: dosDateTimeNow }; - writeChunkToZip(writeLocalFileHeader(entry)); + writeChunksToZip(writeLocalFileHeader(entry)); const sha1HashBuilder: crypto.Hash = crypto.createHash('sha1'); let crc32: number = 0; @@ -350,10 +358,11 @@ export function zipSync( crc32: crc32Builder(metadataBuffer), sha1Hash: calculateSHA1(metadataBuffer), localHeaderOffset: currentOffset, - compressionMethod: metadataCompressionMethod + compressionMethod: metadataCompressionMethod, + dosDateTime: 
dosDateTimeNow }; - writeChunkToZip(writeLocalFileHeader(metadataEntry)); + writeChunksToZip(writeLocalFileHeader(metadataEntry)); writeChunkToZip(metadataData, metadataCompressedSize); writeChunkToZip(writeDataDescriptor(metadataEntry)); @@ -371,26 +380,15 @@ export function zipSync( markStart('pack.write.centralDirectory'); const centralDirOffset: number = currentOffset; - let centralDirSize: number = 0; - for (const entry of entries) { - const centralHeader: Buffer = writeCentralDirectoryHeader(entry); - fs.writeSync(zipFile, centralHeader); - centralDirSize += centralHeader.length; + writeChunksToZip(writeCentralDirectoryHeader(entry)); } - terminal.writeDebugLine( - `Central directory written (offset=${centralDirOffset}, size=${centralDirSize})` - ); + const centralDirSize: number = currentOffset - centralDirOffset; markEnd('pack.write.centralDirectory'); // Write end of central directory markStart('pack.write.eocd'); - const endOfCentralDir: Buffer = writeEndOfCentralDirectory( - centralDirOffset, - centralDirSize, - entries.length - ); - fs.writeSync(zipFile, endOfCentralDir); + writeChunkToZip(writeEndOfCentralDirectory(centralDirOffset, centralDirSize, entries.length)); terminal.writeDebugLine('EOCD record written'); markEnd('pack.write.eocd'); } finally { diff --git a/apps/zipsync/src/zipUtils.ts b/apps/zipsync/src/zipUtils.ts index 2bdad1975de..746fb9dd71e 100644 --- a/apps/zipsync/src/zipUtils.ts +++ b/apps/zipsync/src/zipUtils.ts @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. +// zip spec: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + const LOCAL_FILE_HEADER_SIGNATURE: number = 0x04034b50; const CENTRAL_DIR_HEADER_SIGNATURE: number = 0x02014b50; const END_OF_CENTRAL_DIR_SIGNATURE: number = 0x06054b50; @@ -18,6 +20,7 @@ export interface IFileEntry { sha1Hash: string; localHeaderOffset: number; compressionMethod: ZipMetaCompressionMethod; + dosDateTime: { time: number; date: number }; } export interface ILocalFileHeader { @@ -81,158 +84,143 @@ function readUInt16LE(buffer: Buffer, offset: number): number { return buffer.readUInt16LE(offset); } -function dosDateTime(date: Date): { time: number; date: number } { +export function dosDateTime(date: Date): { time: number; date: number } { + /* eslint-disable no-bitwise */ const time: number = - // eslint-disable-next-line no-bitwise - ((date.getHours() & 0x1f) << 11) | - // eslint-disable-next-line no-bitwise - ((date.getMinutes() & 0x3f) << 5) | - // eslint-disable-next-line no-bitwise - ((date.getSeconds() / 2) & 0x1f); + ((date.getHours() & 0x1f) << 11) | ((date.getMinutes() & 0x3f) << 5) | ((date.getSeconds() / 2) & 0x1f); const dateVal: number = - // eslint-disable-next-line no-bitwise (((date.getFullYear() - 1980) & 0x7f) << 9) | - // eslint-disable-next-line no-bitwise (((date.getMonth() + 1) & 0xf) << 5) | - // eslint-disable-next-line no-bitwise (date.getDate() & 0x1f); + /* eslint-enable no-bitwise */ return { time, date: dateVal }; } -export function writeLocalFileHeader(entry: IFileEntry): Buffer { +const localFileHeaderBuffer: Buffer = Buffer.allocUnsafe(30); +export function writeLocalFileHeader( + entry: IFileEntry +): [fileHeaderWithoutVariableLengthData: Buffer, fileHeaderVariableLengthData: Buffer] { const filenameBuffer: Buffer = Buffer.from(entry.filename, 'utf8'); - const headerSize: number = 30 + filenameBuffer.length; - const header: Buffer = 
Buffer.allocUnsafeSlow(headerSize); - const now: Date = new Date(); - const { time, date } = dosDateTime(now); + const { time, date } = entry.dosDateTime; let offset: number = 0; - writeUInt32LE(header, LOCAL_FILE_HEADER_SIGNATURE, offset); + writeUInt32LE(localFileHeaderBuffer, LOCAL_FILE_HEADER_SIGNATURE, offset); offset += 4; - writeUInt16LE(header, 20, offset); // version needed + writeUInt16LE(localFileHeaderBuffer, 20, offset); // version needed offset += 2; // General purpose bit flag: set bit 3 (0x0008) to indicate presence of data descriptor // Per APPNOTE: when bit 3 is set, CRC-32 and sizes in local header are set to zero and // the actual values are stored in the data descriptor that follows the file data. - writeUInt16LE(header, 0x0008, offset); // flags (data descriptor) + writeUInt16LE(localFileHeaderBuffer, 0x0008, offset); // flags (data descriptor) offset += 2; - writeUInt16LE(header, entry.compressionMethod, offset); // compression method (0=store,8=deflate) + writeUInt16LE(localFileHeaderBuffer, entry.compressionMethod, offset); // compression method (0=store,8=deflate) offset += 2; - writeUInt16LE(header, time, offset); // last mod time + writeUInt16LE(localFileHeaderBuffer, time, offset); // last mod time offset += 2; - writeUInt16LE(header, date, offset); // last mod date + writeUInt16LE(localFileHeaderBuffer, date, offset); // last mod date offset += 2; // With bit 3 set, these three fields MUST be zero in the local header - writeUInt32LE(header, 0, offset); // crc32 (placeholder, real value in data descriptor) + writeUInt32LE(localFileHeaderBuffer, 0, offset); // crc32 (placeholder, real value in data descriptor) offset += 4; - writeUInt32LE(header, 0, offset); // compressed size (placeholder) + writeUInt32LE(localFileHeaderBuffer, 0, offset); // compressed size (placeholder) offset += 4; - writeUInt32LE(header, 0, offset); // uncompressed size (placeholder) + writeUInt32LE(localFileHeaderBuffer, 0, offset); // uncompressed size (placeholder) offset += 4; - writeUInt16LE(header, filenameBuffer.length, offset); // filename length + writeUInt16LE(localFileHeaderBuffer, filenameBuffer.length, offset); // filename length offset += 2; - writeUInt16LE(header, 0, offset); // extra field length + writeUInt16LE(localFileHeaderBuffer, 0, offset); // extra field length offset += 2; - filenameBuffer.copy(header, offset); - offset += filenameBuffer.length; - - return header; + return [localFileHeaderBuffer, filenameBuffer]; } -export function writeCentralDirectoryHeader(entry: IFileEntry): Buffer { +const centralDirHeaderBuffer: Buffer = Buffer.allocUnsafe(46); +export function writeCentralDirectoryHeader(entry: IFileEntry): Buffer[] { const filenameBuffer: Buffer = Buffer.from(entry.filename, 'utf8'); - const headerSize: number = 46 + filenameBuffer.length; - const header: Buffer = Buffer.alloc(headerSize); const now: Date = new Date(); const { time, date } = dosDateTime(now); let offset: number = 0; - writeUInt32LE(header, CENTRAL_DIR_HEADER_SIGNATURE, offset); + writeUInt32LE(centralDirHeaderBuffer, CENTRAL_DIR_HEADER_SIGNATURE, offset); offset += 4; - writeUInt16LE(header, 20, offset); // version made by + writeUInt16LE(centralDirHeaderBuffer, 20, offset); // version made by offset += 2; - writeUInt16LE(header, 20, offset); // version needed + writeUInt16LE(centralDirHeaderBuffer, 20, offset); // version needed offset += 2; // Mirror flags used in local header (bit 3 set to indicate data descriptor was used) - writeUInt16LE(header, 0x0008, offset); // flags + 
writeUInt16LE(centralDirHeaderBuffer, 0x0008, offset); // flags offset += 2; - writeUInt16LE(header, entry.compressionMethod, offset); // compression method + writeUInt16LE(centralDirHeaderBuffer, entry.compressionMethod, offset); // compression method offset += 2; - writeUInt16LE(header, time, offset); // last mod time + writeUInt16LE(centralDirHeaderBuffer, time, offset); // last mod time offset += 2; - writeUInt16LE(header, date, offset); // last mod date + writeUInt16LE(centralDirHeaderBuffer, date, offset); // last mod date offset += 2; - writeUInt32LE(header, entry.crc32, offset); // crc32 + writeUInt32LE(centralDirHeaderBuffer, entry.crc32, offset); // crc32 offset += 4; - writeUInt32LE(header, entry.compressedSize, offset); // compressed size + writeUInt32LE(centralDirHeaderBuffer, entry.compressedSize, offset); // compressed size offset += 4; - writeUInt32LE(header, entry.size, offset); // uncompressed size + writeUInt32LE(centralDirHeaderBuffer, entry.size, offset); // uncompressed size offset += 4; - writeUInt16LE(header, filenameBuffer.length, offset); // filename length + writeUInt16LE(centralDirHeaderBuffer, filenameBuffer.length, offset); // filename length offset += 2; - writeUInt16LE(header, 0, offset); // extra field length + writeUInt16LE(centralDirHeaderBuffer, 0, offset); // extra field length offset += 2; - writeUInt16LE(header, 0, offset); // comment length + writeUInt16LE(centralDirHeaderBuffer, 0, offset); // comment length offset += 2; - writeUInt16LE(header, 0, offset); // disk number start + writeUInt16LE(centralDirHeaderBuffer, 0, offset); // disk number start offset += 2; - writeUInt16LE(header, 0, offset); // internal file attributes + writeUInt16LE(centralDirHeaderBuffer, 0, offset); // internal file attributes offset += 2; - writeUInt32LE(header, 0, offset); // external file attributes + writeUInt32LE(centralDirHeaderBuffer, 0, offset); // external file attributes offset += 4; - writeUInt32LE(header, entry.localHeaderOffset, offset); // local header offset + writeUInt32LE(centralDirHeaderBuffer, entry.localHeaderOffset, offset); // local header offset offset += 4; - filenameBuffer.copy(header, offset); - offset += filenameBuffer.length; - - return header; + return [centralDirHeaderBuffer, filenameBuffer]; } +const dataDescriptorBuffer: Buffer = Buffer.allocUnsafe(16); export function writeDataDescriptor(entry: IFileEntry): Buffer { - // We include the optional signature for robustness (APPNOTE allows it) - const descriptor: Buffer = Buffer.alloc(16); let offset: number = 0; - writeUInt32LE(descriptor, DATA_DESCRIPTOR_SIGNATURE, offset); // signature PK\x07\x08 + writeUInt32LE(dataDescriptorBuffer, DATA_DESCRIPTOR_SIGNATURE, offset); // signature PK\x07\x08 offset += 4; - writeUInt32LE(descriptor, entry.crc32, offset); // crc32 + writeUInt32LE(dataDescriptorBuffer, entry.crc32, offset); // crc32 offset += 4; - writeUInt32LE(descriptor, entry.compressedSize, offset); // compressed size + writeUInt32LE(dataDescriptorBuffer, entry.compressedSize, offset); // compressed size offset += 4; - writeUInt32LE(descriptor, entry.size, offset); // uncompressed size - return descriptor; + writeUInt32LE(dataDescriptorBuffer, entry.size, offset); // uncompressed size + return dataDescriptorBuffer; } +const endOfCentralDirBuffer: Buffer = Buffer.allocUnsafe(22); export function writeEndOfCentralDirectory( centralDirOffset: number, centralDirSize: number, entryCount: number ): Buffer { - const header: Buffer = Buffer.alloc(22); - let offset: number = 0; - 
writeUInt32LE(header, END_OF_CENTRAL_DIR_SIGNATURE, offset); + writeUInt32LE(endOfCentralDirBuffer, END_OF_CENTRAL_DIR_SIGNATURE, offset); offset += 4; - writeUInt16LE(header, 0, offset); // disk number + writeUInt16LE(endOfCentralDirBuffer, 0, offset); // disk number offset += 2; - writeUInt16LE(header, 0, offset); // central dir start disk + writeUInt16LE(endOfCentralDirBuffer, 0, offset); // central dir start disk offset += 2; - writeUInt16LE(header, entryCount, offset); // central dir records on disk + writeUInt16LE(endOfCentralDirBuffer, entryCount, offset); // central dir records on disk offset += 2; - writeUInt16LE(header, entryCount, offset); // total central dir records + writeUInt16LE(endOfCentralDirBuffer, entryCount, offset); // total central dir records offset += 2; - writeUInt32LE(header, centralDirSize, offset); // central dir size + writeUInt32LE(endOfCentralDirBuffer, centralDirSize, offset); // central dir size offset += 4; - writeUInt32LE(header, centralDirOffset, offset); // central dir offset + writeUInt32LE(endOfCentralDirBuffer, centralDirOffset, offset); // central dir offset offset += 4; - writeUInt16LE(header, 0, offset); // comment length + writeUInt16LE(endOfCentralDirBuffer, 0, offset); // comment length - return header; + return endOfCentralDirBuffer; } interface ILocalFileHeaderParseResult { @@ -270,7 +258,6 @@ export function parseLocalFileHeader(buffer: Buffer, offset: number): ILocalFile export interface ICentralDirectoryHeaderParseResult { header: ICentralDirectoryHeader; filename: string; - sha1Hash: string; nextOffset: number; } @@ -304,24 +291,14 @@ export function parseCentralDirectoryHeader( localHeaderOffset: readUInt32LE(buffer, offset + 42) }; - const filename: string = buffer.slice(offset + 46, offset + 46 + header.filenameLength).toString('utf8'); - - let sha1Hash: string = ''; - if (header.extraFieldLength > 0) { - const extraFieldOffset: number = offset + 46 + header.filenameLength; - const extraFieldId: number = readUInt16LE(buffer, extraFieldOffset); - const extraFieldSize: number = readUInt16LE(buffer, extraFieldOffset + 2); + offset += 46; - if (extraFieldId === 0x0001 && extraFieldSize === 20) { - sha1Hash = buffer.slice(extraFieldOffset + 4, extraFieldOffset + 4 + extraFieldSize).toString('hex'); - } - } + const filename: string = buffer.toString('utf8', offset, offset + header.filenameLength); return { header, filename, - sha1Hash, - nextOffset: offset + 46 + header.filenameLength + header.extraFieldLength + header.commentLength + nextOffset: offset + header.filenameLength + header.extraFieldLength + header.commentLength }; } diff --git a/common/config/rush/browser-approved-packages.json b/common/config/rush/browser-approved-packages.json index 95d8a2e05c5..23c585145b8 100644 --- a/common/config/rush/browser-approved-packages.json +++ b/common/config/rush/browser-approved-packages.json @@ -50,10 +50,6 @@ "name": "@rushstack/rush-vscode-command-webview", "allowedCategories": [ "vscode-extensions" ] }, - { - "name": "@rushstack/zipsync", - "allowedCategories": [ "libraries" ] - }, { "name": "@ungap/structured-clone", "allowedCategories": [ "libraries" ] diff --git a/common/config/rush/nonbrowser-approved-packages.json b/common/config/rush/nonbrowser-approved-packages.json index b2dde4e55ed..7b3939e4cf7 100644 --- a/common/config/rush/nonbrowser-approved-packages.json +++ b/common/config/rush/nonbrowser-approved-packages.json @@ -370,6 +370,10 @@ "name": "@rushstack/worker-pool", "allowedCategories": [ "libraries" ] }, + { + "name": 
"@rushstack/zipsync", + "allowedCategories": [ "libraries" ] + }, { "name": "@serverless-stack/aws-lambda-ric", "allowedCategories": [ "tests" ] diff --git a/common/config/subspaces/build-tests-subspace/repo-state.json b/common/config/subspaces/build-tests-subspace/repo-state.json index 1132889f823..e69eed3e56e 100644 --- a/common/config/subspaces/build-tests-subspace/repo-state.json +++ b/common/config/subspaces/build-tests-subspace/repo-state.json @@ -2,5 +2,5 @@ { "pnpmShrinkwrapHash": "2ac01ba33e09661dc0e7d7faa36d215bb3d3b91e", "preferredVersionsHash": "550b4cee0bef4e97db6c6aad726df5149d20e7d9", - "packageJsonInjectedDependenciesHash": "b4fb0c03638f2decf92e0951d76b292ee931e138" + "packageJsonInjectedDependenciesHash": "3d0c925bcd727d09159fa60c410e6ddf4f2f5484" } diff --git a/common/config/subspaces/default/pnpm-lock.yaml b/common/config/subspaces/default/pnpm-lock.yaml index f9f41386122..754bb1a892e 100644 --- a/common/config/subspaces/default/pnpm-lock.yaml +++ b/common/config/subspaces/default/pnpm-lock.yaml @@ -463,9 +463,6 @@ importers: '@rushstack/ts-command-line': specifier: workspace:* version: link:../../libraries/ts-command-line - semver: - specifier: ~7.5.4 - version: 7.5.4 typescript: specifier: ~5.8.2 version: 5.8.2 @@ -473,9 +470,6 @@ importers: '@rushstack/heft': specifier: workspace:* version: link:../heft - '@types/semver': - specifier: 7.5.0 - version: 7.5.0 eslint: specifier: ~9.25.1 version: 9.25.1(supports-color@8.1.1) diff --git a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts index 759df8b7e7b..79b6522d44f 100644 --- a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts +++ b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts @@ -2,6 +2,7 @@ // See LICENSE in the project root for license information. 
import * as crypto from 'crypto'; +import * as fs from 'fs'; import { FileSystem, type FolderItem, InternalError, Async } from '@rushstack/node-core-library'; import type { ITerminal } from '@rushstack/terminal'; @@ -171,7 +172,7 @@ export class OperationBuildCache { zipSyncReturn: { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted } } = await zipSyncWorkerAsync({ mode: 'unpack', - compression: 'deflate', + compression: 'auto', archivePath: localCacheEntryPath!, targetDirectories: this._projectOutputFolderNames, baseDir: projectFolderPath @@ -244,7 +245,7 @@ export class OperationBuildCache { zipSyncReturn: { filesPacked } } = await zipSyncWorkerAsync({ mode: 'pack', - compression: 'deflate', + compression: 'auto', archivePath: tempLocalCacheEntryPath, targetDirectories: this._projectOutputFolderNames, baseDir: this._project.projectFolder @@ -255,10 +256,14 @@ export class OperationBuildCache { try { await Async.runWithRetriesAsync({ action: () => - FileSystem.moveAsync({ - sourcePath: tempLocalCacheEntryPath, - destinationPath: finalLocalCacheEntryPath, - overwrite: true + new Promise((resolve, reject) => { + fs.rename(tempLocalCacheEntryPath, finalLocalCacheEntryPath, (err) => { + if (err) { + reject(err); + } else { + resolve(); + } + }); }), maxRetries: 2, retryDelayMs: 500 @@ -273,11 +278,9 @@ export class OperationBuildCache { } localCacheEntryPath = finalLocalCacheEntryPath; } catch (e) { - try { - await FileSystem.deleteFileAsync(tempLocalCacheEntryPath); - } catch (deleteError) { - // Ignored - } + await FileSystem.deleteFileAsync(tempLocalCacheEntryPath).catch(() => { + /* ignore delete error */ + }); throw e; } From 58f1f2bdec7fe04d3288fd67deccb4633d6e393e Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Tue, 16 Sep 2025 15:46:29 -0700 Subject: [PATCH 05/20] add bin/zipsync --- apps/zipsync/bin/zipsync | 2 ++ 1 file changed, 2 insertions(+) create mode 100755 apps/zipsync/bin/zipsync diff --git a/apps/zipsync/bin/zipsync b/apps/zipsync/bin/zipsync new file mode 100755 index 00000000000..aee68e80224 --- /dev/null +++ b/apps/zipsync/bin/zipsync @@ -0,0 +1,2 @@ +#!/usr/bin/env node +require('../lib/start.js'); From 0e19d5eacafdc431c4102a6b2b735e1b9136301c Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Tue, 16 Sep 2025 19:35:15 -0700 Subject: [PATCH 06/20] pr feedback --- apps/zipsync/package.json | 1 - apps/zipsync/src/ZipSyncCommandLineParser.ts | 4 +- apps/zipsync/src/benchmark.test.ts | 5 +- apps/zipsync/src/hash.ts | 42 + apps/zipsync/src/zipSync.ts | 1007 +++++++++-------- .../build-tests-subspace/repo-state.json | 2 +- .../config/subspaces/default/pnpm-lock.yaml | 3 - .../logic/buildCache/OperationBuildCache.ts | 1 - 8 files changed, 563 insertions(+), 502 deletions(-) create mode 100644 apps/zipsync/src/hash.ts diff --git a/apps/zipsync/package.json b/apps/zipsync/package.json index 47dd8a2455d..86b2e9965c7 100644 --- a/apps/zipsync/package.json +++ b/apps/zipsync/package.json @@ -18,7 +18,6 @@ "_phase:test": "heft run --only test -- --clean" }, "dependencies": { - "@rushstack/node-core-library": "workspace:*", "@rushstack/terminal": "workspace:*", "@rushstack/ts-command-line": "workspace:*", "typescript": "~5.8.2", diff --git a/apps/zipsync/src/ZipSyncCommandLineParser.ts b/apps/zipsync/src/ZipSyncCommandLineParser.ts index 08ecf89ed03..2dd084078d6 100644 --- a/apps/zipsync/src/ZipSyncCommandLineParser.ts +++ 
b/apps/zipsync/src/ZipSyncCommandLineParser.ts @@ -9,7 +9,6 @@ import type { IRequiredCommandLineStringListParameter, CommandLineChoiceParameter } from '@rushstack/ts-command-line/lib/index'; -import { InternalError } from '@rushstack/node-core-library/lib/InternalError'; import type { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider'; import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; @@ -92,7 +91,8 @@ export class ZipSyncCommandLineParser extends CommandLineParser { protected override async onExecuteAsync(): Promise { if (this._debugParameter.value) { - InternalError.breakInDebugger = true; + // eslint-disable-next-line no-debugger + debugger; this._terminalProvider.debugEnabled = true; this._terminalProvider.verboseEnabled = true; } diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index 09bd37a2cbb..12105035824 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -222,7 +222,8 @@ function benchZipSyncScenario( }); } -describe(`archive benchmarks (iterations=${ITERATIONS})`, () => { +// the benchmarks are skipped by default because they require external tools (tar, zip) to be installed +describe.skip(`archive benchmarks (iterations=${ITERATIONS})`, () => { it('tar', () => { if (!isTarAvailable()) { console.log('Skipping tar test because tar is not available'); @@ -467,7 +468,7 @@ afterAll(() => { const resultText = outputLines.join('\n'); console.log(resultText); try { - const resultFile = path.join(__dirname, '..', 'temp', `benchmark-results-${runId}.txt`); + const resultFile = path.join(__dirname, '..', 'temp', `benchmark-results.txt`); fs.writeFileSync(resultFile, resultText, { encoding: 'utf-8' }); console.log(`Benchmark results written to: ${resultFile}`); } catch (e) { diff --git a/apps/zipsync/src/hash.ts b/apps/zipsync/src/hash.ts new file mode 100644 index 00000000000..264a20b7042 --- /dev/null +++ b/apps/zipsync/src/hash.ts @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+ +import { closeSync, readSync, fstatSync, type Stats } from 'node:fs'; +import { createHash, type Hash } from 'node:crypto'; + +const buffer: Buffer = Buffer.allocUnsafeSlow(1 << 24); + +export function computeFileHash(fd: number): string | false { + try { + const hash: Hash = createHash('sha256'); + let totalBytesRead: number = 0; + let bytesRead: number; + do { + bytesRead = readSync(fd, buffer, 0, buffer.length, -1); + if (bytesRead <= 0) { + break; + } + totalBytesRead += bytesRead; + hash.update(buffer.subarray(0, bytesRead)); + } while (bytesRead > 0); + if (totalBytesRead === 0) { + // Sometimes directories get treated as empty files + const stat: Stats = fstatSync(fd); + if (!stat.isFile()) { + return false; + } + } + + return hash.digest('hex'); + } catch (err) { + // There is a bug in node-core-library where it doesn't handle if the operation was on a file descriptor + if (err.code === 'EISDIR' || err.code === 'ENOENT' || err.code === 'ENOTDIR') { + return false; + } + throw err; + } finally { + if (fd !== undefined) { + closeSync(fd); + } + } +} diff --git a/apps/zipsync/src/zipSync.ts b/apps/zipsync/src/zipSync.ts index a76ee08323c..5d6be9cdb95 100644 --- a/apps/zipsync/src/zipSync.ts +++ b/apps/zipsync/src/zipSync.ts @@ -27,6 +27,7 @@ import { type IFileEntry, dosDateTime } from './zipUtils'; +import { computeFileHash } from './hash'; const METADATA_FILENAME: string = '__zipsync_metadata__.json'; const METADATA_VERSION: string = '1.0'; @@ -100,579 +101,601 @@ interface IUnpackResult { const LIKELY_COMPRESSED_EXTENSION_REGEX: RegExp = /\.(?:zip|gz|tgz|bz2|xz|7z|rar|jpg|jpeg|png|gif|webp|avif|mp4|m4v|mov|mkv|webm|mp3|ogg|aac|flac|pdf|woff|woff2)$/; -/** - * Packs (creates) or unpacks (synchronizes) a ZIP archive. - * - * @public - */ -export function zipSync( - options: T -): T['mode'] extends 'pack' ? IPackResult : IUnpackResult { - const { - terminal, - mode, - archivePath, - targetDirectories: rawTargetDirectories, - baseDir: rawBaseDir - } = options; - const baseDir: string = path.resolve(rawBaseDir); - const targetDirectories: string[] = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); - const compressionMode: ZipSyncOptionCompression = options.compression; - - function calculateSHA1(data: Buffer): string { - return crypto.createHash('sha1').update(data).digest('hex'); - } - - function packZip(): IPackResult { - markStart('pack.total'); - terminal.writeDebugLine('Starting packZip'); - // Pass 1: enumerate - markStart('pack.enumerate'); - - const filePaths: string[] = []; - const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ dir, depth: 0 })); - - while (queue.length) { - const { dir: currentDir, depth } = queue.shift()!; - terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); - - const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); - - let items: fs.Dirent[]; - try { - items = fs.readdirSync(currentDir, { withFileTypes: true }); - } catch (e) { - if ( - e && - ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') - ) { - terminal.writeWarningLine(`Failed to read directory: ${currentDir}. 
Ignoring.`); - continue; - } else { - throw e; - } - } +function calculateSHA1(data: Buffer): string { + return crypto.createHash('sha1').update(data).digest('hex'); +} - for (const item of items) { - const fullPath: string = path.join(currentDir, item.name); - if (item.isFile()) { - const relativePath: string = path.relative(baseDir, fullPath).replace(/\\/g, '/'); - terminal.writeVerboseLine(`${padding}${item.name}`); - filePaths.push(relativePath); - } else if (item.isDirectory()) { - terminal.writeVerboseLine(`${padding}${item.name}/`); - queue.push({ dir: fullPath, depth: depth + 1 }); - } else { - throw new Error(`Unexpected item (not file or directory): ${fullPath}. Aborting.`); - } - } +function rmdirSync(dirPath: string): void { + try { + fs.rmdirSync(dirPath); + } catch (e) { + if ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') { + // Not found, ignore + } else { + throw e; } + } +} - terminal.writeLine(`Found ${filePaths.length} files to pack (enumerated)`); - markEnd('pack.enumerate'); - - // Pass 2: read + hash + compress - markStart('pack.prepareEntries'); - const bufferSize: number = 1 << 25; // 32 MiB - const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - - terminal.writeDebugLine(`Opening archive for write: ${archivePath}`); - const zipFile: number = fs.openSync(archivePath, 'w'); - let currentOffset: number = 0; - // Use this function to do any write to the zip file, so that we can track the current offset. - function writeChunkToZip(chunk: Uint8Array, lengthBytes: number = chunk.byteLength): void { - let offset: number = 0; - while (lengthBytes > 0 && offset < chunk.byteLength) { - // In practice this call always writes all data at once, but the spec says it is not an error - // for it to not do so. Possibly that situation comes up when writing to something that is not - // an ordinary file. 
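A brief usage note for the computeFileHash helper added in hash.ts above; the file path below is illustrative.

import { openSync } from 'node:fs';
import { computeFileHash } from './hash';

// The helper takes an already-opened file descriptor, streams it through SHA-256 in
// 16 MiB chunks, and closes the descriptor itself in its finally block, so the caller
// must not reuse or re-close fd afterwards.
const fd: number = openSync('/repo/apps/example/lib/index.js', 'r'); // illustrative
const hash: string | false = computeFileHash(fd);
if (hash === false) {
  // fd referred to something other than a regular file (e.g. a directory)
} else {
  // 64-character lowercase hex SHA-256 digest of the file contents
}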
- const written: number = fs.writeSync(zipFile, chunk, offset, lengthBytes); - lengthBytes -= written; - offset += written; - } - currentOffset += offset; - } - function writeChunksToZip(chunks: Uint8Array[]): void { - for (const chunk of chunks) { - writeChunkToZip(chunk); - } +function unlinkSync(filePath: string): void { + try { + fs.unlinkSync(filePath); + } catch (e) { + if (e && (e as NodeJS.ErrnoException).code === 'ENOENT') { + // Not found, ignore + } else { + throw e; } + } +} - const dosDateTimeNow: { time: number; date: number } = dosDateTime(new Date()); - function writeFileEntry(relativePath: string): IFileEntry { - function isLikelyAlreadyCompressed(filename: string): boolean { - return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); - } - const fullPath: string = path.join(baseDir, relativePath); - - const readInputInChunks: (onChunk: (bytesInInputBuffer: number) => void) => void = ( - onChunk: (bytesInInputBuffer: number) => void - ): void => { - using inputDisposable: IDisposableFileHandle = getDisposableFileHandle(fullPath, 'r'); - - let bytesInInputBuffer: number = 0; - // The entire input buffer will be drained in each loop iteration - // So run until EOF - while (!isNaN(inputDisposable.fd)) { - bytesInInputBuffer = fs.readSync(inputDisposable.fd, inputBuffer, 0, inputBuffer.byteLength, -1); - - if (bytesInInputBuffer <= 0) { - // EOF, close the input fd - inputDisposable[DISPOSE_SYMBOL](); - } +function packZip({ + archivePath, + baseDir, + compression, + targetDirectories, + terminal +}: IZipSyncOptions): IPackResult { + markStart('pack.total'); + terminal.writeDebugLine('Starting packZip'); + // Pass 1: enumerate + markStart('pack.enumerate'); - onChunk(bytesInInputBuffer); - } - }; + const filePaths: string[] = []; + const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ dir, depth: 0 })); - let shouldCompress: boolean = false; - if (compressionMode === 'deflate') { - shouldCompress = true; - } else if (compressionMode === 'auto') { - // Heuristic: skip compression for small files or likely-already-compressed files - if (!isLikelyAlreadyCompressed(relativePath)) { - shouldCompress = true; - } else { - terminal.writeVerboseLine( - `Skip compression heuristically (already-compressed) for ${relativePath} (size unknown at this point)` - ); - } - } + while (queue.length) { + const { dir: currentDir, depth } = queue.shift()!; + terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); - const compressionMethod: ZipMetaCompressionMethod = shouldCompress - ? DEFLATE_COMPRESSION - : STORE_COMPRESSION; - - const entry: IFileEntry = { - filename: relativePath, - size: 0, - compressedSize: 0, - crc32: 0, - sha1Hash: '', - localHeaderOffset: currentOffset, - compressionMethod, - dosDateTime: dosDateTimeNow - }; - - writeChunksToZip(writeLocalFileHeader(entry)); - - const sha1HashBuilder: crypto.Hash = crypto.createHash('sha1'); - let crc32: number = 0; - let uncompressedSize: number = 0; - let compressedSize: number = 0; - - using deflateIncremental: IIncrementalZlib | undefined = shouldCompress - ? createIncrementalZlib( - outputBuffer, - (chunk, lengthBytes) => { - writeChunkToZip(chunk, lengthBytes); - compressedSize += lengthBytes; - }, - 'deflate' - ) - : undefined; - - // Also capture content if we might need it (for compression decision or storing raw data). - // We'll accumulate into an array of buffers to avoid repeated concatenations for large files. 
- readInputInChunks((bytesInInputBuffer: number) => { - const slice: Buffer = inputBuffer.subarray(0, bytesInInputBuffer); - sha1HashBuilder.update(slice); - crc32 = crc32Builder(slice, crc32); - if (deflateIncremental) { - deflateIncremental.update(slice); - } else { - writeChunkToZip(slice, bytesInInputBuffer); - } - uncompressedSize += bytesInInputBuffer; - }); + const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); - // finalize hashes, compression - deflateIncremental?.update(Buffer.alloc(0)); - crc32 = crc32 >>> 0; - const sha1Hash: string = sha1HashBuilder.digest('hex'); + let items: fs.Dirent[]; + try { + items = fs.readdirSync(currentDir, { withFileTypes: true }); + } catch (e) { + if ( + e && + ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') + ) { + terminal.writeWarningLine(`Failed to read directory: ${currentDir}. Ignoring.`); + continue; + } else { + throw e; + } + } - if (!shouldCompress) { - compressedSize = uncompressedSize; + for (const item of items) { + const fullPath: string = path.join(currentDir, item.name); + if (item.isFile()) { + const relativePath: string = path.relative(baseDir, fullPath).replace(/\\/g, '/'); + terminal.writeVerboseLine(`${padding}${item.name}`); + filePaths.push(relativePath); + } else if (item.isDirectory()) { + terminal.writeVerboseLine(`${padding}${item.name}/`); + queue.push({ dir: fullPath, depth: depth + 1 }); + } else { + throw new Error(`Unexpected item (not file or directory): ${fullPath}. Aborting.`); } + } + } - entry.size = uncompressedSize; - entry.compressedSize = compressedSize; - entry.crc32 = crc32; - entry.sha1Hash = sha1Hash; + terminal.writeLine(`Found ${filePaths.length} files to pack (enumerated)`); + markEnd('pack.enumerate'); - writeChunkToZip(writeDataDescriptor(entry)); + // Pass 2: read + hash + compress + markStart('pack.prepareEntries'); + const bufferSize: number = 1 << 25; // 32 MiB + const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - terminal.writeVerboseLine( - `${relativePath} (sha1=${entry.sha1Hash}, crc32=${entry.crc32.toString(16)}, size=${ - entry.size - }, compressed=${entry.compressedSize}, method=${entry.compressionMethod}, compressed ${( - 100 - - (entry.compressedSize / entry.size) * 100 - ).toFixed(1)}%)` - ); - return entry; + terminal.writeDebugLine(`Opening archive for write: ${archivePath}`); + using zipFile: IDisposableFileHandle = getDisposableFileHandle(archivePath, 'w'); + let currentOffset: number = 0; + // Use this function to do any write to the zip file, so that we can track the current offset. + function writeChunkToZip(chunk: Uint8Array, lengthBytes: number = chunk.byteLength): void { + let offset: number = 0; + while (lengthBytes > 0 && offset < chunk.byteLength) { + // In practice this call always writes all data at once, but the spec says it is not an error + // for it to not do so. Possibly that situation comes up when writing to something that is not + // an ordinary file. 
+ const written: number = fs.writeSync(zipFile.fd, chunk, offset, lengthBytes); + lengthBytes -= written; + offset += written; } - - const entries: IFileEntry[] = []; - for (const relativePath of filePaths) { - entries.push(writeFileEntry(relativePath)); + currentOffset += offset; + } + function writeChunksToZip(chunks: Uint8Array[]): void { + for (const chunk of chunks) { + writeChunkToZip(chunk); } + } - markEnd('pack.prepareEntries'); - terminal.writeLine(`Prepared ${entries.length} file entries`); - - markStart('pack.metadata.build'); - const metadata: IMetadata = { version: METADATA_VERSION, files: {} }; - for (const entry of entries) { - metadata.files[entry.filename] = { size: entry.size, sha1Hash: entry.sha1Hash }; + const dosDateTimeNow: { time: number; date: number } = dosDateTime(new Date()); + function writeFileEntry(relativePath: string): IFileEntry { + function isLikelyAlreadyCompressed(filename: string): boolean { + return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); } + const fullPath: string = path.join(baseDir, relativePath); + + const readInputInChunks: (onChunk: (bytesInInputBuffer: number) => void) => void = ( + onChunk: (bytesInInputBuffer: number) => void + ): void => { + using inputDisposable: IDisposableFileHandle = getDisposableFileHandle(fullPath, 'r'); + + let bytesInInputBuffer: number = 0; + // The entire input buffer will be drained in each loop iteration + // So run until EOF + while (!isNaN(inputDisposable.fd)) { + bytesInInputBuffer = fs.readSync(inputDisposable.fd, inputBuffer, 0, inputBuffer.byteLength, -1); + + if (bytesInInputBuffer <= 0) { + // EOF, close the input fd + inputDisposable[DISPOSE_SYMBOL](); + } - const metadataContent: string = JSON.stringify(metadata); - const metadataBuffer: Buffer = Buffer.from(metadataContent, 'utf8'); - terminal.writeDebugLine( - `Metadata size=${metadataBuffer.length} bytes, fileCount=${Object.keys(metadata.files).length}` - ); + onChunk(bytesInInputBuffer); + } + }; - let metadataCompressionMethod: ZipMetaCompressionMethod = STORE_COMPRESSION; - let metadataData: Buffer = metadataBuffer; - let metadataCompressedSize: number = metadataBuffer.length; - if ((compressionMode === 'deflate' || compressionMode === 'auto') && metadataBuffer.length > 64) { - const compressed: Buffer = zlib.deflateRawSync(metadataBuffer, { level: 9 }); - if (compressed.length < metadataBuffer.length) { - metadataCompressionMethod = DEFLATE_COMPRESSION; - metadataData = compressed; - metadataCompressedSize = compressed.length; - terminal.writeDebugLine( - `Metadata compressed (orig=${metadataBuffer.length}, compressed=${compressed.length})` - ); + let shouldCompress: boolean = false; + if (compression === 'deflate') { + shouldCompress = true; + } else if (compression === 'auto') { + // Heuristic: skip compression for small files or likely-already-compressed files + if (!isLikelyAlreadyCompressed(relativePath)) { + shouldCompress = true; } else { - terminal.writeDebugLine('Metadata compression skipped (not smaller)'); + terminal.writeVerboseLine( + `Skip compression heuristically (already-compressed) for ${relativePath} (size unknown at this point)` + ); } } - const metadataEntry: IFileEntry = { - filename: METADATA_FILENAME, - size: metadataBuffer.length, - compressedSize: metadataCompressedSize, - crc32: crc32Builder(metadataBuffer), - sha1Hash: calculateSHA1(metadataBuffer), + const compressionMethod: ZipMetaCompressionMethod = shouldCompress + ? 
DEFLATE_COMPRESSION + : STORE_COMPRESSION; + + const entry: IFileEntry = { + filename: relativePath, + size: 0, + compressedSize: 0, + crc32: 0, + sha1Hash: '', localHeaderOffset: currentOffset, - compressionMethod: metadataCompressionMethod, + compressionMethod, dosDateTime: dosDateTimeNow }; - writeChunksToZip(writeLocalFileHeader(metadataEntry)); - writeChunkToZip(metadataData, metadataCompressedSize); - writeChunkToZip(writeDataDescriptor(metadataEntry)); + writeChunksToZip(writeLocalFileHeader(entry)); + + const sha1HashBuilder: crypto.Hash = crypto.createHash('sha1'); + let crc32: number = 0; + let uncompressedSize: number = 0; + let compressedSize: number = 0; + + using deflateIncremental: IIncrementalZlib | undefined = shouldCompress + ? createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + writeChunkToZip(chunk, lengthBytes); + compressedSize += lengthBytes; + }, + 'deflate' + ) + : undefined; + + // Also capture content if we might need it (for compression decision or storing raw data). + // We'll accumulate into an array of buffers to avoid repeated concatenations for large files. + readInputInChunks((bytesInInputBuffer: number) => { + const slice: Buffer = inputBuffer.subarray(0, bytesInInputBuffer); + sha1HashBuilder.update(slice); + crc32 = crc32Builder(slice, crc32); + if (deflateIncremental) { + deflateIncremental.update(slice); + } else { + writeChunkToZip(slice, bytesInInputBuffer); + } + uncompressedSize += bytesInInputBuffer; + }); + + // finalize hashes, compression + deflateIncremental?.update(Buffer.alloc(0)); + crc32 = crc32 >>> 0; + const sha1Hash: string = sha1HashBuilder.digest('hex'); - entries.push(metadataEntry); - terminal.writeVerboseLine(`Total entries including metadata: ${entries.length}`); + if (!shouldCompress) { + compressedSize = uncompressedSize; + } - markEnd('pack.metadata.build'); + entry.size = uncompressedSize; + entry.compressedSize = compressedSize; + entry.crc32 = crc32; + entry.sha1Hash = sha1Hash; - markStart('pack.write.entries'); - const outputDir: string = path.dirname(archivePath); - fs.mkdirSync(outputDir, { recursive: true }); + writeChunkToZip(writeDataDescriptor(entry)); - try { - markEnd('pack.write.entries'); + terminal.writeVerboseLine( + `${relativePath} (sha1=${entry.sha1Hash}, crc32=${entry.crc32.toString(16)}, size=${ + entry.size + }, compressed=${entry.compressedSize}, method=${entry.compressionMethod}, compressed ${( + 100 - + (entry.compressedSize / entry.size) * 100 + ).toFixed(1)}%)` + ); + return entry; + } - markStart('pack.write.centralDirectory'); - const centralDirOffset: number = currentOffset; - for (const entry of entries) { - writeChunksToZip(writeCentralDirectoryHeader(entry)); - } - const centralDirSize: number = currentOffset - centralDirOffset; - markEnd('pack.write.centralDirectory'); - - // Write end of central directory - markStart('pack.write.eocd'); - writeChunkToZip(writeEndOfCentralDirectory(centralDirOffset, centralDirSize, entries.length)); - terminal.writeDebugLine('EOCD record written'); - markEnd('pack.write.eocd'); - } finally { - fs.closeSync(zipFile); - terminal.writeDebugLine('Archive file closed'); + const entries: IFileEntry[] = []; + for (const relativePath of filePaths) { + entries.push(writeFileEntry(relativePath)); + } + + markEnd('pack.prepareEntries'); + terminal.writeLine(`Prepared ${entries.length} file entries`); + + markStart('pack.metadata.build'); + const metadata: IMetadata = { version: METADATA_VERSION, files: {} }; + for (const entry of entries) { + 
metadata.files[entry.filename] = { size: entry.size, sha1Hash: entry.sha1Hash }; + } + + const metadataContent: string = JSON.stringify(metadata); + const metadataBuffer: Buffer = Buffer.from(metadataContent, 'utf8'); + terminal.writeDebugLine( + `Metadata size=${metadataBuffer.length} bytes, fileCount=${Object.keys(metadata.files).length}` + ); + + let metadataCompressionMethod: ZipMetaCompressionMethod = STORE_COMPRESSION; + let metadataData: Buffer = metadataBuffer; + let metadataCompressedSize: number = metadataBuffer.length; + if ((compression === 'deflate' || compression === 'auto') && metadataBuffer.length > 64) { + const compressed: Buffer = zlib.deflateRawSync(metadataBuffer, { level: 9 }); + if (compressed.length < metadataBuffer.length) { + metadataCompressionMethod = DEFLATE_COMPRESSION; + metadataData = compressed; + metadataCompressedSize = compressed.length; + terminal.writeDebugLine( + `Metadata compressed (orig=${metadataBuffer.length}, compressed=${compressed.length})` + ); + } else { + terminal.writeDebugLine('Metadata compression skipped (not smaller)'); } - markEnd('pack.total'); - const total: number = getDuration('pack.total'); - emitSummary('pack', terminal); - terminal.writeLine(`Successfully packed ${entries.length} files in ${formatDuration(total)}`); - return { filesPacked: entries.length, metadata }; } - function unpackZip(): IUnpackResult { - markStart('unpack.total'); - terminal.writeDebugLine('Starting unpackZip'); + const metadataEntry: IFileEntry = { + filename: METADATA_FILENAME, + size: metadataBuffer.length, + compressedSize: metadataCompressedSize, + crc32: crc32Builder(metadataBuffer), + sha1Hash: calculateSHA1(metadataBuffer), + localHeaderOffset: currentOffset, + compressionMethod: metadataCompressionMethod, + dosDateTime: dosDateTimeNow + }; - markStart('unpack.read.archive'); - const zipBuffer: Buffer = fs.readFileSync(archivePath); - terminal.writeDebugLine(`Archive size=${zipBuffer.length} bytes`); - markEnd('unpack.read.archive'); + writeChunksToZip(writeLocalFileHeader(metadataEntry)); + writeChunkToZip(metadataData, metadataCompressedSize); + writeChunkToZip(writeDataDescriptor(metadataEntry)); - markStart('unpack.parse.centralDirectory'); - const zipTree: LookupByPath = new LookupByPath(); - const endOfCentralDir: IEndOfCentralDirectory = findEndOfCentralDirectory(zipBuffer); + entries.push(metadataEntry); + terminal.writeVerboseLine(`Total entries including metadata: ${entries.length}`); - const centralDirBuffer: Buffer = zipBuffer.subarray( - endOfCentralDir.centralDirOffset, - endOfCentralDir.centralDirOffset + endOfCentralDir.centralDirSize - ); - terminal.writeDebugLine( - `Central directory slice size=${centralDirBuffer.length} (expected=${endOfCentralDir.centralDirSize})` - ); + markEnd('pack.metadata.build'); - let metadataEntry: ICentralDirectoryHeaderParseResult | undefined; - const entries: Array = []; - let offset: number = 0; + markStart('pack.write.entries'); + const outputDir: string = path.dirname(archivePath); + fs.mkdirSync(outputDir, { recursive: true }); - for (let i: number = 0; i < endOfCentralDir.totalCentralDirRecords; i++) { - const result: ICentralDirectoryHeaderParseResult = parseCentralDirectoryHeader( - centralDirBuffer, - offset - ); - zipTree.setItem(result.filename, true); + markEnd('pack.write.entries'); - if (result.filename === METADATA_FILENAME) { - if (metadataEntry) { - throw new Error('Multiple metadata entries found in archive'); - } - metadataEntry = result; - } + 
markStart('pack.write.centralDirectory'); + const centralDirOffset: number = currentOffset; + for (const entry of entries) { + writeChunksToZip(writeCentralDirectoryHeader(entry)); + } + const centralDirSize: number = currentOffset - centralDirOffset; + markEnd('pack.write.centralDirectory'); + + // Write end of central directory + markStart('pack.write.eocd'); + writeChunkToZip(writeEndOfCentralDirectory(centralDirOffset, centralDirSize, entries.length)); + terminal.writeDebugLine('EOCD record written'); + markEnd('pack.write.eocd'); + + markEnd('pack.total'); + const total: number = getDuration('pack.total'); + emitSummary('pack', terminal); + terminal.writeLine(`Successfully packed ${entries.length} files in ${formatDuration(total)}`); + return { filesPacked: entries.length, metadata }; +} - entries.push(result); - offset = result.nextOffset; - terminal.writeDebugLine( - `Parsed central entry ${result.filename} (method=${result.header.compressionMethod}, compSize=${result.header.compressedSize})` - ); +function unpackZip({ archivePath, baseDir, targetDirectories, terminal }: IZipSyncOptions): IUnpackResult { + markStart('unpack.total'); + terminal.writeDebugLine('Starting unpackZip'); + + markStart('unpack.read.archive'); + const zipBuffer: Buffer = fs.readFileSync(archivePath); + terminal.writeDebugLine(`Archive size=${zipBuffer.length} bytes`); + markEnd('unpack.read.archive'); + + markStart('unpack.parse.centralDirectory'); + const zipTree: LookupByPath = new LookupByPath(); + const endOfCentralDir: IEndOfCentralDirectory = findEndOfCentralDirectory(zipBuffer); + + const centralDirBuffer: Buffer = zipBuffer.subarray( + endOfCentralDir.centralDirOffset, + endOfCentralDir.centralDirOffset + endOfCentralDir.centralDirSize + ); + terminal.writeDebugLine( + `Central directory slice size=${centralDirBuffer.length} (expected=${endOfCentralDir.centralDirSize})` + ); + + let metadataEntry: ICentralDirectoryHeaderParseResult | undefined; + const entries: Array = []; + let offset: number = 0; + + for (let i: number = 0; i < endOfCentralDir.totalCentralDirRecords; i++) { + const result: ICentralDirectoryHeaderParseResult = parseCentralDirectoryHeader(centralDirBuffer, offset); + zipTree.setItem(result.filename, true); + + if (result.filename === METADATA_FILENAME) { + if (metadataEntry) { + throw new Error('Multiple metadata entries found in archive'); + } + metadataEntry = result; } - markEnd('unpack.parse.centralDirectory'); - if (!metadataEntry) { - throw new Error(`Metadata entry not found in archive`); - } + entries.push(result); + offset = result.nextOffset; + terminal.writeDebugLine( + `Parsed central entry ${result.filename} (method=${result.header.compressionMethod}, compSize=${result.header.compressedSize})` + ); + } + markEnd('unpack.parse.centralDirectory'); - markStart('unpack.read.metadata'); - terminal.writeDebugLine('Metadata entry found, reading'); - const metadataZipBuffer: Buffer = getFileFromZip(zipBuffer, metadataEntry); + if (!metadataEntry) { + throw new Error(`Metadata entry not found in archive`); + } - let metadataBuffer: Buffer; - if (metadataEntry.header.compressionMethod === STORE_COMPRESSION) { - metadataBuffer = metadataZipBuffer; - } else if (metadataEntry.header.compressionMethod === DEFLATE_COMPRESSION) { - metadataBuffer = zlib.inflateRawSync(metadataZipBuffer); - if (metadataBuffer.length !== metadataEntry.header.uncompressedSize) { - throw new Error( - `Metadata size mismatch (expected ${metadataEntry.header.uncompressedSize}, got ${metadataBuffer.length})` - 
); - } - } else { + markStart('unpack.read.metadata'); + terminal.writeDebugLine('Metadata entry found, reading'); + const metadataZipBuffer: Buffer = getFileFromZip(zipBuffer, metadataEntry); + + let metadataBuffer: Buffer; + if (metadataEntry.header.compressionMethod === STORE_COMPRESSION) { + metadataBuffer = metadataZipBuffer; + } else if (metadataEntry.header.compressionMethod === DEFLATE_COMPRESSION) { + metadataBuffer = zlib.inflateRawSync(metadataZipBuffer); + if (metadataBuffer.length !== metadataEntry.header.uncompressedSize) { throw new Error( - `Unsupported compression method for metadata: ${metadataEntry.header.compressionMethod}` + `Metadata size mismatch (expected ${metadataEntry.header.uncompressedSize}, got ${metadataBuffer.length})` ); } + } else { + throw new Error(`Unsupported compression method for metadata: ${metadataEntry.header.compressionMethod}`); + } - const metadata: IMetadata = JSON.parse(metadataBuffer.toString('utf8')) as IMetadata; + const metadata: IMetadata = JSON.parse(metadataBuffer.toString('utf8')) as IMetadata; - if (metadata.version !== METADATA_VERSION) { - throw new Error(`Unsupported metadata version: ${metadata.version}`); - } + if (metadata.version !== METADATA_VERSION) { + throw new Error(`Unsupported metadata version: ${metadata.version}`); + } - terminal.writeDebugLine( - `Metadata (version=${metadata.version}) parsed (fileCount=${Object.keys(metadata.files).length}, rawSize=${metadataBuffer.length})` - ); - markEnd('unpack.read.metadata'); + terminal.writeDebugLine( + `Metadata (version=${metadata.version}) parsed (fileCount=${Object.keys(metadata.files).length}, rawSize=${metadataBuffer.length})` + ); + markEnd('unpack.read.metadata'); - terminal.writeLine(`Found ${entries.length} files in archive`); + terminal.writeLine(`Found ${entries.length} files in archive`); - for (const targetDirectory of targetDirectories) { - fs.mkdirSync(targetDirectory, { recursive: true }); - terminal.writeDebugLine(`Ensured target directory: ${targetDirectory}`); - } + for (const targetDirectory of targetDirectories) { + fs.mkdirSync(targetDirectory, { recursive: true }); + terminal.writeDebugLine(`Ensured target directory: ${targetDirectory}`); + } - let extractedCount: number = 0; - let skippedCount: number = 0; - let deletedFilesCount: number = 0; - let deletedOtherCount: number = 0; - let deletedFoldersCount: number = 0; - let scanCount: number = 0; - - const dirsToCleanup: string[] = []; - - markStart('unpack.scan.existing'); - const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ - dir, - depth: 0, - node: zipTree.getNodeAtPrefix(path.relative(baseDir, dir)) - })); - - while (queue.length) { - const { dir: currentDir, depth, node } = queue.shift()!; - terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); - - const padding: string = depth === 0 ? 
'' : '-↳'.repeat(depth); - - let items: fs.Dirent[]; - try { - items = fs.readdirSync(currentDir, { withFileTypes: true }); - } catch (e) { - terminal.writeWarningLine(`Failed to read directory: ${currentDir}`); - continue; - } + let extractedCount: number = 0; + let skippedCount: number = 0; + let deletedFilesCount: number = 0; + let deletedOtherCount: number = 0; + let deletedFoldersCount: number = 0; + let scanCount: number = 0; - for (const item of items) { - scanCount++; - // check if exists in zipTree, if not delete - const relativePath: string = path - .relative(baseDir, path.join(currentDir, item.name)) - .replace(/\\/g, '/'); - - const childNode: IReadonlyPathTrieNode | undefined = node?.children?.get(item.name); - - if (item.isFile()) { - terminal.writeVerboseLine(`${padding}${item.name}`); - if (!childNode?.value) { - terminal.writeDebugLine(`Deleting file: ${relativePath}`); - fs.unlinkSync(relativePath); - deletedFilesCount++; - } - } else if (item.isDirectory()) { - terminal.writeVerboseLine(`${padding}${item.name}/`); - queue.push({ dir: relativePath, depth: depth + 1, node: childNode }); - if (!childNode || childNode.value) { - dirsToCleanup.push(relativePath); - } - } else { - terminal.writeVerboseLine(`${padding}${item.name} (not file or directory, deleting)`); - fs.unlinkSync(relativePath); - deletedOtherCount++; + const dirsToCleanup: string[] = []; + + markStart('unpack.scan.existing'); + const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ + dir, + depth: 0, + node: zipTree.getNodeAtPrefix(path.relative(baseDir, dir)) + })); + + while (queue.length) { + const { dir: currentDir, depth, node } = queue.shift()!; + terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); + + const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); + + let items: fs.Dirent[]; + try { + items = fs.readdirSync(currentDir, { withFileTypes: true }); + } catch (e) { + terminal.writeWarningLine(`Failed to read directory: ${currentDir}`); + continue; + } + + for (const item of items) { + scanCount++; + // check if exists in zipTree, if not delete + const relativePath: string = path + .relative(baseDir, path.join(currentDir, item.name)) + .replace(/\\/g, '/'); + + const childNode: IReadonlyPathTrieNode | undefined = node?.children?.get(item.name); + + if (item.isFile()) { + terminal.writeVerboseLine(`${padding}${item.name}`); + if (!childNode?.value) { + terminal.writeDebugLine(`Deleting file: ${relativePath}`); + unlinkSync(relativePath); + deletedFilesCount++; } + } else if (item.isDirectory()) { + terminal.writeVerboseLine(`${padding}${item.name}/`); + queue.push({ dir: relativePath, depth: depth + 1, node: childNode }); + if (!childNode || childNode.value) { + dirsToCleanup.push(relativePath); + } + } else { + terminal.writeVerboseLine(`${padding}${item.name} (not file or directory, deleting)`); + unlinkSync(relativePath); + deletedOtherCount++; } } + } - for (const dir of dirsToCleanup) { - // Try to remove the directory. If it is not empty, this will throw and we can ignore the error. - try { - fs.rmdirSync(dir); - terminal.writeDebugLine(`Deleted empty directory: ${dir}`); - deletedFoldersCount++; - } catch (e) { - // Probably not empty - terminal.writeDebugLine(`Directory not empty, skipping: ${dir}`); - } + for (const dir of dirsToCleanup) { + // Try to remove the directory. If it is not empty, this will throw and we can ignore the error. 
+ try { + rmdirSync(dir); + terminal.writeDebugLine(`Deleted empty directory: ${dir}`); + deletedFoldersCount++; + } catch (e) { + // Probably not empty + terminal.writeDebugLine(`Directory not empty, skipping: ${dir}`); } + } - terminal.writeDebugLine(`Existing entries tracked: ${scanCount}`); - markEnd('unpack.scan.existing'); + terminal.writeDebugLine(`Existing entries tracked: ${scanCount}`); + markEnd('unpack.scan.existing'); - markStart('unpack.extract.loop'); - const bufferSize: number = 1 << 25; // 32 MiB - const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + markStart('unpack.extract.loop'); + const bufferSize: number = 1 << 25; // 32 MiB + const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - const dirsCreated: Set = new Set(); + const dirsCreated: Set = new Set(); - for (const entry of entries) { - if (entry.filename === METADATA_FILENAME) { - continue; - } + for (const entry of entries) { + if (entry.filename === METADATA_FILENAME) { + continue; + } - const targetPath: string = path.join(baseDir, entry.filename); - const targetDir: string = path.dirname(targetPath); - if (!dirsCreated.has(targetDir)) { - fs.mkdirSync(targetDir, { recursive: true }); - dirsCreated.add(targetDir); - } + const targetPath: string = path.join(baseDir, entry.filename); + const targetDir: string = path.dirname(targetPath); + if (!dirsCreated.has(targetDir)) { + fs.mkdirSync(targetDir, { recursive: true }); + dirsCreated.add(targetDir); + } - let shouldExtract: boolean = true; - if (metadata) { - const stats: fs.Stats | undefined = fs.statSync(targetPath, { throwIfNoEntry: false }); - if (!stats) { - terminal.writeDebugLine(`File does not exist and will be extracted: ${entry.filename}`); - } else { - const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; - - if (metadataFile && stats.size === metadataFile.size) { - const existingData: Buffer = fs.readFileSync(targetPath); - const existingHash: string = calculateSHA1(existingData); - - if (existingHash === metadataFile.sha1Hash) { - shouldExtract = false; - skippedCount++; - terminal.writeDebugLine(`Skip unchanged file: ${entry.filename}`); - } + let shouldExtract: boolean = true; + if (metadata) { + const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; + + if (metadataFile) { + try { + using existingFile: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'r'); + const existingHash: string | false = computeFileHash(existingFile.fd); + if (existingHash === metadataFile.sha1Hash) { + shouldExtract = false; + skippedCount++; + terminal.writeDebugLine(`Skip unchanged file: ${entry.filename}`); + } + } catch (e) { + if (e && (e as NodeJS.ErrnoException).code === 'ENOENT') { + // File does not exist, will extract + } else { + throw e; } } } + } - if (shouldExtract) { - terminal.writeDebugLine(`Extracting file: ${entry.filename}`); - const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); - let fileData: Buffer; - using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); - if (entry.header.compressionMethod === STORE_COMPRESSION) { - fileData = fileZipBuffer; - let writeOffset: number = 0; - while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { - const written: number = fs.writeSync( - fileHandle.fd, - fileData, - writeOffset, - fileData.length - writeOffset - ); - writeOffset += written; - } - } else if (entry.header.compressionMethod === DEFLATE_COMPRESSION) { - using 
inflateIncremental: IIncrementalZlib = createIncrementalZlib( - outputBuffer, - (chunk, lengthBytes) => { - let writeOffset: number = 0; - while (lengthBytes > 0 && writeOffset < chunk.byteLength) { - const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); - lengthBytes -= written; - writeOffset += written; - } - }, - 'inflate' - ); - inflateIncremental.update(fileZipBuffer); - inflateIncremental.update(Buffer.alloc(0)); - } else { - throw new Error( - `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` + if (shouldExtract) { + terminal.writeDebugLine(`Extracting file: ${entry.filename}`); + const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); + let fileData: Buffer; + using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); + if (entry.header.compressionMethod === STORE_COMPRESSION) { + fileData = fileZipBuffer; + let writeOffset: number = 0; + while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { + const written: number = fs.writeSync( + fileHandle.fd, + fileData, + writeOffset, + fileData.length - writeOffset ); + writeOffset += written; } - - // If data descriptor was used we rely on central directory values already consumed. - extractedCount++; + } else if (entry.header.compressionMethod === DEFLATE_COMPRESSION) { + using inflateIncremental: IIncrementalZlib = createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + let writeOffset: number = 0; + while (lengthBytes > 0 && writeOffset < chunk.byteLength) { + const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); + lengthBytes -= written; + writeOffset += written; + } + }, + 'inflate' + ); + inflateIncremental.update(fileZipBuffer); + inflateIncremental.update(Buffer.alloc(0)); + } else { + throw new Error( + `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` + ); } + + // If data descriptor was used we rely on central directory values already consumed. + extractedCount++; } - markEnd('unpack.extract.loop'); - - markEnd('unpack.total'); - const unpackTotal: number = getDuration('unpack.total'); - terminal.writeLine( - `Extraction complete: ${extractedCount} extracted, ${skippedCount} skipped, ${deletedFilesCount} deleted, ${deletedFoldersCount} folders deleted, ${deletedOtherCount} other entries deleted in ${formatDuration( - unpackTotal - )}` - ); - emitSummary('unpack', terminal); - terminal.writeDebugLine('unpackZip finished'); - return { - metadata, - filesExtracted: extractedCount, - filesSkipped: skippedCount, - filesDeleted: deletedFilesCount, - foldersDeleted: deletedFoldersCount, - otherEntriesDeleted: deletedOtherCount - }; } + markEnd('unpack.extract.loop'); + + markEnd('unpack.total'); + const unpackTotal: number = getDuration('unpack.total'); + terminal.writeLine( + `Extraction complete: ${extractedCount} extracted, ${skippedCount} skipped, ${deletedFilesCount} deleted, ${deletedFoldersCount} folders deleted, ${deletedOtherCount} other entries deleted in ${formatDuration( + unpackTotal + )}` + ); + emitSummary('unpack', terminal); + terminal.writeDebugLine('unpackZip finished'); + return { + metadata, + filesExtracted: extractedCount, + filesSkipped: skippedCount, + filesDeleted: deletedFilesCount, + foldersDeleted: deletedFoldersCount, + otherEntriesDeleted: deletedOtherCount + }; +} + +/** + * Packs (creates) or unpacks (synchronizes) a ZIP archive. 
+ * + * @public + */ +export function zipSync( + options: T +): T['mode'] extends 'pack' ? IPackResult : IUnpackResult { + const { + terminal, + mode, + archivePath, + targetDirectories: rawTargetDirectories, + baseDir: rawBaseDir + } = options; + const baseDir: string = (options.baseDir = path.resolve(rawBaseDir)); + options.targetDirectories = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); if (mode === 'pack') { terminal.writeLine(`Packing to ${archivePath} from ${rawTargetDirectories.join(', ')}`); - return packZip() as T['mode'] extends 'pack' ? IPackResult : IUnpackResult; + return packZip(options) as T['mode'] extends 'pack' ? IPackResult : IUnpackResult; } else { terminal.writeLine(`Unpacking to ${rawTargetDirectories.join(', ')} from ${archivePath}`); - return unpackZip() as T['mode'] extends 'pack' ? IPackResult : IUnpackResult; + return unpackZip(options) as T['mode'] extends 'pack' ? IPackResult : IUnpackResult; } } diff --git a/common/config/subspaces/build-tests-subspace/repo-state.json b/common/config/subspaces/build-tests-subspace/repo-state.json index e69eed3e56e..54a27474b51 100644 --- a/common/config/subspaces/build-tests-subspace/repo-state.json +++ b/common/config/subspaces/build-tests-subspace/repo-state.json @@ -2,5 +2,5 @@ { "pnpmShrinkwrapHash": "2ac01ba33e09661dc0e7d7faa36d215bb3d3b91e", "preferredVersionsHash": "550b4cee0bef4e97db6c6aad726df5149d20e7d9", - "packageJsonInjectedDependenciesHash": "3d0c925bcd727d09159fa60c410e6ddf4f2f5484" + "packageJsonInjectedDependenciesHash": "73ac91f09601cd919fddc7f1d193a03eab37102a" } diff --git a/common/config/subspaces/default/pnpm-lock.yaml b/common/config/subspaces/default/pnpm-lock.yaml index 754bb1a892e..936041daa66 100644 --- a/common/config/subspaces/default/pnpm-lock.yaml +++ b/common/config/subspaces/default/pnpm-lock.yaml @@ -454,9 +454,6 @@ importers: '@rushstack/lookup-by-path': specifier: workspace:* version: link:../../libraries/lookup-by-path - '@rushstack/node-core-library': - specifier: workspace:* - version: link:../../libraries/node-core-library '@rushstack/terminal': specifier: workspace:* version: link:../../libraries/terminal diff --git a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts index 79b6522d44f..7bbd0f0d7ac 100644 --- a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts +++ b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts @@ -166,7 +166,6 @@ export class OperationBuildCache { const projectFolderPath: string = this._project.projectFolder; let restoreSuccess: boolean = false; - terminal.writeVerboseLine(`Using zipsync to restore cached folders.`); try { const { zipSyncReturn: { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted } From cb9cd736976d9990e074b7d5a2bbc51d373e6474 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:34:48 -0700 Subject: [PATCH 07/20] split pack and unpack --- apps/zipsync/src/ZipSyncCommandLineParser.ts | 38 +- ...ipSync.test.ts.snap => index.test.ts.snap} | 6 +- ....test.ts.snap => workerAsync.test.ts.snap} | 4 +- apps/zipsync/src/benchmark.test.ts | 12 +- apps/zipsync/src/compress.ts | 3 +- apps/zipsync/src/crc32.test.ts | 3 +- .../src/{disposableFileHandle.ts => fs.ts} | 24 + apps/zipsync/src/hash.ts | 12 +- apps/zipsync/src/index.test.ts | 55 ++ apps/zipsync/src/index.ts | 5 + apps/zipsync/src/pack.ts | 354 +++++++++ .../src/{zipSyncWorker.ts => packWorker.ts} 
| 46 +- ...pSyncWorkerAsync.ts => packWorkerAsync.ts} | 27 +- apps/zipsync/src/perf.ts | 1 + apps/zipsync/src/testUtils.ts | 44 ++ apps/zipsync/src/unpack.ts | 328 ++++++++ apps/zipsync/src/unpackWorker.ts | 92 +++ apps/zipsync/src/unpackWorkerAsync.ts | 61 ++ apps/zipsync/src/workerAsync.test.ts | 49 ++ apps/zipsync/src/zipSync.test.ts | 95 --- apps/zipsync/src/zipSync.ts | 701 ------------------ apps/zipsync/src/zipSyncUtils.ts | 27 + apps/zipsync/src/zipSyncWorkerAsync.test.ts | 89 --- .../logic/buildCache/OperationBuildCache.ts | 67 +- 24 files changed, 1176 insertions(+), 967 deletions(-) rename apps/zipsync/src/__snapshots__/{zipSync.test.ts.snap => index.test.ts.snap} (98%) rename apps/zipsync/src/__snapshots__/{zipSyncWorkerAsync.test.ts.snap => workerAsync.test.ts.snap} (99%) rename apps/zipsync/src/{disposableFileHandle.ts => fs.ts} (61%) create mode 100644 apps/zipsync/src/index.test.ts create mode 100644 apps/zipsync/src/index.ts create mode 100644 apps/zipsync/src/pack.ts rename apps/zipsync/src/{zipSyncWorker.ts => packWorker.ts} (59%) rename apps/zipsync/src/{zipSyncWorkerAsync.ts => packWorkerAsync.ts} (66%) create mode 100644 apps/zipsync/src/testUtils.ts create mode 100644 apps/zipsync/src/unpack.ts create mode 100644 apps/zipsync/src/unpackWorker.ts create mode 100644 apps/zipsync/src/unpackWorkerAsync.ts create mode 100644 apps/zipsync/src/workerAsync.test.ts delete mode 100644 apps/zipsync/src/zipSync.test.ts delete mode 100644 apps/zipsync/src/zipSync.ts create mode 100644 apps/zipsync/src/zipSyncUtils.ts delete mode 100644 apps/zipsync/src/zipSyncWorkerAsync.test.ts diff --git a/apps/zipsync/src/ZipSyncCommandLineParser.ts b/apps/zipsync/src/ZipSyncCommandLineParser.ts index 2dd084078d6..3b72d163651 100644 --- a/apps/zipsync/src/ZipSyncCommandLineParser.ts +++ b/apps/zipsync/src/ZipSyncCommandLineParser.ts @@ -6,22 +6,22 @@ import type { CommandLineFlagParameter, IRequiredCommandLineStringParameter, IRequiredCommandLineChoiceParameter, - IRequiredCommandLineStringListParameter, - CommandLineChoiceParameter + IRequiredCommandLineStringListParameter } from '@rushstack/ts-command-line/lib/index'; import type { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider'; import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; -import { type IZipMode, zipSync } from './zipSync'; +import type { IZipSyncMode, ZipSyncOptionCompression } from './zipSyncUtils'; +import { pack, unpack } from './index'; export class ZipSyncCommandLineParser extends CommandLineParser { private readonly _debugParameter: CommandLineFlagParameter; private readonly _verboseParameter: CommandLineFlagParameter; - private readonly _modeParameter: IRequiredCommandLineChoiceParameter; + private readonly _modeParameter: IRequiredCommandLineChoiceParameter; private readonly _archivePathParameter: IRequiredCommandLineStringParameter; private readonly _baseDirParameter: IRequiredCommandLineStringParameter; private readonly _targetDirectoriesParameter: IRequiredCommandLineStringListParameter; - private readonly _compressionParameter: CommandLineChoiceParameter<'store' | 'deflate' | 'auto'>; + private readonly _compressionParameter: IRequiredCommandLineChoiceParameter; private readonly _terminal: ITerminal; private readonly _terminalProvider: ConsoleTerminalProvider; @@ -46,7 +46,7 @@ export class ZipSyncCommandLineParser extends CommandLineParser { description: 'Show verbose output' }); - this._modeParameter = this.defineChoiceParameter({ + this._modeParameter = 
this.defineChoiceParameter({ parameterLongName: '--mode', parameterShortName: '-m', description: @@ -79,7 +79,7 @@ export class ZipSyncCommandLineParser extends CommandLineParser { required: true }); - this._compressionParameter = this.defineChoiceParameter<'store' | 'deflate' | 'auto'>({ + this._compressionParameter = this.defineChoiceParameter({ parameterLongName: '--compression', parameterShortName: '-z', description: @@ -100,14 +100,22 @@ export class ZipSyncCommandLineParser extends CommandLineParser { this._terminalProvider.verboseEnabled = true; } try { - zipSync({ - terminal: this._terminal, - mode: this._modeParameter.value, - archivePath: this._archivePathParameter.value, - targetDirectories: this._targetDirectoriesParameter.values, - baseDir: this._baseDirParameter.value, - compression: (this._compressionParameter.value as 'store' | 'deflate' | 'auto' | undefined) ?? 'auto' - }); + if (this._modeParameter.value === 'pack') { + pack({ + terminal: this._terminal, + archivePath: this._archivePathParameter.value, + targetDirectories: this._targetDirectoriesParameter.values, + baseDir: this._baseDirParameter.value, + compression: this._compressionParameter.value + }); + } else if (this._modeParameter.value === 'unpack') { + unpack({ + terminal: this._terminal, + archivePath: this._archivePathParameter.value, + targetDirectories: this._targetDirectoriesParameter.values, + baseDir: this._baseDirParameter.value + }); + } } catch (error) { this._terminal.writeErrorLine('\n' + error.stack); } diff --git a/apps/zipsync/src/__snapshots__/zipSync.test.ts.snap b/apps/zipsync/src/__snapshots__/index.test.ts.snap similarity index 98% rename from apps/zipsync/src/__snapshots__/zipSync.test.ts.snap rename to apps/zipsync/src/__snapshots__/index.test.ts.snap index bbf73a8eaeb..cabaf56b437 100644 --- a/apps/zipsync/src/__snapshots__/zipSync.test.ts.snap +++ b/apps/zipsync/src/__snapshots__/index.test.ts.snap @@ -1,4 +1,4 @@ -// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing +// Jest Snapshot v1, https://goo.gl/fbAQLP exports[`zipSync tests basic pack test 1`] = ` Object { @@ -94,8 +94,8 @@ Object { exports[`zipSync tests basic pack test 2`] = ` Object { "filesDeleted": 0, - "filesExtracted": 20, - "filesSkipped": 0, + "filesExtracted": 12, + "filesSkipped": 8, "foldersDeleted": 0, "metadata": Object { "files": Object { diff --git a/apps/zipsync/src/__snapshots__/zipSyncWorkerAsync.test.ts.snap b/apps/zipsync/src/__snapshots__/workerAsync.test.ts.snap similarity index 99% rename from apps/zipsync/src/__snapshots__/zipSyncWorkerAsync.test.ts.snap rename to apps/zipsync/src/__snapshots__/workerAsync.test.ts.snap index 8b94af245d0..9eec6c0bd40 100644 --- a/apps/zipsync/src/__snapshots__/zipSyncWorkerAsync.test.ts.snap +++ b/apps/zipsync/src/__snapshots__/workerAsync.test.ts.snap @@ -94,8 +94,8 @@ Object { exports[`zipSyncWorkerAsync tests basic pack test 2`] = ` Object { "filesDeleted": 0, - "filesExtracted": 20, - "filesSkipped": 0, + "filesExtracted": 12, + "filesSkipped": 8, "foldersDeleted": 0, "metadata": Object { "files": Object { diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index 12105035824..b0f1c819ff9 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -7,9 +7,12 @@ import { tmpdir } from 'os'; import * as path from 'path'; import * as fs from 'fs'; import { createHash, randomUUID } from 'crypto'; -import { zipSync } from './zipSync'; + import { NoOpTerminalProvider, Terminal } from 
'@rushstack/terminal'; +import { pack } from './pack'; +import { unpack } from './unpack'; + // create a tempdir and setup dummy files there for benchmarking let tempDir: string; const runId = randomUUID(); @@ -192,8 +195,7 @@ function benchZipSyncScenario( const terminal = new Terminal(new NoOpTerminalProvider()); bench(kind, { pack: ({ archive, demoDir }) => { - const { filesPacked } = zipSync({ - mode: 'pack', + const { filesPacked } = pack({ archivePath: archive, targetDirectories: ['subdir1', 'subdir2'], baseDir: demoDir, @@ -203,12 +205,10 @@ function benchZipSyncScenario( console.log(`Files packed: ${filesPacked}`); }, unpack: ({ archive, unpackDir }) => { - const { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted } = zipSync({ - mode: 'unpack', + const { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted } = unpack({ archivePath: archive, targetDirectories: ['subdir1', 'subdir2'], baseDir: unpackDir, - compression, terminal }); console.log( diff --git a/apps/zipsync/src/compress.ts b/apps/zipsync/src/compress.ts index 0a11f129020..89c1223b77b 100644 --- a/apps/zipsync/src/compress.ts +++ b/apps/zipsync/src/compress.ts @@ -3,7 +3,6 @@ import type { Transform } from 'node:stream'; import zlib from 'node:zlib'; -import { DISPOSE_SYMBOL } from './disposableFileHandle'; type OutputChunkHandler = (chunk: Uint8Array, lengthBytes: number) => void; @@ -122,7 +121,7 @@ export function createIncrementalZlib( const handle: IHandle = compressor._handle!; return { - [DISPOSE_SYMBOL]: () => { + [Symbol.dispose]: () => { if (compressor._handle) { compressor._handle.close(); compressor._handle = undefined; diff --git a/apps/zipsync/src/crc32.test.ts b/apps/zipsync/src/crc32.test.ts index 59682bca311..fb73eb5f9d5 100644 --- a/apps/zipsync/src/crc32.test.ts +++ b/apps/zipsync/src/crc32.test.ts @@ -1,9 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. -import { fallbackCrc32 } from './crc32'; import * as zlib from 'zlib'; +import { fallbackCrc32 } from './crc32'; + describe('crc32', () => { it('fallbackCrc32 should match zlib.crc32', () => { if (!zlib.crc32) { diff --git a/apps/zipsync/src/disposableFileHandle.ts b/apps/zipsync/src/fs.ts similarity index 61% rename from apps/zipsync/src/disposableFileHandle.ts rename to apps/zipsync/src/fs.ts index d86f6286c47..96da7164f15 100644 --- a/apps/zipsync/src/disposableFileHandle.ts +++ b/apps/zipsync/src/fs.ts @@ -26,3 +26,27 @@ export function getDisposableFileHandle(path: string, openMode: OpenMode): IDisp return result; } + +export function rmdirSync(dirPath: string): void { + try { + fs.rmdirSync(dirPath); + } catch (e) { + if ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') { + // Not found, ignore + } else { + throw e; + } + } +} + +export function unlinkSync(filePath: string): void { + try { + fs.unlinkSync(filePath); + } catch (e) { + if (e && (e as NodeJS.ErrnoException).code === 'ENOENT') { + // Not found, ignore + } else { + throw e; + } + } +} diff --git a/apps/zipsync/src/hash.ts b/apps/zipsync/src/hash.ts index 264a20b7042..6ba6e59a587 100644 --- a/apps/zipsync/src/hash.ts +++ b/apps/zipsync/src/hash.ts @@ -1,14 +1,14 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. 
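These modules lean on TypeScript's explicit resource management: compress.ts now keys its cleanup off the standard Symbol.dispose instead of the package's own DISPOSE_SYMBOL, and the file handles from fs.ts are consumed with `using` declarations. A minimal sketch of that pattern, assuming Symbol.dispose is available at runtime (the config/jestSymbolDispose.js file suggests a polyfill is wired up for tests); openReadHandle is an illustrative name, not part of the package:

    import * as fs from 'node:fs';

    interface IDisposableReadHandle {
      fd: number;
      [Symbol.dispose](): void;
    }

    // Any object with a [Symbol.dispose]() method can back a `using` declaration.
    function openReadHandle(filePath: string): IDisposableReadHandle {
      const fd: number = fs.openSync(filePath, 'r');
      return {
        fd,
        [Symbol.dispose](): void {
          fs.closeSync(fd);
        }
      };
    }

    function readFirstByte(filePath: string): number {
      // The handle is closed automatically when this scope exits, even if readSync throws.
      using handle: IDisposableReadHandle = openReadHandle(filePath);
      const buffer: Buffer = Buffer.alloc(1);
      fs.readSync(handle.fd, buffer, 0, 1, 0);
      return buffer[0];
    }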
-import { closeSync, readSync, fstatSync, type Stats } from 'node:fs'; +import { readSync, fstatSync, type Stats } from 'node:fs'; import { createHash, type Hash } from 'node:crypto'; const buffer: Buffer = Buffer.allocUnsafeSlow(1 << 24); export function computeFileHash(fd: number): string | false { try { - const hash: Hash = createHash('sha256'); + const hash: Hash = createHash('sha1'); let totalBytesRead: number = 0; let bytesRead: number; do { @@ -34,9 +34,9 @@ export function computeFileHash(fd: number): string | false { return false; } throw err; - } finally { - if (fd !== undefined) { - closeSync(fd); - } } } + +export function calculateSHA1(data: Buffer): string { + return createHash('sha1').update(data).digest('hex'); +} diff --git a/apps/zipsync/src/index.test.ts b/apps/zipsync/src/index.test.ts new file mode 100644 index 00000000000..e07b680773a --- /dev/null +++ b/apps/zipsync/src/index.test.ts @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import * as path from 'node:path'; +import * as fs from 'node:fs'; + +import { NoOpTerminalProvider } from '@rushstack/terminal/lib/NoOpTerminalProvider'; +import { Terminal } from '@rushstack/terminal/lib/Terminal'; + +import { pack, unpack } from './index'; +import { getDemoDataDirectoryDisposable } from './testUtils'; + +describe('zipSync tests', () => { + it('basic pack test', () => { + using demoDataDisposable = getDemoDataDirectoryDisposable(5); + const { targetDirectories, baseDir } = demoDataDisposable; + + const terminal = new Terminal(new NoOpTerminalProvider()); + + const archivePath: string = path.join(baseDir, 'archive.zip'); + const packResult = pack({ + terminal: terminal, + compression: 'deflate', + baseDir, + targetDirectories, + archivePath + }); + + expect(packResult).toMatchSnapshot(); + + using unpackDemoDataDisposable = getDemoDataDirectoryDisposable(2); + const { baseDir: unpackBaseDir } = unpackDemoDataDisposable; + + const unpackResult = unpack({ + terminal: terminal, + archivePath, + baseDir: unpackBaseDir, + targetDirectories + }); + + expect(unpackResult).toMatchSnapshot(); + + // Verify files were extracted + for (const targetDirectory of targetDirectories) { + const sourceDir: string = path.join(baseDir, targetDirectory); + for (let i: number = 0; i < 5; ++i) { + const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); + const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); + expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( + fs.readFileSync(sourceFile, { encoding: 'utf-8' }) + ); + } + } + }); +}); diff --git a/apps/zipsync/src/index.ts b/apps/zipsync/src/index.ts new file mode 100644 index 00000000000..11913cee85c --- /dev/null +++ b/apps/zipsync/src/index.ts @@ -0,0 +1,5 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +export { pack, type IZipSyncPackResult, type IZipSyncPackOptions } from './pack'; +export { unpack, type IZipSyncUnpackResult, type IZipSyncUnpackOptions } from './unpack'; diff --git a/apps/zipsync/src/pack.ts b/apps/zipsync/src/pack.ts new file mode 100644 index 00000000000..8a8a25e57d7 --- /dev/null +++ b/apps/zipsync/src/pack.ts @@ -0,0 +1,354 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. 
+// See LICENSE in the project root for license information. + +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import * as crypto from 'node:crypto'; +import * as zlib from 'node:zlib'; + +import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; + +import { crc32Builder } from './crc32'; +import { DISPOSE_SYMBOL, getDisposableFileHandle, type IDisposableFileHandle } from './fs'; +import { type IIncrementalZlib, createIncrementalZlib } from './compress'; +import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; +import { + writeLocalFileHeader, + writeDataDescriptor, + writeCentralDirectoryHeader, + writeEndOfCentralDirectory, + DEFLATE_COMPRESSION, + STORE_COMPRESSION, + type ZipMetaCompressionMethod, + type IFileEntry, + dosDateTime +} from './zipUtils'; +import { calculateSHA1 } from './hash'; +import { + type ZipSyncOptionCompression, + type IMetadata, + type IDirQueueItem, + METADATA_VERSION, + METADATA_FILENAME +} from './zipSyncUtils'; + +const LIKELY_COMPRESSED_EXTENSION_REGEX: RegExp = + /\.(?:zip|gz|tgz|bz2|xz|7z|rar|jpg|jpeg|png|gif|webp|avif|mp4|m4v|mov|mkv|webm|mp3|ogg|aac|flac|pdf|woff|woff2)$/; + +/** + * @public + * Options for zipsync + */ +export interface IZipSyncPackOptions { + /** + * @rushstack/terminal compatible terminal for logging + */ + terminal: ITerminal; + /** + * Zip file path + */ + archivePath: string; + /** + * Target directories to pack or unpack (depending on mode) + */ + targetDirectories: ReadonlyArray; + /** + * Base directory for relative paths within the archive (defaults to common parent of targetDirectories) + */ + baseDir: string; + /** + * Compression mode. If set to 'deflate', file data will be compressed using raw DEFLATE (method 8) when this + * produces a smaller result; otherwise it will fall back to 'store' per-file. + */ + compression: ZipSyncOptionCompression; +} + +export interface IZipSyncPackResult { + filesPacked: number; + metadata: IMetadata; +} + +export function pack({ + archivePath, + targetDirectories: rawTargetDirectories, + baseDir: rawBaseDir, + compression, + terminal +}: IZipSyncPackOptions): IZipSyncPackResult { + const baseDir: string = path.resolve(rawBaseDir); + const targetDirectories: string[] = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); + terminal.writeLine(`Packing to ${archivePath} from ${rawTargetDirectories.join(', ')}`); + + markStart('pack.total'); + terminal.writeDebugLine('Starting pack'); + // Pass 1: enumerate + markStart('pack.enumerate'); + + const filePaths: string[] = []; + const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ dir, depth: 0 })); + + while (queue.length) { + const { dir: currentDir, depth } = queue.shift()!; + terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); + + const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); + + let items: fs.Dirent[]; + try { + items = fs.readdirSync(currentDir, { withFileTypes: true }); + } catch (e) { + if ( + e && + ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') + ) { + terminal.writeWarningLine(`Failed to read directory: ${currentDir}. 
Ignoring.`); + continue; + } else { + throw e; + } + } + + for (const item of items) { + const fullPath: string = path.join(currentDir, item.name); + if (item.isFile()) { + const relativePath: string = path.relative(baseDir, fullPath).replace(/\\/g, '/'); + terminal.writeVerboseLine(`${padding}${item.name}`); + filePaths.push(relativePath); + } else if (item.isDirectory()) { + terminal.writeVerboseLine(`${padding}${item.name}/`); + queue.push({ dir: fullPath, depth: depth + 1 }); + } else { + throw new Error(`Unexpected item (not file or directory): ${fullPath}. Aborting.`); + } + } + } + + terminal.writeLine(`Found ${filePaths.length} files to pack (enumerated)`); + markEnd('pack.enumerate'); + + // Pass 2: read + hash + compress + markStart('pack.prepareEntries'); + const bufferSize: number = 1 << 25; // 32 MiB + const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + + terminal.writeDebugLine(`Opening archive for write: ${archivePath}`); + using zipFile: IDisposableFileHandle = getDisposableFileHandle(archivePath, 'w'); + let currentOffset: number = 0; + // Use this function to do any write to the zip file, so that we can track the current offset. + function writeChunkToZip(chunk: Uint8Array, lengthBytes: number = chunk.byteLength): void { + let offset: number = 0; + while (lengthBytes > 0 && offset < chunk.byteLength) { + // In practice this call always writes all data at once, but the spec says it is not an error + // for it to not do so. Possibly that situation comes up when writing to something that is not + // an ordinary file. + const written: number = fs.writeSync(zipFile.fd, chunk, offset, lengthBytes); + lengthBytes -= written; + offset += written; + } + currentOffset += offset; + } + function writeChunksToZip(chunks: Uint8Array[]): void { + for (const chunk of chunks) { + writeChunkToZip(chunk); + } + } + + const dosDateTimeNow: { time: number; date: number } = dosDateTime(new Date()); + function writeFileEntry(relativePath: string): IFileEntry { + function isLikelyAlreadyCompressed(filename: string): boolean { + return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); + } + const fullPath: string = path.join(baseDir, relativePath); + + const readInputInChunks: (onChunk: (bytesInInputBuffer: number) => void) => void = ( + onChunk: (bytesInInputBuffer: number) => void + ): void => { + using inputDisposable: IDisposableFileHandle = getDisposableFileHandle(fullPath, 'r'); + + let bytesInInputBuffer: number = 0; + // The entire input buffer will be drained in each loop iteration + // So run until EOF + while (!isNaN(inputDisposable.fd)) { + bytesInInputBuffer = fs.readSync(inputDisposable.fd, inputBuffer, 0, inputBuffer.byteLength, -1); + + if (bytesInInputBuffer <= 0) { + // EOF, close the input fd + inputDisposable[DISPOSE_SYMBOL](); + } + + onChunk(bytesInInputBuffer); + } + }; + + let shouldCompress: boolean = false; + if (compression === 'deflate') { + shouldCompress = true; + } else if (compression === 'auto') { + // Heuristic: skip compression for small files or likely-already-compressed files + if (!isLikelyAlreadyCompressed(relativePath)) { + shouldCompress = true; + } else { + terminal.writeVerboseLine( + `Skip compression heuristically (already-compressed) for ${relativePath} (size unknown at this point)` + ); + } + } + + const compressionMethod: ZipMetaCompressionMethod = shouldCompress + ? 
DEFLATE_COMPRESSION + : STORE_COMPRESSION; + + const entry: IFileEntry = { + filename: relativePath, + size: 0, + compressedSize: 0, + crc32: 0, + sha1Hash: '', + localHeaderOffset: currentOffset, + compressionMethod, + dosDateTime: dosDateTimeNow + }; + + writeChunksToZip(writeLocalFileHeader(entry)); + + const sha1HashBuilder: crypto.Hash = crypto.createHash('sha1'); + let crc32: number = 0; + let uncompressedSize: number = 0; + let compressedSize: number = 0; + + using deflateIncremental: IIncrementalZlib | undefined = shouldCompress + ? createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + writeChunkToZip(chunk, lengthBytes); + compressedSize += lengthBytes; + }, + 'deflate' + ) + : undefined; + + // Also capture content if we might need it (for compression decision or storing raw data). + // We'll accumulate into an array of buffers to avoid repeated concatenations for large files. + readInputInChunks((bytesInInputBuffer: number) => { + const slice: Buffer = inputBuffer.subarray(0, bytesInInputBuffer); + sha1HashBuilder.update(slice); + crc32 = crc32Builder(slice, crc32); + if (deflateIncremental) { + deflateIncremental.update(slice); + } else { + writeChunkToZip(slice, bytesInInputBuffer); + } + uncompressedSize += bytesInInputBuffer; + }); + + // finalize hashes, compression + deflateIncremental?.update(Buffer.alloc(0)); + crc32 = crc32 >>> 0; + const sha1Hash: string = sha1HashBuilder.digest('hex'); + + if (!shouldCompress) { + compressedSize = uncompressedSize; + } + + entry.size = uncompressedSize; + entry.compressedSize = compressedSize; + entry.crc32 = crc32; + entry.sha1Hash = sha1Hash; + + writeChunkToZip(writeDataDescriptor(entry)); + + terminal.writeVerboseLine( + `${relativePath} (sha1=${entry.sha1Hash}, crc32=${entry.crc32.toString(16)}, size=${ + entry.size + }, compressed=${entry.compressedSize}, method=${entry.compressionMethod}, compressed ${( + 100 - + (entry.compressedSize / entry.size) * 100 + ).toFixed(1)}%)` + ); + return entry; + } + + const entries: IFileEntry[] = []; + for (const relativePath of filePaths) { + entries.push(writeFileEntry(relativePath)); + } + + markEnd('pack.prepareEntries'); + terminal.writeLine(`Prepared ${entries.length} file entries`); + + markStart('pack.metadata.build'); + const metadata: IMetadata = { version: METADATA_VERSION, files: {} }; + for (const entry of entries) { + metadata.files[entry.filename] = { size: entry.size, sha1Hash: entry.sha1Hash }; + } + + const metadataContent: string = JSON.stringify(metadata); + const metadataBuffer: Buffer = Buffer.from(metadataContent, 'utf8'); + terminal.writeDebugLine( + `Metadata size=${metadataBuffer.length} bytes, fileCount=${Object.keys(metadata.files).length}` + ); + + let metadataCompressionMethod: ZipMetaCompressionMethod = STORE_COMPRESSION; + let metadataData: Buffer = metadataBuffer; + let metadataCompressedSize: number = metadataBuffer.length; + if ((compression === 'deflate' || compression === 'auto') && metadataBuffer.length > 64) { + const compressed: Buffer = zlib.deflateRawSync(metadataBuffer, { level: 9 }); + if (compressed.length < metadataBuffer.length) { + metadataCompressionMethod = DEFLATE_COMPRESSION; + metadataData = compressed; + metadataCompressedSize = compressed.length; + terminal.writeDebugLine( + `Metadata compressed (orig=${metadataBuffer.length}, compressed=${compressed.length})` + ); + } else { + terminal.writeDebugLine('Metadata compression skipped (not smaller)'); + } + } + + const metadataEntry: IFileEntry = { + filename: METADATA_FILENAME, 
+ size: metadataBuffer.length, + compressedSize: metadataCompressedSize, + crc32: crc32Builder(metadataBuffer), + sha1Hash: calculateSHA1(metadataBuffer), + localHeaderOffset: currentOffset, + compressionMethod: metadataCompressionMethod, + dosDateTime: dosDateTimeNow + }; + + writeChunksToZip(writeLocalFileHeader(metadataEntry)); + writeChunkToZip(metadataData, metadataCompressedSize); + writeChunkToZip(writeDataDescriptor(metadataEntry)); + + entries.push(metadataEntry); + terminal.writeVerboseLine(`Total entries including metadata: ${entries.length}`); + + markEnd('pack.metadata.build'); + + markStart('pack.write.entries'); + const outputDir: string = path.dirname(archivePath); + fs.mkdirSync(outputDir, { recursive: true }); + + markEnd('pack.write.entries'); + + markStart('pack.write.centralDirectory'); + const centralDirOffset: number = currentOffset; + for (const entry of entries) { + writeChunksToZip(writeCentralDirectoryHeader(entry)); + } + const centralDirSize: number = currentOffset - centralDirOffset; + markEnd('pack.write.centralDirectory'); + + // Write end of central directory + markStart('pack.write.eocd'); + writeChunkToZip(writeEndOfCentralDirectory(centralDirOffset, centralDirSize, entries.length)); + terminal.writeDebugLine('EOCD record written'); + markEnd('pack.write.eocd'); + + markEnd('pack.total'); + const total: number = getDuration('pack.total'); + emitSummary('pack', terminal); + terminal.writeLine(`Successfully packed ${entries.length} files in ${formatDuration(total)}`); + return { filesPacked: entries.length, metadata }; +} diff --git a/apps/zipsync/src/zipSyncWorker.ts b/apps/zipsync/src/packWorker.ts similarity index 59% rename from apps/zipsync/src/zipSyncWorker.ts rename to apps/zipsync/src/packWorker.ts index 9a1c1c6e7f9..fccff8231c5 100644 --- a/apps/zipsync/src/zipSyncWorker.ts +++ b/apps/zipsync/src/packWorker.ts @@ -2,40 +2,47 @@ // See LICENSE in the project root for license information. 
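With pack.ts complete, here is a minimal sketch of calling the standalone pack() entry point directly; the paths are illustrative, and the option shape follows the IZipSyncPackOptions interface above:

    import { ConsoleTerminalProvider } from '@rushstack/terminal/lib/ConsoleTerminalProvider';
    import { Terminal } from '@rushstack/terminal/lib/Terminal';

    import { pack, type IZipSyncPackResult } from './index';

    const terminal: Terminal = new Terminal(new ConsoleTerminalProvider());

    // Archives lib/ and dist/ (relative to baseDir) into a single cache entry.
    const result: IZipSyncPackResult = pack({
      terminal,
      archivePath: '/tmp/zipsync-example/archive.zip',
      baseDir: '/repo/apps/example',
      targetDirectories: ['lib', 'dist'],
      compression: 'auto'
    });

    terminal.writeLine(`Packed ${result.filesPacked} entries (including the metadata file)`);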
import { parentPort as rawParentPort, type MessagePort } from 'node:worker_threads'; -import { type IZipSyncOptions, zipSync } from './zipSync'; + import { Terminal } from '@rushstack/terminal/lib/Terminal'; import { StringBufferTerminalProvider } from '@rushstack/terminal/lib/StringBufferTerminalProvider'; +import { type IZipSyncPackOptions, type IZipSyncPackResult, pack } from './pack'; + +export { type IZipSyncPackOptions, type IZipSyncPackResult } from './pack'; + export interface IHashWorkerData { basePath: string; } -export interface IZipSyncCommandMessage { - type: 'zipsync'; +export interface IZipSyncPackCommandMessage { + type: 'zipsync-pack'; id: number; - options: Omit; + options: Omit; +} + +export interface IZipSyncPackWorkerResult { + zipSyncReturn: IZipSyncPackResult; + zipSyncLogs: string; } interface IZipSyncSuccessMessage { id: number; - type: 'zipsync'; - result: { - zipSyncReturn: ReturnType; - zipSyncLogs: string; - }; + type: 'zipsync-pack'; + result: IZipSyncPackWorkerResult; } -export interface IErrorMessage { +export interface IZipSyncPackErrorMessage { type: 'error'; id: number; args: { message: string; stack: string; + zipSyncLogs: string; }; } -export type IHostToWorkerMessage = IZipSyncCommandMessage; -export type IWorkerToHostMessage = IZipSyncSuccessMessage | IErrorMessage; +export type IHostToWorkerMessage = IZipSyncPackCommandMessage; +export type IWorkerToHostMessage = IZipSyncSuccessMessage | IZipSyncPackErrorMessage; if (!rawParentPort) { throw new Error('This module must be run in a worker thread.'); @@ -49,19 +56,19 @@ function handleMessage(message: IHostToWorkerMessage | false): void { return; } + const terminalProvider: StringBufferTerminalProvider = new StringBufferTerminalProvider(); + const terminal: Terminal = new Terminal(terminalProvider); + try { switch (message.type) { - case 'zipsync': { + case 'zipsync-pack': { const { options } = message; - const terminalProvider: StringBufferTerminalProvider = new StringBufferTerminalProvider(); - const terminal: Terminal = new Terminal(terminalProvider); - const successMessage: IZipSyncSuccessMessage = { type: message.type, id: message.id, result: { - zipSyncReturn: zipSync({ ...options, terminal }), + zipSyncReturn: pack({ ...options, terminal }), zipSyncLogs: terminalProvider.getOutput() } }; @@ -69,12 +76,13 @@ function handleMessage(message: IHostToWorkerMessage | false): void { } } } catch (err) { - const errorMessage: IErrorMessage = { + const errorMessage: IZipSyncPackErrorMessage = { type: 'error', id: message.id, args: { message: (err as Error).message, - stack: (err as Error).stack || '' + stack: (err as Error).stack || '', + zipSyncLogs: terminalProvider.getOutput() } }; parentPort.postMessage(errorMessage); diff --git a/apps/zipsync/src/zipSyncWorkerAsync.ts b/apps/zipsync/src/packWorkerAsync.ts similarity index 66% rename from apps/zipsync/src/zipSyncWorkerAsync.ts rename to apps/zipsync/src/packWorkerAsync.ts index 34eb7657e7c..71e7b1db062 100644 --- a/apps/zipsync/src/zipSyncWorkerAsync.ts +++ b/apps/zipsync/src/packWorkerAsync.ts @@ -2,22 +2,27 @@ // See LICENSE in the project root for license information. 
import type { Worker } from 'node:worker_threads'; -import type { IZipSyncOptions } from './zipSync'; -import type { IWorkerToHostMessage, IZipSyncCommandMessage } from './zipSyncWorker'; -type IZipSyncResult = ReturnType; +import type { + IWorkerToHostMessage, + IHostToWorkerMessage, + IZipSyncPackWorkerResult, + IZipSyncPackOptions +} from './packWorker'; -export async function zipSyncWorkerAsync( - options: Omit -): Promise { +export type { IZipSyncPackWorkerResult } from './packWorker'; + +export async function packWorkerAsync( + options: Omit +): Promise { const { Worker } = await import('node:worker_threads'); - const worker: Worker = new Worker(require.resolve('./zipSyncWorker')); + const worker: Worker = new Worker(require.resolve('./packWorker')); - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { worker.on('message', (message: IWorkerToHostMessage) => { switch (message.type) { - case 'zipsync': { + case 'zipsync-pack': { resolve(message.result); break; } @@ -44,8 +49,8 @@ export async function zipSyncWorkerAsync( } }); - const commandMessage: IZipSyncCommandMessage = { - type: 'zipsync', + const commandMessage: IHostToWorkerMessage = { + type: 'zipsync-pack', id: 0, options }; diff --git a/apps/zipsync/src/perf.ts b/apps/zipsync/src/perf.ts index 7c3a20f34b2..1e4677a8eee 100644 --- a/apps/zipsync/src/perf.ts +++ b/apps/zipsync/src/perf.ts @@ -3,6 +3,7 @@ import type { PerformanceEntry } from 'node:perf_hooks'; import { performance } from 'node:perf_hooks'; + import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; export function markStart(name: string): void { diff --git a/apps/zipsync/src/testUtils.ts b/apps/zipsync/src/testUtils.ts new file mode 100644 index 00000000000..489c14445a5 --- /dev/null +++ b/apps/zipsync/src/testUtils.ts @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { tmpdir } from 'node:os'; +import * as path from 'node:path'; +import * as fs from 'node:fs'; +import * as crypto from 'node:crypto'; + +export function getTempDir(): string { + const randomId: string = crypto.randomUUID(); + const tempDir: string = path.join(tmpdir(), `zipsync-test-${randomId}`); + fs.mkdirSync(tempDir); + return tempDir; +} + +export function getDemoDataDirectoryDisposable(numFiles: number): { + targetDirectories: string[]; + baseDir: string; + [Symbol.dispose](): void; +} { + const baseDir: string = getTempDir(); + + const targetDirectories: string[] = ['demo-data-1', 'demo-data-2', 'demo-data-3', 'nested/demo/dir/4'].map( + (folderName) => { + const dataDir: string = path.join(baseDir, folderName); + fs.mkdirSync(dataDir, { recursive: true }); + const subdir: string = path.join(dataDir, 'subdir'); + fs.mkdirSync(subdir); + for (let i: number = 0; i < numFiles; ++i) { + const filePath: string = path.join(subdir, `file-${i}.txt`); + fs.writeFileSync(filePath, `This is file ${i} in ${folderName}/subdir\n`, { encoding: 'utf-8' }); + } + return folderName; + } + ); + + return { + targetDirectories, + baseDir, + [Symbol.dispose]() { + fs.rmSync(baseDir, { recursive: true, force: true }); + } + }; +} diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts new file mode 100644 index 00000000000..3e3672e1ed5 --- /dev/null +++ b/apps/zipsync/src/unpack.ts @@ -0,0 +1,328 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. 
+// See LICENSE in the project root for license information. + +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import * as zlib from 'node:zlib'; + +import { type IReadonlyPathTrieNode, LookupByPath } from '@rushstack/lookup-by-path/lib/LookupByPath'; +import type { ITerminal } from '@rushstack/terminal'; + +import { getDisposableFileHandle, type IDisposableFileHandle } from './fs'; +import { type IIncrementalZlib, createIncrementalZlib } from './compress'; +import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; +import { + findEndOfCentralDirectory, + parseCentralDirectoryHeader, + getFileFromZip, + DEFLATE_COMPRESSION, + STORE_COMPRESSION, + type IEndOfCentralDirectory, + type ICentralDirectoryHeaderParseResult +} from './zipUtils'; +import { computeFileHash } from './hash'; +import { METADATA_FILENAME, METADATA_VERSION, type IDirQueueItem, type IMetadata } from './zipSyncUtils'; + +/** + * @public + * Options for zipsync + */ +export interface IZipSyncUnpackOptions { + /** + * @rushstack/terminal compatible terminal for logging + */ + terminal: ITerminal; + /** + * Zip file path + */ + archivePath: string; + /** + * Target directories to pack or unpack (depending on mode) + */ + targetDirectories: ReadonlyArray; + /** + * Base directory for relative paths within the archive (defaults to common parent of targetDirectories) + */ + baseDir: string; +} + +export interface IZipSyncUnpackResult { + metadata: IMetadata; + filesExtracted: number; + filesSkipped: number; + filesDeleted: number; + foldersDeleted: number; + otherEntriesDeleted: number; +} + +export function unpack({ + archivePath, + targetDirectories: rawTargetDirectories, + baseDir: rawBaseDir, + terminal +}: IZipSyncUnpackOptions): IZipSyncUnpackResult { + const baseDir: string = path.resolve(rawBaseDir); + const targetDirectories: string[] = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); + terminal.writeLine(`Unpacking to ${rawTargetDirectories.join(', ')} from ${archivePath}`); + + markStart('unpack.total'); + terminal.writeDebugLine('Starting unpackZip'); + + markStart('unpack.read.archive'); + const zipBuffer: Buffer = fs.readFileSync(archivePath); + terminal.writeDebugLine(`Archive size=${zipBuffer.length} bytes`); + markEnd('unpack.read.archive'); + + markStart('unpack.parse.centralDirectory'); + const zipTree: LookupByPath = new LookupByPath(); + const endOfCentralDir: IEndOfCentralDirectory = findEndOfCentralDirectory(zipBuffer); + + const centralDirBuffer: Buffer = zipBuffer.subarray( + endOfCentralDir.centralDirOffset, + endOfCentralDir.centralDirOffset + endOfCentralDir.centralDirSize + ); + terminal.writeDebugLine( + `Central directory slice size=${centralDirBuffer.length} (expected=${endOfCentralDir.centralDirSize})` + ); + + let metadataEntry: ICentralDirectoryHeaderParseResult | undefined; + const entries: Array = []; + let offset: number = 0; + + for (let i: number = 0; i < endOfCentralDir.totalCentralDirRecords; i++) { + const result: ICentralDirectoryHeaderParseResult = parseCentralDirectoryHeader(centralDirBuffer, offset); + zipTree.setItem(result.filename, true); + + if (result.filename === METADATA_FILENAME) { + if (metadataEntry) { + throw new Error('Multiple metadata entries found in archive'); + } + metadataEntry = result; + } + + entries.push(result); + offset = result.nextOffset; + terminal.writeDebugLine( + `Parsed central entry ${result.filename} (method=${result.header.compressionMethod}, compSize=${result.header.compressedSize})` + ); 
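    // Illustrative note (editor's sketch, based on the standard ZIP central directory layout;
    // not part of this change): `nextOffset` is needed because each central directory record is
    // a fixed 46-byte header followed by three variable-length fields (file name, extra field,
    // file comment), so records cannot be walked with a constant stride. Roughly:
    //
    //   const nameLength: number = centralDirBuffer.readUInt16LE(offset + 28);
    //   const extraLength: number = centralDirBuffer.readUInt16LE(offset + 30);
    //   const commentLength: number = centralDirBuffer.readUInt16LE(offset + 32);
    //   const nextOffset: number = offset + 46 + nameLength + extraLength + commentLength;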
+ } + markEnd('unpack.parse.centralDirectory'); + + if (!metadataEntry) { + throw new Error(`Metadata entry not found in archive`); + } + + markStart('unpack.read.metadata'); + terminal.writeDebugLine('Metadata entry found, reading'); + const metadataZipBuffer: Buffer = getFileFromZip(zipBuffer, metadataEntry); + + let metadataBuffer: Buffer; + if (metadataEntry.header.compressionMethod === STORE_COMPRESSION) { + metadataBuffer = metadataZipBuffer; + } else if (metadataEntry.header.compressionMethod === DEFLATE_COMPRESSION) { + metadataBuffer = zlib.inflateRawSync(metadataZipBuffer); + if (metadataBuffer.length !== metadataEntry.header.uncompressedSize) { + throw new Error( + `Metadata size mismatch (expected ${metadataEntry.header.uncompressedSize}, got ${metadataBuffer.length})` + ); + } + } else { + throw new Error(`Unsupported compression method for metadata: ${metadataEntry.header.compressionMethod}`); + } + + const metadata: IMetadata = JSON.parse(metadataBuffer.toString('utf8')) as IMetadata; + + if (metadata.version !== METADATA_VERSION) { + throw new Error(`Unsupported metadata version: ${metadata.version}`); + } + + terminal.writeDebugLine( + `Metadata (version=${metadata.version}) parsed (fileCount=${Object.keys(metadata.files).length}, rawSize=${metadataBuffer.length})` + ); + markEnd('unpack.read.metadata'); + + terminal.writeLine(`Found ${entries.length} files in archive`); + + for (const targetDirectory of targetDirectories) { + fs.mkdirSync(targetDirectory, { recursive: true }); + terminal.writeDebugLine(`Ensured target directory: ${targetDirectory}`); + } + + let extractedCount: number = 0; + let skippedCount: number = 0; + let deletedFilesCount: number = 0; + let deletedOtherCount: number = 0; + let deletedFoldersCount: number = 0; + let scanCount: number = 0; + + const dirsToCleanup: string[] = []; + + markStart('unpack.scan.existing'); + const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ + dir, + depth: 0, + node: zipTree.getNodeAtPrefix(path.relative(baseDir, dir)) + })); + + while (queue.length) { + const { dir: currentDir, depth, node } = queue.shift()!; + terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); + + const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); + + let items: fs.Dirent[]; + try { + items = fs.readdirSync(currentDir, { withFileTypes: true }); + } catch (e) { + terminal.writeWarningLine(`Failed to read directory: ${currentDir}`); + continue; + } + + for (const item of items) { + scanCount++; + // check if exists in zipTree, if not delete + const relativePath: string = path + .relative(baseDir, path.join(currentDir, item.name)) + .replace(/\\/g, '/'); + + const childNode: IReadonlyPathTrieNode | undefined = node?.children?.get(item.name); + + if (item.isFile()) { + terminal.writeVerboseLine(`${padding}${item.name}`); + if (!childNode?.value) { + terminal.writeDebugLine(`Deleting file: ${relativePath}`); + fs.unlinkSync(relativePath); + deletedFilesCount++; + } + } else if (item.isDirectory()) { + terminal.writeVerboseLine(`${padding}${item.name}/`); + queue.push({ dir: relativePath, depth: depth + 1, node: childNode }); + if (!childNode || childNode.value) { + dirsToCleanup.push(relativePath); + } + } else { + terminal.writeVerboseLine(`${padding}${item.name} (not file or directory, deleting)`); + fs.unlinkSync(relativePath); + deletedOtherCount++; + } + } + } + + for (const dir of dirsToCleanup) { + // Try to remove the directory. If it is not empty, this will throw and we can ignore the error. 
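    // Illustrative note (editor's sketch, not part of this change): fs.rmdirSync is expected to
    // throw ENOTEMPTY (EEXIST on some platforms) when the directory still contains extracted
    // files, and that is the error intentionally swallowed below. Because the breadth-first scan
    // above pushed parent directories before their children, a parent whose only contents were
    // empty child directories can remain after this pass and be removed on a later unpack.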
+ try { + fs.rmdirSync(dir); + terminal.writeDebugLine(`Deleted empty directory: ${dir}`); + deletedFoldersCount++; + } catch (e) { + // Probably not empty + terminal.writeDebugLine(`Directory not empty, skipping: ${dir}`); + } + } + + terminal.writeDebugLine(`Existing entries tracked: ${scanCount}`); + markEnd('unpack.scan.existing'); + + markStart('unpack.extract.loop'); + const bufferSize: number = 1 << 25; // 32 MiB + const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + + const dirsCreated: Set = new Set(); + + for (const entry of entries) { + if (entry.filename === METADATA_FILENAME) { + continue; + } + + const targetPath: string = path.join(baseDir, entry.filename); + const targetDir: string = path.dirname(targetPath); + if (!dirsCreated.has(targetDir)) { + fs.mkdirSync(targetDir, { recursive: true }); + dirsCreated.add(targetDir); + } + + let shouldExtract: boolean = true; + if (metadata) { + const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; + + if (metadataFile) { + try { + using existingFile: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'r'); + const existingHash: string | false = computeFileHash(existingFile.fd); + if (existingHash === metadataFile.sha1Hash) { + shouldExtract = false; + skippedCount++; + terminal.writeDebugLine(`Skip unchanged file: ${entry.filename}`); + } + } catch (e) { + if ((e as NodeJS.ErrnoException).code === 'ENOENT') { + // File does not exist, will extract + } else { + throw e; + } + } + } + } + + if (shouldExtract) { + terminal.writeDebugLine(`Extracting file: ${entry.filename}`); + const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); + let fileData: Buffer; + using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); + if (entry.header.compressionMethod === STORE_COMPRESSION) { + fileData = fileZipBuffer; + let writeOffset: number = 0; + while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { + const written: number = fs.writeSync( + fileHandle.fd, + fileData, + writeOffset, + fileData.length - writeOffset + ); + writeOffset += written; + } + } else if (entry.header.compressionMethod === DEFLATE_COMPRESSION) { + using inflateIncremental: IIncrementalZlib = createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + let writeOffset: number = 0; + while (lengthBytes > 0 && writeOffset < chunk.byteLength) { + const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); + lengthBytes -= written; + writeOffset += written; + } + }, + 'inflate' + ); + inflateIncremental.update(fileZipBuffer); + inflateIncremental.update(Buffer.alloc(0)); + } else { + throw new Error( + `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` + ); + } + + // If data descriptor was used we rely on central directory values already consumed. 
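    // Illustrative note (editor's sketch assuming the standard ZIP data descriptor layout; the
    // name `descriptorOffset` is hypothetical and not defined in this change): the descriptor
    // following each file's data is 16 bytes when the optional 0x08074b50 signature is present
    // (12 bytes without it), holding CRC-32, compressed size, and uncompressed size as
    // little-endian 32-bit values. Since the central directory parsed above already carries the
    // authoritative values, those bytes are never re-read here, e.g.:
    //
    //   const crc32: number = zipBuffer.readUInt32LE(descriptorOffset + 4);
    //   const compressedSize: number = zipBuffer.readUInt32LE(descriptorOffset + 8);
    //   const uncompressedSize: number = zipBuffer.readUInt32LE(descriptorOffset + 12);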
+ extractedCount++; + } + } + markEnd('unpack.extract.loop'); + + markEnd('unpack.total'); + const unpackTotal: number = getDuration('unpack.total'); + terminal.writeLine( + `Extraction complete: ${extractedCount} extracted, ${skippedCount} skipped, ${deletedFilesCount} deleted, ${deletedFoldersCount} folders deleted, ${deletedOtherCount} other entries deleted in ${formatDuration( + unpackTotal + )}` + ); + emitSummary('unpack', terminal); + terminal.writeDebugLine('unpackZip finished'); + return { + metadata, + filesExtracted: extractedCount, + filesSkipped: skippedCount, + filesDeleted: deletedFilesCount, + foldersDeleted: deletedFoldersCount, + otherEntriesDeleted: deletedOtherCount + }; +} diff --git a/apps/zipsync/src/unpackWorker.ts b/apps/zipsync/src/unpackWorker.ts new file mode 100644 index 00000000000..d08e2e7ae20 --- /dev/null +++ b/apps/zipsync/src/unpackWorker.ts @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import { parentPort as rawParentPort, type MessagePort } from 'node:worker_threads'; + +import { Terminal } from '@rushstack/terminal/lib/Terminal'; +import { StringBufferTerminalProvider } from '@rushstack/terminal/lib/StringBufferTerminalProvider'; + +import { type IZipSyncUnpackOptions, type IZipSyncUnpackResult, unpack } from './unpack'; + +export { type IZipSyncUnpackOptions, type IZipSyncUnpackResult } from './unpack'; + +export interface IHashWorkerData { + basePath: string; +} + +export interface IZipSyncUnpackCommandMessage { + type: 'zipsync-unpack'; + id: number; + options: Omit; +} + +export interface IZipSyncUnpackWorkerResult { + zipSyncReturn: IZipSyncUnpackResult; + zipSyncLogs: string; +} + +interface IZipSyncUnpackSuccessMessage { + id: number; + type: 'zipsync-unpack'; + result: IZipSyncUnpackWorkerResult; +} + +export interface IZipSyncUnpackErrorMessage { + type: 'error'; + id: number; + args: { + message: string; + stack: string; + zipSyncLogs: string; + }; +} + +export type IHostToWorkerMessage = IZipSyncUnpackCommandMessage; +export type IWorkerToHostMessage = IZipSyncUnpackSuccessMessage | IZipSyncUnpackErrorMessage; + +if (!rawParentPort) { + throw new Error('This module must be run in a worker thread.'); +} +const parentPort: MessagePort = rawParentPort; + +function handleMessage(message: IHostToWorkerMessage | false): void { + if (message === false) { + parentPort.removeAllListeners(); + parentPort.close(); + return; + } + + const terminalProvider: StringBufferTerminalProvider = new StringBufferTerminalProvider(); + const terminal: Terminal = new Terminal(terminalProvider); + + try { + switch (message.type) { + case 'zipsync-unpack': { + const { options } = message; + + const successMessage: IZipSyncUnpackSuccessMessage = { + type: message.type, + id: message.id, + result: { + zipSyncReturn: unpack({ ...options, terminal }), + zipSyncLogs: terminalProvider.getOutput() + } + }; + return parentPort.postMessage(successMessage); + } + } + } catch (err) { + const errorMessage: IZipSyncUnpackErrorMessage = { + type: 'error', + id: message.id, + args: { + message: (err as Error).message, + stack: (err as Error).stack || '', + zipSyncLogs: terminalProvider.getOutput() + } + }; + parentPort.postMessage(errorMessage); + } +} + +parentPort.on('message', handleMessage); diff --git a/apps/zipsync/src/unpackWorkerAsync.ts b/apps/zipsync/src/unpackWorkerAsync.ts new file mode 100644 index 00000000000..73714a016b7 --- /dev/null +++ 
b/apps/zipsync/src/unpackWorkerAsync.ts @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import type { Worker } from 'node:worker_threads'; + +import type { + IWorkerToHostMessage, + IHostToWorkerMessage, + IZipSyncUnpackWorkerResult, + IZipSyncUnpackOptions +} from './unpackWorker'; + +export type { IZipSyncUnpackWorkerResult } from './unpackWorker'; + +export async function unpackWorkerAsync( + options: Omit +): Promise { + const { Worker } = await import('node:worker_threads'); + + const worker: Worker = new Worker(require.resolve('./unpackWorker')); + + return new Promise((resolve, reject) => { + worker.on('message', (message: IWorkerToHostMessage) => { + switch (message.type) { + case 'zipsync-unpack': { + resolve(message.result); + break; + } + case 'error': { + const error: Error = new Error(message.args.message); + error.stack = message.args.stack; + reject(error); + break; + } + default: { + const exhaustiveCheck: never = message; + throw new Error(`Unexpected message type: ${JSON.stringify(exhaustiveCheck)}`); + } + } + }); + + worker.on('error', (err) => { + reject(err); + }); + + worker.on('exit', (code) => { + if (code !== 0) { + reject(new Error(`Worker stopped with exit code ${code}`)); + } + }); + + const commandMessage: IHostToWorkerMessage = { + type: 'zipsync-unpack', + id: 0, + options + }; + worker.postMessage(commandMessage); + }).finally(() => { + worker.postMessage(false); + }); +} diff --git a/apps/zipsync/src/workerAsync.test.ts b/apps/zipsync/src/workerAsync.test.ts new file mode 100644 index 00000000000..560ff1c5701 --- /dev/null +++ b/apps/zipsync/src/workerAsync.test.ts @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. 
+ +import * as path from 'node:path'; +import * as fs from 'node:fs'; + +import { unpackWorkerAsync } from './unpackWorkerAsync'; +import { packWorkerAsync } from './packWorkerAsync'; +import { getDemoDataDirectoryDisposable } from './testUtils'; + +describe('zipSyncWorkerAsync tests', () => { + it('basic pack test', async () => { + using demoDataDisposable = getDemoDataDirectoryDisposable(5); + const { targetDirectories, baseDir } = demoDataDisposable; + + const archivePath: string = path.join(baseDir, 'archive.zip'); + const { zipSyncReturn: packResult } = await packWorkerAsync({ + compression: 'deflate', + baseDir, + targetDirectories, + archivePath + }); + + expect(packResult).toMatchSnapshot(); + + using unpackDemoDataDisposable = getDemoDataDirectoryDisposable(2); + const { baseDir: unpackBaseDir } = unpackDemoDataDisposable; + + const { zipSyncReturn: unpackResult } = await unpackWorkerAsync({ + archivePath, + baseDir: unpackBaseDir, + targetDirectories + }); + + expect(unpackResult).toMatchSnapshot(); + + // Verify files were extracted + for (const targetDirectory of targetDirectories) { + const sourceDir: string = path.join(baseDir, targetDirectory); + for (let i: number = 0; i < 5; ++i) { + const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); + const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); + expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( + fs.readFileSync(sourceFile, { encoding: 'utf-8' }) + ); + } + } + }); +}); diff --git a/apps/zipsync/src/zipSync.test.ts b/apps/zipsync/src/zipSync.test.ts deleted file mode 100644 index c8f5bc969e2..00000000000 --- a/apps/zipsync/src/zipSync.test.ts +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. -// See LICENSE in the project root for license information. 
- -import { tmpdir } from 'os'; -import * as path from 'path'; -import * as fs from 'fs'; -import * as crypto from 'crypto'; -import { zipSync } from './zipSync'; -import { NoOpTerminalProvider } from '@rushstack/terminal/lib/NoOpTerminalProvider'; -import { Terminal } from '@rushstack/terminal/lib/Terminal'; - -function getTempDir(): string { - const randomId = crypto.randomUUID(); - const tempDir = path.join(tmpdir(), `zipsync-test-${randomId}`); - fs.mkdirSync(tempDir); - return tempDir; -} - -function getDemoDataDirectoryDisposable(): { - targetDirectories: string[]; - baseDir: string; - [Symbol.dispose](): void; -} { - const baseDir: string = getTempDir(); - - const targetDirectories = ['demo-data-1', 'demo-data-2', 'demo-data-3', 'nested/demo/dir/4'].map( - (folderName) => { - const dataDir: string = path.join(baseDir, folderName); - fs.mkdirSync(dataDir, { recursive: true }); - const subdir: string = path.join(dataDir, 'subdir'); - fs.mkdirSync(subdir); - for (let i: number = 0; i < 5; ++i) { - const filePath: string = path.join(subdir, `file-${i}.txt`); - fs.writeFileSync(filePath, `This is file ${i} in ${folderName}/subdir\n`, { encoding: 'utf-8' }); - } - return folderName; - } - ); - - return { - targetDirectories, - baseDir, - [Symbol.dispose]() { - fs.rmSync(baseDir, { recursive: true, force: true }); - } - }; -} - -describe('zipSync tests', () => { - it('basic pack test', () => { - const demoDataDisposable = getDemoDataDirectoryDisposable(); - const { targetDirectories, baseDir } = demoDataDisposable; - - const terminal = new Terminal(new NoOpTerminalProvider()); - - const archivePath: string = path.join(baseDir, 'archive.zip'); - const packResult = zipSync({ - mode: 'pack', - terminal: terminal, - compression: 'deflate', - baseDir, - targetDirectories, - archivePath - }); - - expect(packResult).toMatchSnapshot(); - - const unpackBaseDir = getTempDir(); - - const unpackResult = zipSync({ - mode: 'unpack', - terminal: terminal, - archivePath, - baseDir: unpackBaseDir, - targetDirectories, - compression: 'deflate' - }); - - expect(unpackResult).toMatchSnapshot(); - - // Verify files were extracted - for (const targetDirectory of targetDirectories) { - const sourceDir: string = path.join(baseDir, targetDirectory); - for (let i: number = 0; i < 5; ++i) { - const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); - const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); - expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( - fs.readFileSync(sourceFile, { encoding: 'utf-8' }) - ); - } - } - - demoDataDisposable[Symbol.dispose](); - }); -}); diff --git a/apps/zipsync/src/zipSync.ts b/apps/zipsync/src/zipSync.ts deleted file mode 100644 index 5d6be9cdb95..00000000000 --- a/apps/zipsync/src/zipSync.ts +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. -// See LICENSE in the project root for license information. 
- -import * as fs from 'node:fs'; -import * as path from 'node:path'; -import * as crypto from 'node:crypto'; -import * as zlib from 'node:zlib'; -import type { ITerminal } from '@rushstack/terminal/lib/ITerminal'; -import { type IReadonlyPathTrieNode, LookupByPath } from '@rushstack/lookup-by-path/lib/LookupByPath'; -import { crc32Builder } from './crc32'; -import { DISPOSE_SYMBOL, getDisposableFileHandle, type IDisposableFileHandle } from './disposableFileHandle'; -import { type IIncrementalZlib, createIncrementalZlib } from './compress'; -import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; -import { - writeLocalFileHeader, - writeDataDescriptor, - writeCentralDirectoryHeader, - writeEndOfCentralDirectory, - findEndOfCentralDirectory, - parseCentralDirectoryHeader, - getFileFromZip, - DEFLATE_COMPRESSION, - STORE_COMPRESSION, - type ZipMetaCompressionMethod, - type IEndOfCentralDirectory, - type ICentralDirectoryHeaderParseResult, - type IFileEntry, - dosDateTime -} from './zipUtils'; -import { computeFileHash } from './hash'; - -const METADATA_FILENAME: string = '__zipsync_metadata__.json'; -const METADATA_VERSION: string = '1.0'; - -export type IZipMode = 'pack' | 'unpack'; - -type ZipSyncOptionCompression = 'store' | 'deflate' | 'auto'; - -/** - * @public - * Options for zipsync - */ -export interface IZipSyncOptions { - /** - * @rushstack/terminal compatible terminal for logging - */ - terminal: ITerminal; - /** - * Mode of operation: "pack" to create a zip archive, or "unpack" to extract files from a zip archive - */ - mode: IZipMode; - /** - * Zip file path - */ - archivePath: string; - /** - * Target directories to pack or unpack (depending on mode) - */ - targetDirectories: ReadonlyArray; - /** - * Base directory for relative paths within the archive (defaults to common parent of targetDirectories) - */ - baseDir: string; - /** - * Compression mode. If set to 'deflate', file data will be compressed using raw DEFLATE (method 8) when this - * produces a smaller result; otherwise it will fall back to 'store' per-file. 
- */ - compression: ZipSyncOptionCompression; -} - -interface IDirQueueItem { - dir: string; - depth: number; - node?: IReadonlyPathTrieNode | undefined; -} - -interface IMetadataFileRecord { - size: number; - sha1Hash: string; -} - -interface IMetadata { - version: string; - files: Record; -} - -interface IPackResult { - filesPacked: number; - metadata: IMetadata; -} - -interface IUnpackResult { - metadata: IMetadata; - filesExtracted: number; - filesSkipped: number; - filesDeleted: number; - foldersDeleted: number; - otherEntriesDeleted: number; -} - -const LIKELY_COMPRESSED_EXTENSION_REGEX: RegExp = - /\.(?:zip|gz|tgz|bz2|xz|7z|rar|jpg|jpeg|png|gif|webp|avif|mp4|m4v|mov|mkv|webm|mp3|ogg|aac|flac|pdf|woff|woff2)$/; - -function calculateSHA1(data: Buffer): string { - return crypto.createHash('sha1').update(data).digest('hex'); -} - -function rmdirSync(dirPath: string): void { - try { - fs.rmdirSync(dirPath); - } catch (e) { - if ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') { - // Not found, ignore - } else { - throw e; - } - } -} - -function unlinkSync(filePath: string): void { - try { - fs.unlinkSync(filePath); - } catch (e) { - if (e && (e as NodeJS.ErrnoException).code === 'ENOENT') { - // Not found, ignore - } else { - throw e; - } - } -} - -function packZip({ - archivePath, - baseDir, - compression, - targetDirectories, - terminal -}: IZipSyncOptions): IPackResult { - markStart('pack.total'); - terminal.writeDebugLine('Starting packZip'); - // Pass 1: enumerate - markStart('pack.enumerate'); - - const filePaths: string[] = []; - const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ dir, depth: 0 })); - - while (queue.length) { - const { dir: currentDir, depth } = queue.shift()!; - terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); - - const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); - - let items: fs.Dirent[]; - try { - items = fs.readdirSync(currentDir, { withFileTypes: true }); - } catch (e) { - if ( - e && - ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') - ) { - terminal.writeWarningLine(`Failed to read directory: ${currentDir}. Ignoring.`); - continue; - } else { - throw e; - } - } - - for (const item of items) { - const fullPath: string = path.join(currentDir, item.name); - if (item.isFile()) { - const relativePath: string = path.relative(baseDir, fullPath).replace(/\\/g, '/'); - terminal.writeVerboseLine(`${padding}${item.name}`); - filePaths.push(relativePath); - } else if (item.isDirectory()) { - terminal.writeVerboseLine(`${padding}${item.name}/`); - queue.push({ dir: fullPath, depth: depth + 1 }); - } else { - throw new Error(`Unexpected item (not file or directory): ${fullPath}. Aborting.`); - } - } - } - - terminal.writeLine(`Found ${filePaths.length} files to pack (enumerated)`); - markEnd('pack.enumerate'); - - // Pass 2: read + hash + compress - markStart('pack.prepareEntries'); - const bufferSize: number = 1 << 25; // 32 MiB - const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - - terminal.writeDebugLine(`Opening archive for write: ${archivePath}`); - using zipFile: IDisposableFileHandle = getDisposableFileHandle(archivePath, 'w'); - let currentOffset: number = 0; - // Use this function to do any write to the zip file, so that we can track the current offset. 
- function writeChunkToZip(chunk: Uint8Array, lengthBytes: number = chunk.byteLength): void { - let offset: number = 0; - while (lengthBytes > 0 && offset < chunk.byteLength) { - // In practice this call always writes all data at once, but the spec says it is not an error - // for it to not do so. Possibly that situation comes up when writing to something that is not - // an ordinary file. - const written: number = fs.writeSync(zipFile.fd, chunk, offset, lengthBytes); - lengthBytes -= written; - offset += written; - } - currentOffset += offset; - } - function writeChunksToZip(chunks: Uint8Array[]): void { - for (const chunk of chunks) { - writeChunkToZip(chunk); - } - } - - const dosDateTimeNow: { time: number; date: number } = dosDateTime(new Date()); - function writeFileEntry(relativePath: string): IFileEntry { - function isLikelyAlreadyCompressed(filename: string): boolean { - return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); - } - const fullPath: string = path.join(baseDir, relativePath); - - const readInputInChunks: (onChunk: (bytesInInputBuffer: number) => void) => void = ( - onChunk: (bytesInInputBuffer: number) => void - ): void => { - using inputDisposable: IDisposableFileHandle = getDisposableFileHandle(fullPath, 'r'); - - let bytesInInputBuffer: number = 0; - // The entire input buffer will be drained in each loop iteration - // So run until EOF - while (!isNaN(inputDisposable.fd)) { - bytesInInputBuffer = fs.readSync(inputDisposable.fd, inputBuffer, 0, inputBuffer.byteLength, -1); - - if (bytesInInputBuffer <= 0) { - // EOF, close the input fd - inputDisposable[DISPOSE_SYMBOL](); - } - - onChunk(bytesInInputBuffer); - } - }; - - let shouldCompress: boolean = false; - if (compression === 'deflate') { - shouldCompress = true; - } else if (compression === 'auto') { - // Heuristic: skip compression for small files or likely-already-compressed files - if (!isLikelyAlreadyCompressed(relativePath)) { - shouldCompress = true; - } else { - terminal.writeVerboseLine( - `Skip compression heuristically (already-compressed) for ${relativePath} (size unknown at this point)` - ); - } - } - - const compressionMethod: ZipMetaCompressionMethod = shouldCompress - ? DEFLATE_COMPRESSION - : STORE_COMPRESSION; - - const entry: IFileEntry = { - filename: relativePath, - size: 0, - compressedSize: 0, - crc32: 0, - sha1Hash: '', - localHeaderOffset: currentOffset, - compressionMethod, - dosDateTime: dosDateTimeNow - }; - - writeChunksToZip(writeLocalFileHeader(entry)); - - const sha1HashBuilder: crypto.Hash = crypto.createHash('sha1'); - let crc32: number = 0; - let uncompressedSize: number = 0; - let compressedSize: number = 0; - - using deflateIncremental: IIncrementalZlib | undefined = shouldCompress - ? createIncrementalZlib( - outputBuffer, - (chunk, lengthBytes) => { - writeChunkToZip(chunk, lengthBytes); - compressedSize += lengthBytes; - }, - 'deflate' - ) - : undefined; - - // Also capture content if we might need it (for compression decision or storing raw data). - // We'll accumulate into an array of buffers to avoid repeated concatenations for large files. 
- readInputInChunks((bytesInInputBuffer: number) => { - const slice: Buffer = inputBuffer.subarray(0, bytesInInputBuffer); - sha1HashBuilder.update(slice); - crc32 = crc32Builder(slice, crc32); - if (deflateIncremental) { - deflateIncremental.update(slice); - } else { - writeChunkToZip(slice, bytesInInputBuffer); - } - uncompressedSize += bytesInInputBuffer; - }); - - // finalize hashes, compression - deflateIncremental?.update(Buffer.alloc(0)); - crc32 = crc32 >>> 0; - const sha1Hash: string = sha1HashBuilder.digest('hex'); - - if (!shouldCompress) { - compressedSize = uncompressedSize; - } - - entry.size = uncompressedSize; - entry.compressedSize = compressedSize; - entry.crc32 = crc32; - entry.sha1Hash = sha1Hash; - - writeChunkToZip(writeDataDescriptor(entry)); - - terminal.writeVerboseLine( - `${relativePath} (sha1=${entry.sha1Hash}, crc32=${entry.crc32.toString(16)}, size=${ - entry.size - }, compressed=${entry.compressedSize}, method=${entry.compressionMethod}, compressed ${( - 100 - - (entry.compressedSize / entry.size) * 100 - ).toFixed(1)}%)` - ); - return entry; - } - - const entries: IFileEntry[] = []; - for (const relativePath of filePaths) { - entries.push(writeFileEntry(relativePath)); - } - - markEnd('pack.prepareEntries'); - terminal.writeLine(`Prepared ${entries.length} file entries`); - - markStart('pack.metadata.build'); - const metadata: IMetadata = { version: METADATA_VERSION, files: {} }; - for (const entry of entries) { - metadata.files[entry.filename] = { size: entry.size, sha1Hash: entry.sha1Hash }; - } - - const metadataContent: string = JSON.stringify(metadata); - const metadataBuffer: Buffer = Buffer.from(metadataContent, 'utf8'); - terminal.writeDebugLine( - `Metadata size=${metadataBuffer.length} bytes, fileCount=${Object.keys(metadata.files).length}` - ); - - let metadataCompressionMethod: ZipMetaCompressionMethod = STORE_COMPRESSION; - let metadataData: Buffer = metadataBuffer; - let metadataCompressedSize: number = metadataBuffer.length; - if ((compression === 'deflate' || compression === 'auto') && metadataBuffer.length > 64) { - const compressed: Buffer = zlib.deflateRawSync(metadataBuffer, { level: 9 }); - if (compressed.length < metadataBuffer.length) { - metadataCompressionMethod = DEFLATE_COMPRESSION; - metadataData = compressed; - metadataCompressedSize = compressed.length; - terminal.writeDebugLine( - `Metadata compressed (orig=${metadataBuffer.length}, compressed=${compressed.length})` - ); - } else { - terminal.writeDebugLine('Metadata compression skipped (not smaller)'); - } - } - - const metadataEntry: IFileEntry = { - filename: METADATA_FILENAME, - size: metadataBuffer.length, - compressedSize: metadataCompressedSize, - crc32: crc32Builder(metadataBuffer), - sha1Hash: calculateSHA1(metadataBuffer), - localHeaderOffset: currentOffset, - compressionMethod: metadataCompressionMethod, - dosDateTime: dosDateTimeNow - }; - - writeChunksToZip(writeLocalFileHeader(metadataEntry)); - writeChunkToZip(metadataData, metadataCompressedSize); - writeChunkToZip(writeDataDescriptor(metadataEntry)); - - entries.push(metadataEntry); - terminal.writeVerboseLine(`Total entries including metadata: ${entries.length}`); - - markEnd('pack.metadata.build'); - - markStart('pack.write.entries'); - const outputDir: string = path.dirname(archivePath); - fs.mkdirSync(outputDir, { recursive: true }); - - markEnd('pack.write.entries'); - - markStart('pack.write.centralDirectory'); - const centralDirOffset: number = currentOffset; - for (const entry of entries) { - 
writeChunksToZip(writeCentralDirectoryHeader(entry)); - } - const centralDirSize: number = currentOffset - centralDirOffset; - markEnd('pack.write.centralDirectory'); - - // Write end of central directory - markStart('pack.write.eocd'); - writeChunkToZip(writeEndOfCentralDirectory(centralDirOffset, centralDirSize, entries.length)); - terminal.writeDebugLine('EOCD record written'); - markEnd('pack.write.eocd'); - - markEnd('pack.total'); - const total: number = getDuration('pack.total'); - emitSummary('pack', terminal); - terminal.writeLine(`Successfully packed ${entries.length} files in ${formatDuration(total)}`); - return { filesPacked: entries.length, metadata }; -} - -function unpackZip({ archivePath, baseDir, targetDirectories, terminal }: IZipSyncOptions): IUnpackResult { - markStart('unpack.total'); - terminal.writeDebugLine('Starting unpackZip'); - - markStart('unpack.read.archive'); - const zipBuffer: Buffer = fs.readFileSync(archivePath); - terminal.writeDebugLine(`Archive size=${zipBuffer.length} bytes`); - markEnd('unpack.read.archive'); - - markStart('unpack.parse.centralDirectory'); - const zipTree: LookupByPath = new LookupByPath(); - const endOfCentralDir: IEndOfCentralDirectory = findEndOfCentralDirectory(zipBuffer); - - const centralDirBuffer: Buffer = zipBuffer.subarray( - endOfCentralDir.centralDirOffset, - endOfCentralDir.centralDirOffset + endOfCentralDir.centralDirSize - ); - terminal.writeDebugLine( - `Central directory slice size=${centralDirBuffer.length} (expected=${endOfCentralDir.centralDirSize})` - ); - - let metadataEntry: ICentralDirectoryHeaderParseResult | undefined; - const entries: Array = []; - let offset: number = 0; - - for (let i: number = 0; i < endOfCentralDir.totalCentralDirRecords; i++) { - const result: ICentralDirectoryHeaderParseResult = parseCentralDirectoryHeader(centralDirBuffer, offset); - zipTree.setItem(result.filename, true); - - if (result.filename === METADATA_FILENAME) { - if (metadataEntry) { - throw new Error('Multiple metadata entries found in archive'); - } - metadataEntry = result; - } - - entries.push(result); - offset = result.nextOffset; - terminal.writeDebugLine( - `Parsed central entry ${result.filename} (method=${result.header.compressionMethod}, compSize=${result.header.compressedSize})` - ); - } - markEnd('unpack.parse.centralDirectory'); - - if (!metadataEntry) { - throw new Error(`Metadata entry not found in archive`); - } - - markStart('unpack.read.metadata'); - terminal.writeDebugLine('Metadata entry found, reading'); - const metadataZipBuffer: Buffer = getFileFromZip(zipBuffer, metadataEntry); - - let metadataBuffer: Buffer; - if (metadataEntry.header.compressionMethod === STORE_COMPRESSION) { - metadataBuffer = metadataZipBuffer; - } else if (metadataEntry.header.compressionMethod === DEFLATE_COMPRESSION) { - metadataBuffer = zlib.inflateRawSync(metadataZipBuffer); - if (metadataBuffer.length !== metadataEntry.header.uncompressedSize) { - throw new Error( - `Metadata size mismatch (expected ${metadataEntry.header.uncompressedSize}, got ${metadataBuffer.length})` - ); - } - } else { - throw new Error(`Unsupported compression method for metadata: ${metadataEntry.header.compressionMethod}`); - } - - const metadata: IMetadata = JSON.parse(metadataBuffer.toString('utf8')) as IMetadata; - - if (metadata.version !== METADATA_VERSION) { - throw new Error(`Unsupported metadata version: ${metadata.version}`); - } - - terminal.writeDebugLine( - `Metadata (version=${metadata.version}) parsed 
(fileCount=${Object.keys(metadata.files).length}, rawSize=${metadataBuffer.length})` - ); - markEnd('unpack.read.metadata'); - - terminal.writeLine(`Found ${entries.length} files in archive`); - - for (const targetDirectory of targetDirectories) { - fs.mkdirSync(targetDirectory, { recursive: true }); - terminal.writeDebugLine(`Ensured target directory: ${targetDirectory}`); - } - - let extractedCount: number = 0; - let skippedCount: number = 0; - let deletedFilesCount: number = 0; - let deletedOtherCount: number = 0; - let deletedFoldersCount: number = 0; - let scanCount: number = 0; - - const dirsToCleanup: string[] = []; - - markStart('unpack.scan.existing'); - const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ - dir, - depth: 0, - node: zipTree.getNodeAtPrefix(path.relative(baseDir, dir)) - })); - - while (queue.length) { - const { dir: currentDir, depth, node } = queue.shift()!; - terminal.writeDebugLine(`Enumerating directory: ${currentDir}`); - - const padding: string = depth === 0 ? '' : '-↳'.repeat(depth); - - let items: fs.Dirent[]; - try { - items = fs.readdirSync(currentDir, { withFileTypes: true }); - } catch (e) { - terminal.writeWarningLine(`Failed to read directory: ${currentDir}`); - continue; - } - - for (const item of items) { - scanCount++; - // check if exists in zipTree, if not delete - const relativePath: string = path - .relative(baseDir, path.join(currentDir, item.name)) - .replace(/\\/g, '/'); - - const childNode: IReadonlyPathTrieNode | undefined = node?.children?.get(item.name); - - if (item.isFile()) { - terminal.writeVerboseLine(`${padding}${item.name}`); - if (!childNode?.value) { - terminal.writeDebugLine(`Deleting file: ${relativePath}`); - unlinkSync(relativePath); - deletedFilesCount++; - } - } else if (item.isDirectory()) { - terminal.writeVerboseLine(`${padding}${item.name}/`); - queue.push({ dir: relativePath, depth: depth + 1, node: childNode }); - if (!childNode || childNode.value) { - dirsToCleanup.push(relativePath); - } - } else { - terminal.writeVerboseLine(`${padding}${item.name} (not file or directory, deleting)`); - unlinkSync(relativePath); - deletedOtherCount++; - } - } - } - - for (const dir of dirsToCleanup) { - // Try to remove the directory. If it is not empty, this will throw and we can ignore the error. 
- try { - rmdirSync(dir); - terminal.writeDebugLine(`Deleted empty directory: ${dir}`); - deletedFoldersCount++; - } catch (e) { - // Probably not empty - terminal.writeDebugLine(`Directory not empty, skipping: ${dir}`); - } - } - - terminal.writeDebugLine(`Existing entries tracked: ${scanCount}`); - markEnd('unpack.scan.existing'); - - markStart('unpack.extract.loop'); - const bufferSize: number = 1 << 25; // 32 MiB - const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - - const dirsCreated: Set = new Set(); - - for (const entry of entries) { - if (entry.filename === METADATA_FILENAME) { - continue; - } - - const targetPath: string = path.join(baseDir, entry.filename); - const targetDir: string = path.dirname(targetPath); - if (!dirsCreated.has(targetDir)) { - fs.mkdirSync(targetDir, { recursive: true }); - dirsCreated.add(targetDir); - } - - let shouldExtract: boolean = true; - if (metadata) { - const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; - - if (metadataFile) { - try { - using existingFile: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'r'); - const existingHash: string | false = computeFileHash(existingFile.fd); - if (existingHash === metadataFile.sha1Hash) { - shouldExtract = false; - skippedCount++; - terminal.writeDebugLine(`Skip unchanged file: ${entry.filename}`); - } - } catch (e) { - if (e && (e as NodeJS.ErrnoException).code === 'ENOENT') { - // File does not exist, will extract - } else { - throw e; - } - } - } - } - - if (shouldExtract) { - terminal.writeDebugLine(`Extracting file: ${entry.filename}`); - const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); - let fileData: Buffer; - using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); - if (entry.header.compressionMethod === STORE_COMPRESSION) { - fileData = fileZipBuffer; - let writeOffset: number = 0; - while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { - const written: number = fs.writeSync( - fileHandle.fd, - fileData, - writeOffset, - fileData.length - writeOffset - ); - writeOffset += written; - } - } else if (entry.header.compressionMethod === DEFLATE_COMPRESSION) { - using inflateIncremental: IIncrementalZlib = createIncrementalZlib( - outputBuffer, - (chunk, lengthBytes) => { - let writeOffset: number = 0; - while (lengthBytes > 0 && writeOffset < chunk.byteLength) { - const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); - lengthBytes -= written; - writeOffset += written; - } - }, - 'inflate' - ); - inflateIncremental.update(fileZipBuffer); - inflateIncremental.update(Buffer.alloc(0)); - } else { - throw new Error( - `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` - ); - } - - // If data descriptor was used we rely on central directory values already consumed. 
- extractedCount++; - } - } - markEnd('unpack.extract.loop'); - - markEnd('unpack.total'); - const unpackTotal: number = getDuration('unpack.total'); - terminal.writeLine( - `Extraction complete: ${extractedCount} extracted, ${skippedCount} skipped, ${deletedFilesCount} deleted, ${deletedFoldersCount} folders deleted, ${deletedOtherCount} other entries deleted in ${formatDuration( - unpackTotal - )}` - ); - emitSummary('unpack', terminal); - terminal.writeDebugLine('unpackZip finished'); - return { - metadata, - filesExtracted: extractedCount, - filesSkipped: skippedCount, - filesDeleted: deletedFilesCount, - foldersDeleted: deletedFoldersCount, - otherEntriesDeleted: deletedOtherCount - }; -} - -/** - * Packs (creates) or unpacks (synchronizes) a ZIP archive. - * - * @public - */ -export function zipSync( - options: T -): T['mode'] extends 'pack' ? IPackResult : IUnpackResult { - const { - terminal, - mode, - archivePath, - targetDirectories: rawTargetDirectories, - baseDir: rawBaseDir - } = options; - const baseDir: string = (options.baseDir = path.resolve(rawBaseDir)); - options.targetDirectories = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); - - if (mode === 'pack') { - terminal.writeLine(`Packing to ${archivePath} from ${rawTargetDirectories.join(', ')}`); - return packZip(options) as T['mode'] extends 'pack' ? IPackResult : IUnpackResult; - } else { - terminal.writeLine(`Unpacking to ${rawTargetDirectories.join(', ')} from ${archivePath}`); - return unpackZip(options) as T['mode'] extends 'pack' ? IPackResult : IUnpackResult; - } -} diff --git a/apps/zipsync/src/zipSyncUtils.ts b/apps/zipsync/src/zipSyncUtils.ts new file mode 100644 index 00000000000..99b2e36219a --- /dev/null +++ b/apps/zipsync/src/zipSyncUtils.ts @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. +// See LICENSE in the project root for license information. + +import type { IReadonlyPathTrieNode } from '@rushstack/lookup-by-path/lib/LookupByPath'; + +export const METADATA_FILENAME: string = '__zipsync_metadata__.json'; +export const METADATA_VERSION: string = '1.0'; + +export interface IDirQueueItem { + dir: string; + depth: number; + node?: IReadonlyPathTrieNode | undefined; +} + +export interface IMetadataFileRecord { + size: number; + sha1Hash: string; +} + +export interface IMetadata { + version: string; + files: Record; +} + +export type IZipSyncMode = 'pack' | 'unpack'; + +export type ZipSyncOptionCompression = 'store' | 'deflate' | 'auto'; diff --git a/apps/zipsync/src/zipSyncWorkerAsync.test.ts b/apps/zipsync/src/zipSyncWorkerAsync.test.ts deleted file mode 100644 index ea5c9148eb4..00000000000 --- a/apps/zipsync/src/zipSyncWorkerAsync.test.ts +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. -// See LICENSE in the project root for license information. 
- -import { tmpdir } from 'os'; -import * as path from 'path'; -import * as fs from 'fs'; -import * as crypto from 'crypto'; -import { zipSyncWorkerAsync } from './zipSyncWorkerAsync'; - -function getTempDir(): string { - const randomId = crypto.randomUUID(); - const tempDir = path.join(tmpdir(), `zipsync-test-${randomId}`); - fs.mkdirSync(tempDir); - return tempDir; -} - -function getDemoDataDirectoryDisposable(): { - targetDirectories: string[]; - baseDir: string; - [Symbol.dispose](): void; -} { - const baseDir: string = getTempDir(); - - const targetDirectories = ['demo-data-1', 'demo-data-2', 'demo-data-3', 'nested/demo/dir/4'].map( - (folderName) => { - const dataDir: string = path.join(baseDir, folderName); - fs.mkdirSync(dataDir, { recursive: true }); - const subdir: string = path.join(dataDir, 'subdir'); - fs.mkdirSync(subdir); - for (let i: number = 0; i < 5; ++i) { - const filePath: string = path.join(subdir, `file-${i}.txt`); - fs.writeFileSync(filePath, `This is file ${i} in ${folderName}/subdir\n`, { encoding: 'utf-8' }); - } - return folderName; - } - ); - - return { - targetDirectories, - baseDir, - [Symbol.dispose]() { - fs.rmSync(baseDir, { recursive: true, force: true }); - } - }; -} - -describe('zipSyncWorkerAsync tests', () => { - it('basic pack test', async () => { - const demoDataDisposable = getDemoDataDirectoryDisposable(); - const { targetDirectories, baseDir } = demoDataDisposable; - - const archivePath: string = path.join(baseDir, 'archive.zip'); - const { zipSyncReturn: packResult } = await zipSyncWorkerAsync({ - mode: 'pack', - compression: 'deflate', - baseDir, - targetDirectories, - archivePath - }); - - expect(packResult).toMatchSnapshot(); - - const unpackBaseDir = getTempDir(); - - const { zipSyncReturn: unpackResult } = await zipSyncWorkerAsync({ - mode: 'unpack', - archivePath, - baseDir: unpackBaseDir, - targetDirectories, - compression: 'deflate' - }); - - expect(unpackResult).toMatchSnapshot(); - - // Verify files were extracted - for (const targetDirectory of targetDirectories) { - const sourceDir: string = path.join(baseDir, targetDirectory); - for (let i: number = 0; i < 5; ++i) { - const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); - const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); - expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( - fs.readFileSync(sourceFile, { encoding: 'utf-8' }) - ); - } - } - - demoDataDisposable[Symbol.dispose](); - }); -}); diff --git a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts index 7bbd0f0d7ac..31d4a2825cc 100644 --- a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts +++ b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts @@ -1,12 +1,14 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. 
+import * as path from 'path'; import * as crypto from 'crypto'; import * as fs from 'fs'; import { FileSystem, type FolderItem, InternalError, Async } from '@rushstack/node-core-library'; import type { ITerminal } from '@rushstack/terminal'; -import { zipSyncWorkerAsync } from '@rushstack/zipsync/lib/zipSyncWorkerAsync'; +import { packWorkerAsync, type IZipSyncPackWorkerResult } from '@rushstack/zipsync/lib/packWorkerAsync'; +import { unpackWorkerAsync, type IZipSyncUnpackWorkerResult } from '@rushstack/zipsync/lib/unpackWorkerAsync'; import type { RushConfigurationProject } from '../../api/RushConfigurationProject'; import type { BuildCacheConfiguration } from '../../api/BuildCacheConfiguration'; @@ -167,15 +169,28 @@ export class OperationBuildCache { let restoreSuccess: boolean = false; try { + const logFilePath: string = this._getLogFilePath(cacheId, 'unpack'); + let unpackWorkerResult: IZipSyncUnpackWorkerResult; + try { + unpackWorkerResult = await unpackWorkerAsync({ + archivePath: localCacheEntryPath!, + targetDirectories: this._projectOutputFolderNames, + baseDir: projectFolderPath + }); + } catch (e) { + const { zipSyncLogs } = e as { zipSyncLogs: string | undefined }; + if (zipSyncLogs) { + fs.writeFileSync(logFilePath, zipSyncLogs); + terminal.writeVerboseLine(`The zipsync log has been written to: ${logFilePath}`); + } + throw e; + } const { - zipSyncReturn: { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted } - } = await zipSyncWorkerAsync({ - mode: 'unpack', - compression: 'auto', - archivePath: localCacheEntryPath!, - targetDirectories: this._projectOutputFolderNames, - baseDir: projectFolderPath - }); + zipSyncReturn: { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted }, + zipSyncLogs + } = unpackWorkerResult; + fs.writeFileSync(logFilePath, zipSyncLogs); + terminal.writeVerboseLine(`The zipsync log has been written to: ${logFilePath}`); terminal.writeVerboseLine(`Restored ${filesExtracted + filesSkipped} files from cache.`); if (filesExtracted > 0) { terminal.writeVerboseLine(`Extracted ${filesExtracted} files to target folders.`); @@ -240,15 +255,29 @@ export class OperationBuildCache { terminal.writeVerboseLine(`Using zipsync to create cache archive.`); try { + const logFilePath: string = this._getLogFilePath(cacheId, 'pack'); + let packWorkerResult: IZipSyncPackWorkerResult; + try { + packWorkerResult = await packWorkerAsync({ + compression: 'auto', + archivePath: tempLocalCacheEntryPath, + targetDirectories: this._projectOutputFolderNames, + baseDir: this._project.projectFolder + }); + } catch (e) { + const { zipSyncLogs } = e as { zipSyncLogs: string | undefined }; + if (zipSyncLogs) { + fs.writeFileSync(logFilePath, zipSyncLogs); + terminal.writeVerboseLine(`The zipsync log has been written to: ${logFilePath}`); + } + throw e; + } const { - zipSyncReturn: { filesPacked } - } = await zipSyncWorkerAsync({ - mode: 'pack', - compression: 'auto', - archivePath: tempLocalCacheEntryPath, - targetDirectories: this._projectOutputFolderNames, - baseDir: this._project.projectFolder - }); + zipSyncReturn: { filesPacked }, + zipSyncLogs + } = packWorkerResult; + fs.writeFileSync(logFilePath, zipSyncLogs); + terminal.writeVerboseLine(`The zipsync log has been written to: ${logFilePath}`); terminal.writeVerboseLine(`Packed ${filesPacked} files for caching.`); // Move after the archive is finished so that if the process is interrupted we aren't left with an invalid file @@ -389,6 +418,10 @@ export class 
OperationBuildCache { }; } + private _getLogFilePath(cacheId: string, mode: 'pack' | 'unpack'): string { + return path.join(this._project.projectRushTempFolder, `${cacheId}.${mode}.log`); + } + private static _getCacheId(options: IProjectBuildCacheOptions): string | undefined { const { buildCacheConfiguration, From b4352ff49f5c4fa680eb2f74c173b4163e955b38 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:04:21 -0700 Subject: [PATCH 08/20] fixup benchmark --- apps/zipsync/src/benchmark.test.ts | 149 ++++++++++++++--------------- 1 file changed, 71 insertions(+), 78 deletions(-) diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index b0f1c819ff9..1e612190fa3 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -61,14 +61,14 @@ interface IMeasurement { sizeBytes?: number; } const measurements: IMeasurement[] = []; -// Allow specifying iterations via env BENCH_ITERATIONS or command arg --iterations N (jest passes args; we scan process.argv) +// Allow specifying iterations via env BENCH_ITERATIONS. Defaults to 0 to avoid running the benchmark unless explicitly enabled. function detectIterations(): number { - let iter = 1; + let iter = 0; const envParsed: number = parseInt(process.env.BENCH_ITERATIONS || '', 10); if (!isNaN(envParsed) && envParsed > 0) { iter = envParsed; } - return iter || 5; + return iter; } const ITERATIONS: number = detectIterations(); @@ -223,16 +223,16 @@ function benchZipSyncScenario( } // the benchmarks are skipped by default because they require external tools (tar, zip) to be installed -describe.skip(`archive benchmarks (iterations=${ITERATIONS})`, () => { - it('tar', () => { +describe(`archive benchmarks (iterations=${ITERATIONS})`, () => { + it('gtar', () => { if (!isTarAvailable()) { console.log('Skipping tar test because tar is not available'); return; } if (!tempDir) throw new Error('Temp directory is not set up.'); - bench('tar', { - pack: ({ archive, demoDir }) => execSync(`tar -cf "${archive}" -C "${demoDir}" .`), - unpack: ({ archive, unpackDir }) => execSync(`tar -xf "${archive}" -C "${unpackDir}"`), + bench('gtar', { + pack: ({ archive, demoDir }) => execSync(`gtar -cf "${archive}" -C "${demoDir}" .`), + unpack: ({ archive, unpackDir }) => execSync(`gtar -xf "${archive}" -C "${unpackDir}"`), archive: path.join(tempDir, 'archive.tar'), unpackDir: path.join(tempDir, 'unpacked-tar'), populateUnpackDir: 'full', @@ -388,87 +388,80 @@ afterAll(() => { ] } ]; - interface ITableRow { - group: string; - isBaseline: boolean; - s: IStats; - deltaMeanPct: number; - } - const tableRows: ITableRow[] = []; - for (const g of groupsDef) { - const baselinePack: IStats | undefined = stats.find((s) => s.kind === g.baseline && s.phase === 'pack'); - const baselineUnpack: IStats | undefined = stats.find( - (s) => s.kind === g.baseline && s.phase === 'unpack' - ); - for (const member of g.members) { - for (const phase of ['pack', 'unpack'] as const) { - const s = stats.find((st) => st.kind === member && st.phase === phase); - if (!s) continue; - const baseline = phase === 'pack' ? baselinePack : baselineUnpack; - const deltaMeanPct = baseline ? 
((s.mean - baseline.mean) / baseline.mean) * 100 : 0; - tableRows.push({ group: g.title, isBaseline: member === g.baseline, s, deltaMeanPct }); + // Build per-group markdown tables (no Group column) for each phase + function buildGroupTable( + group: { title: string; baseline: string; members: string[] }, + phase: 'pack' | 'unpack' + ): string[] { + // Human readable bytes formatter + function formatBytes(bytes: number): string { + const units = ['B', 'KB', 'MB', 'GB']; + let value = bytes; + let i = 0; + while (value >= 1024 && i < units.length - 1) { + value /= 1024; + i++; } + const formatted = value >= 100 ? value.toFixed(0) : value >= 10 ? value.toFixed(1) : value.toFixed(2); + return `${formatted} ${units[i]}`; } - } - - function buildTable(rowsData: ITableRow[], phaseFilter: 'pack' | 'unpack'): string[] { const headers = - phaseFilter === 'pack' - ? [ - 'Group', - 'Archive', - 'iter', - 'min(ms)', - 'mean(ms)', - 'Δmean%', - 'p95(ms)', - 'max(ms)', - 'std(ms)', - 'size(bytes)' - ] - : ['Group', 'Archive', 'iter', 'min(ms)', 'mean(ms)', 'Δmean%', 'p95(ms)', 'max(ms)', 'std(ms)']; - const rows: string[][] = [headers]; - for (const row of rowsData.filter((r) => r.s.phase === phaseFilter)) { - const baseCols = [ - row.isBaseline ? row.group : '', - row.s.kind + (row.isBaseline ? '*' : ''), - String(row.s.n), - row.s.min.toFixed(2), - row.s.mean.toFixed(2), - (row.deltaMeanPct >= 0 ? '+' : '') + row.deltaMeanPct.toFixed(1), - row.s.p95.toFixed(2), - row.s.max.toFixed(2), - row.s.std.toFixed(2) + phase === 'pack' + ? ['Archive', 'min (ms)', 'mean (ms)', 'p95 (ms)', 'max (ms)', 'std (ms)', 'speed×', 'size'] + : ['Archive', 'min (ms)', 'mean (ms)', 'p95 (ms)', 'max (ms)', 'std (ms)', 'speed×']; + const lines: string[] = []; + lines.push('| ' + headers.join(' | ') + ' |'); + const align: string[] = headers.map((header, idx) => (idx === 0 ? '---' : '---:')); + lines.push('| ' + align.join(' | ') + ' |'); + const baselineStats: IStats | undefined = stats.find( + (s) => s.kind === group.baseline && s.phase === phase + ); + for (const member of group.members) { + const s: IStats | undefined = stats.find((st) => st.kind === member && st.phase === phase); + if (!s) continue; + const isBaseline: boolean = member === group.baseline; + const speedFactor: number = baselineStats ? baselineStats.mean / s.mean : 1; + const cols: string[] = [ + (isBaseline ? '**' : '') + s.kind + (isBaseline ? '**' : ''), + s.min.toFixed(2), + s.mean.toFixed(2), + s.p95.toFixed(2), + s.max.toFixed(2), + s.std.toFixed(2), + speedFactor.toFixed(2) + 'x' ]; - if (phaseFilter === 'pack') { - baseCols.push(row.s.sizeMean !== undefined ? Math.round(row.s.sizeMean).toString() : ''); + if (phase === 'pack') { + cols.push(s.sizeMean !== undefined ? 
formatBytes(Math.round(s.sizeMean)) : ''); } - rows.push(baseCols); + lines.push('| ' + cols.join(' | ') + ' |'); } - const colWidths: number[] = headers.map((header, i) => - rows.reduce((w, r) => Math.max(w, r[i].length), 0) - ); - return rows.map((r) => r.map((c, i) => c.padStart(colWidths[i], ' ')).join(' ')); + return lines; } - const packTable: string[] = buildTable(tableRows, 'pack'); - const unpackTable: string[] = buildTable(tableRows, 'unpack'); const outputLines: string[] = []; - outputLines.push('\nBenchmark Results (iterations=' + ITERATIONS + '):'); - outputLines.push('PACK PHASE:'); - outputLines.push(packTable[0]); - outputLines.push('-'.repeat(packTable[0].length)); - for (let i = 1; i < packTable.length; i++) outputLines.push(packTable[i]); - outputLines.push('* baseline (pack)'); + outputLines.push('# Benchmark Results'); + outputLines.push(''); + outputLines.push( + 'This document contains performance measurements for packing and unpacking a synthetic dataset using traditional archive tools (tar, zip) and various zipsync modes. The dataset consists of two directory trees (subdir1, subdir2) populated with text files. Each scenario was executed multiple iterations; metrics shown are aggregated timing statistics. The speed× column shows how many times faster a scenario is compared to the baseline in that group (values >1 = faster, <1 = slower). Baseline rows are shown in bold.' + ); + outputLines.push(''); + outputLines.push(`Iterations: ${ITERATIONS}`); outputLines.push(''); - outputLines.push('UNPACK PHASE:'); - outputLines.push(unpackTable[0]); - outputLines.push('-'.repeat(unpackTable[0].length)); - for (let i = 1; i < unpackTable.length; i++) outputLines.push(unpackTable[i]); - outputLines.push('* baseline (unpack)'); + for (const g of groupsDef) { + outputLines.push(`## ${g.title}`); + outputLines.push(''); + outputLines.push('### Pack Phase'); + outputLines.push(''); + outputLines.push(...buildGroupTable(g, 'pack')); + outputLines.push(''); + outputLines.push('### Unpack Phase'); + outputLines.push(''); + outputLines.push(...buildGroupTable(g, 'unpack')); + outputLines.push(''); + } const resultText = outputLines.join('\n'); console.log(resultText); try { - const resultFile = path.join(__dirname, '..', 'temp', `benchmark-results.txt`); + const resultFile = path.join(__dirname, '..', 'temp', `benchmark-results.md`); fs.writeFileSync(resultFile, resultText, { encoding: 'utf-8' }); console.log(`Benchmark results written to: ${resultFile}`); } catch (e) { @@ -488,7 +481,7 @@ function isZipAvailable(): boolean { } function isTarAvailable(): boolean { try { - const checkTar = process.platform === 'win32' ? 'where tar' : 'command -v tar'; + const checkTar = process.platform === 'win32' ? 
'where gtar' : 'command -v gtar'; execSync(checkTar, { stdio: 'ignore' }); return true; } catch { From e0dc2527a26d8280468b1eb89335e6d6f548ba33 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:14:55 -0700 Subject: [PATCH 09/20] fix unlink --- apps/zipsync/src/unpack.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts index 3e3672e1ed5..1cc227fdff8 100644 --- a/apps/zipsync/src/unpack.ts +++ b/apps/zipsync/src/unpack.ts @@ -8,7 +8,7 @@ import * as zlib from 'node:zlib'; import { type IReadonlyPathTrieNode, LookupByPath } from '@rushstack/lookup-by-path/lib/LookupByPath'; import type { ITerminal } from '@rushstack/terminal'; -import { getDisposableFileHandle, type IDisposableFileHandle } from './fs'; +import { getDisposableFileHandle, unlinkSync, type IDisposableFileHandle } from './fs'; import { type IIncrementalZlib, createIncrementalZlib } from './compress'; import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; import { @@ -191,7 +191,7 @@ export function unpack({ terminal.writeVerboseLine(`${padding}${item.name}`); if (!childNode?.value) { terminal.writeDebugLine(`Deleting file: ${relativePath}`); - fs.unlinkSync(relativePath); + unlinkSync(relativePath); deletedFilesCount++; } } else if (item.isDirectory()) { @@ -202,7 +202,7 @@ export function unpack({ } } else { terminal.writeVerboseLine(`${padding}${item.name} (not file or directory, deleting)`); - fs.unlinkSync(relativePath); + unlinkSync(relativePath); deletedOtherCount++; } } From c614e31f7f9fb26d98023ac855264ab918c6c741 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:20:56 -0700 Subject: [PATCH 10/20] fixup benchmark --- apps/zipsync/src/benchmark.test.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index 1e612190fa3..5fe18858534 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -224,15 +224,15 @@ function benchZipSyncScenario( // the benchmarks are skipped by default because they require external tools (tar, zip) to be installed describe(`archive benchmarks (iterations=${ITERATIONS})`, () => { - it('gtar', () => { + it('tar', () => { if (!isTarAvailable()) { console.log('Skipping tar test because tar is not available'); return; } if (!tempDir) throw new Error('Temp directory is not set up.'); - bench('gtar', { - pack: ({ archive, demoDir }) => execSync(`gtar -cf "${archive}" -C "${demoDir}" .`), - unpack: ({ archive, unpackDir }) => execSync(`gtar -xf "${archive}" -C "${unpackDir}"`), + bench('tar', { + pack: ({ archive, demoDir }) => execSync(`tar -cf "${archive}" -C "${demoDir}" .`), + unpack: ({ archive, unpackDir }) => execSync(`tar -xf "${archive}" -C "${unpackDir}"`), archive: path.join(tempDir, 'archive.tar'), unpackDir: path.join(tempDir, 'unpacked-tar'), populateUnpackDir: 'full', @@ -481,7 +481,7 @@ function isZipAvailable(): boolean { } function isTarAvailable(): boolean { try { - const checkTar = process.platform === 'win32' ? 'where gtar' : 'command -v gtar'; + const checkTar = process.platform === 'win32' ? 
'where tar' : 'command -v tar'; execSync(checkTar, { stdio: 'ignore' }); return true; } catch { From 9eac3075c69dc5d77ff10e50cd57490d14d7136b Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:49:00 -0700 Subject: [PATCH 11/20] fixup benchmark --- apps/zipsync/src/benchmark.test.ts | 79 ++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 25 deletions(-) diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index 5fe18858534..73f8ca48fa7 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -3,7 +3,7 @@ /* eslint-disable no-console */ import { execSync } from 'child_process'; -import { tmpdir } from 'os'; +import { tmpdir, cpus, platform, release, arch, totalmem } from 'os'; import * as path from 'path'; import * as fs from 'fs'; import { createHash, randomUUID } from 'crypto'; @@ -14,6 +14,7 @@ import { pack } from './pack'; import { unpack } from './unpack'; // create a tempdir and setup dummy files there for benchmarking +const NUM_FILES = 1000; // number of files per subdir let tempDir: string; const runId = randomUUID(); async function setupDemoDataAsync(): Promise { @@ -26,7 +27,7 @@ async function setupDemoDataAsync(): Promise { const demoSubDir2 = path.join(tempDir, 'demo-data', 'subdir2'); fs.mkdirSync(demoSubDir2, { recursive: true }); - for (let i = 0; i < 1000; i++) { + for (let i = 0; i < NUM_FILES; i++) { const filePath1 = path.join(demoSubDir1, `file${i}.txt`); fs.writeFileSync(filePath1, `This is file ${i} in subdir1\n`.repeat(1000), { encoding: 'utf-8' }); const filePath2 = path.join(demoSubDir2, `file${i}.txt`); @@ -138,13 +139,11 @@ function bench(kind: string, commands: IBenchCommands): void { for (let i = 0; i < ITERATIONS; i++) { // Ensure previous artifacts removed if (fs.existsSync(commands.archive)) fs.rmSync(commands.archive, { force: true }); - if (fs.existsSync(commands.unpackDir)) fs.rmSync(commands.unpackDir, { recursive: true, force: true }); - fs.mkdirSync(commands.unpackDir, { recursive: true }); if (commands.populateUnpackDir === 'full') { fs.cpSync(srcDir, commands.unpackDir, { recursive: true }); } else if (commands.populateUnpackDir === 'partial') { // Copy half the files - for (let j = 0; j < 500; j++) { + for (let j = 0; j < NUM_FILES / 2; j++) { const file1 = path.join(srcDir, 'subdir1', `file${j}.txt`); const file2 = path.join(srcDir, 'subdir2', `file${j}.txt`); const dest1 = path.join(commands.unpackDir, 'subdir1', `file${j}.txt`); @@ -218,7 +217,7 @@ function benchZipSyncScenario( archive: path.join(tempDir, `archive-zipsync-${compression}.zip`), unpackDir: path.join(tempDir, `unpacked-zipsync-${compression}-${existingFiles}`), populateUnpackDir: existingFiles === 'all' ? 'full' : existingFiles === 'partial' ? 
'partial' : undefined, - cleanBeforeUnpack: false + cleanBeforeUnpack: false // cleaning is handled internally by zipsync }); } @@ -239,13 +238,13 @@ describe(`archive benchmarks (iterations=${ITERATIONS})`, () => { cleanBeforeUnpack: true }); }); - it('tar.gz', () => { + it('tar-gz', () => { if (!isTarAvailable()) { console.log('Skipping tar test because tar is not available'); return; } if (!tempDir) throw new Error('Temp directory is not set up.'); - bench('tar.gz', { + bench('tar-gz', { pack: ({ archive, demoDir }) => execSync(`tar -czf "${archive}" -C "${demoDir}" .`), unpack: ({ archive, unpackDir }) => execSync(`tar -xzf "${archive}" -C "${unpackDir}"`), archive: path.join(tempDir, 'archive.tar.gz'), @@ -363,21 +362,10 @@ afterAll(() => { // Organize into groups const groupsDef: Array<{ title: string; baseline: string; members: string[] }> = [ { - title: 'Uncompressed (baseline: tar)', - baseline: 'tar', - members: [ - 'tar', - 'zip-store', - 'zipsync-store-all-existing', - 'zipsync-store-none-existing', - 'zipsync-store-partial-existing' - ] - }, - { - title: 'Compressed (baseline: tar.gz)', - baseline: 'tar.gz', + title: 'Compressed (baseline: tar-gz)', + baseline: 'tar-gz', members: [ - 'tar.gz', + 'tar-gz', 'zip-deflate', 'zipsync-deflate-all-existing', 'zipsync-deflate-none-existing', @@ -386,6 +374,17 @@ afterAll(() => { 'zipsync-auto-none-existing', 'zipsync-auto-partial-existing' ] + }, + { + title: 'Uncompressed (baseline: tar)', + baseline: 'tar', + members: [ + 'tar', + 'zip-store', + 'zipsync-store-all-existing', + 'zipsync-store-none-existing', + 'zipsync-store-partial-existing' + ] } ]; // Build per-group markdown tables (no Group column) for each phase @@ -407,8 +406,8 @@ afterAll(() => { } const headers = phase === 'pack' - ? ['Archive', 'min (ms)', 'mean (ms)', 'p95 (ms)', 'max (ms)', 'std (ms)', 'speed×', 'size'] - : ['Archive', 'min (ms)', 'mean (ms)', 'p95 (ms)', 'max (ms)', 'std (ms)', 'speed×']; + ? ['Archive', 'min (ms)', 'mean (ms)', 'p95 (ms)', 'max (ms)', 'std (ms)', 'speed (x)', 'size'] + : ['Archive', 'min (ms)', 'mean (ms)', 'p95 (ms)', 'max (ms)', 'std (ms)', 'speed (x)']; const lines: string[] = []; lines.push('| ' + headers.join(' | ') + ' |'); const align: string[] = headers.map((header, idx) => (idx === 0 ? '---' : '---:')); @@ -441,9 +440,39 @@ afterAll(() => { outputLines.push('# Benchmark Results'); outputLines.push(''); outputLines.push( - 'This document contains performance measurements for packing and unpacking a synthetic dataset using traditional archive tools (tar, zip) and various zipsync modes. The dataset consists of two directory trees (subdir1, subdir2) populated with text files. Each scenario was executed multiple iterations; metrics shown are aggregated timing statistics. The speed× column shows how many times faster a scenario is compared to the baseline in that group (values >1 = faster, <1 = slower). Baseline rows are shown in bold.' + ` +This document contains performance measurements for packing and unpacking a synthetic dataset using tar, zip, and zipsync. + +The dataset consists of two directory trees (subdir1, subdir2) populated with ${NUM_FILES} text files each. + +zipsync scenarios +* "all-existing": unpack directory is fully populated with existing files +* "none-existing": unpack directory is empty +* "partial-existing": unpack directory contains half of the files + +zip and tar scenarios clean the unpack directory before unpacking. 
This time is included in the measurements because +zipsync internally handles cleaning as part of its operation. +` ); outputLines.push(''); + // System info + try { + const cpuList = cpus(); + const cpuModelRaw: string | undefined = cpuList[0]?.model; + const cpuModel: string = cpuModelRaw ? cpuModelRaw.replace(/\|/g, ' ').trim() : 'unknown'; + const logicalCores: number = cpuList.length || 0; + const memGB: string = (totalmem() / 1024 ** 3).toFixed(1); + outputLines.push('**System**'); + outputLines.push(''); + outputLines.push('| OS | Arch | Node | CPU | Logical Cores | Memory |'); + outputLines.push('| --- | --- | --- | --- | ---: | --- |'); + outputLines.push( + `| ${platform()} ${release()} | ${arch()} | ${process.version} | ${cpuModel} | ${logicalCores} | ${memGB} GB |` + ); + outputLines.push(''); + } catch { + // ignore system info errors + } outputLines.push(`Iterations: ${ITERATIONS}`); outputLines.push(''); for (const g of groupsDef) { From 1a709e3e5a2bf6a541ae3828ed824bf2a0f08747 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:03:21 -0700 Subject: [PATCH 12/20] pr feedback --- apps/zipsync/src/unpack.ts | 156 ++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 73 deletions(-) diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts index 1cc227fdff8..f2a7c6b1dbe 100644 --- a/apps/zipsync/src/unpack.ts +++ b/apps/zipsync/src/unpack.ts @@ -8,7 +8,7 @@ import * as zlib from 'node:zlib'; import { type IReadonlyPathTrieNode, LookupByPath } from '@rushstack/lookup-by-path/lib/LookupByPath'; import type { ITerminal } from '@rushstack/terminal'; -import { getDisposableFileHandle, unlinkSync, type IDisposableFileHandle } from './fs'; +import { getDisposableFileHandle, rmdirSync, unlinkSync, type IDisposableFileHandle } from './fs'; import { type IIncrementalZlib, createIncrementalZlib } from './compress'; import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; import { @@ -55,6 +55,80 @@ export interface IZipSyncUnpackResult { otherEntriesDeleted: number; } +const bufferSize: number = 1 << 25; // 32 MiB +const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); +function extractFileFromZip( + terminal: ITerminal, + targetPath: string, + zipBuffer: Buffer, + entry: ICentralDirectoryHeaderParseResult +): void { + terminal.writeDebugLine(`Extracting file: ${entry.filename}`); + const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); + let fileData: Buffer; + using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); + if (entry.header.compressionMethod === STORE_COMPRESSION) { + fileData = fileZipBuffer; + let writeOffset: number = 0; + while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { + const written: number = fs.writeSync( + fileHandle.fd, + fileData, + writeOffset, + fileData.length - writeOffset + ); + writeOffset += written; + } + } else if (entry.header.compressionMethod === DEFLATE_COMPRESSION) { + using inflateIncremental: IIncrementalZlib = createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + let writeOffset: number = 0; + while (lengthBytes > 0 && writeOffset < chunk.byteLength) { + const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); + lengthBytes -= written; + writeOffset += written; + } + }, + 'inflate' + ); + inflateIncremental.update(fileZipBuffer); + inflateIncremental.update(Buffer.alloc(0)); + } else { + throw new Error( + `Unsupported 
compression method: ${entry.header.compressionMethod} for ${entry.filename}` + ); + } +} + +function shouldExtract( + terminal: ITerminal, + targetPath: string, + entry: ICentralDirectoryHeaderParseResult, + metadata: IMetadata | undefined +): boolean { + if (metadata) { + const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; + + if (metadataFile) { + try { + using existingFile: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'r'); + const existingHash: string | false = computeFileHash(existingFile.fd); + if (existingHash === metadataFile.sha1Hash) { + return false; + } + } catch (e) { + if ((e as NodeJS.ErrnoException).code === 'ENOENT') { + terminal.writeDebugLine(`File does not exist, will extract: ${entry.filename}`); + } else { + throw e; + } + } + } + } + return true; +} + export function unpack({ archivePath, targetDirectories: rawTargetDirectories, @@ -210,22 +284,15 @@ export function unpack({ for (const dir of dirsToCleanup) { // Try to remove the directory. If it is not empty, this will throw and we can ignore the error. - try { - fs.rmdirSync(dir); - terminal.writeDebugLine(`Deleted empty directory: ${dir}`); - deletedFoldersCount++; - } catch (e) { - // Probably not empty - terminal.writeDebugLine(`Directory not empty, skipping: ${dir}`); - } + rmdirSync(dir); + terminal.writeDebugLine(`Deleted empty directory: ${dir}`); + deletedFoldersCount++; } terminal.writeDebugLine(`Existing entries tracked: ${scanCount}`); markEnd('unpack.scan.existing'); markStart('unpack.extract.loop'); - const bufferSize: number = 1 << 25; // 32 MiB - const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); const dirsCreated: Set = new Set(); @@ -241,69 +308,12 @@ export function unpack({ dirsCreated.add(targetDir); } - let shouldExtract: boolean = true; - if (metadata) { - const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; - - if (metadataFile) { - try { - using existingFile: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'r'); - const existingHash: string | false = computeFileHash(existingFile.fd); - if (existingHash === metadataFile.sha1Hash) { - shouldExtract = false; - skippedCount++; - terminal.writeDebugLine(`Skip unchanged file: ${entry.filename}`); - } - } catch (e) { - if ((e as NodeJS.ErrnoException).code === 'ENOENT') { - // File does not exist, will extract - } else { - throw e; - } - } - } - } - - if (shouldExtract) { - terminal.writeDebugLine(`Extracting file: ${entry.filename}`); - const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); - let fileData: Buffer; - using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); - if (entry.header.compressionMethod === STORE_COMPRESSION) { - fileData = fileZipBuffer; - let writeOffset: number = 0; - while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { - const written: number = fs.writeSync( - fileHandle.fd, - fileData, - writeOffset, - fileData.length - writeOffset - ); - writeOffset += written; - } - } else if (entry.header.compressionMethod === DEFLATE_COMPRESSION) { - using inflateIncremental: IIncrementalZlib = createIncrementalZlib( - outputBuffer, - (chunk, lengthBytes) => { - let writeOffset: number = 0; - while (lengthBytes > 0 && writeOffset < chunk.byteLength) { - const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); - lengthBytes -= written; - writeOffset += written; - } - }, - 'inflate' - ); - 
inflateIncremental.update(fileZipBuffer); - inflateIncremental.update(Buffer.alloc(0)); - } else { - throw new Error( - `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` - ); - } - - // If data descriptor was used we rely on central directory values already consumed. + if (shouldExtract(terminal, targetPath, entry, metadata)) { + extractFileFromZip(terminal, targetPath, zipBuffer, entry); extractedCount++; + } else { + skippedCount++; + terminal.writeDebugLine(`Skip unchanged file: ${entry.filename}`); } } markEnd('unpack.extract.loop'); From 8c33fbd00f40fcf1a1c2a65a78adde38bed59aa0 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:11:10 -0700 Subject: [PATCH 13/20] fixup benchmark --- apps/zipsync/src/benchmark.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index 73f8ca48fa7..df9382768ac 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -478,14 +478,14 @@ zipsync internally handles cleaning as part of its operation. for (const g of groupsDef) { outputLines.push(`## ${g.title}`); outputLines.push(''); - outputLines.push('### Pack Phase'); - outputLines.push(''); - outputLines.push(...buildGroupTable(g, 'pack')); - outputLines.push(''); outputLines.push('### Unpack Phase'); outputLines.push(''); outputLines.push(...buildGroupTable(g, 'unpack')); outputLines.push(''); + outputLines.push('### Pack Phase'); + outputLines.push(''); + outputLines.push(...buildGroupTable(g, 'pack')); + outputLines.push(''); } const resultText = outputLines.join('\n'); console.log(resultText); From ac896b299789777aeff757670828f77e135fec7c Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:40:08 -0700 Subject: [PATCH 14/20] try zstd --- apps/zipsync/src/ZipSyncCommandLineParser.ts | 4 +- .../src/__snapshots__/index.test.ts.snap | 562 +++++++++++++++++- .../src/__snapshots__/start.test.ts.snap | 13 +- apps/zipsync/src/benchmark.test.ts | 42 +- apps/zipsync/src/compress.ts | 46 +- apps/zipsync/src/fs.ts | 8 +- apps/zipsync/src/index.test.ts | 88 +-- apps/zipsync/src/pack.ts | 38 +- apps/zipsync/src/testUtils.ts | 11 + apps/zipsync/src/unpack.ts | 42 +- apps/zipsync/src/zipSyncUtils.ts | 2 +- apps/zipsync/src/zipUtils.ts | 6 +- 12 files changed, 744 insertions(+), 118 deletions(-) diff --git a/apps/zipsync/src/ZipSyncCommandLineParser.ts b/apps/zipsync/src/ZipSyncCommandLineParser.ts index 3b72d163651..cf11145f224 100644 --- a/apps/zipsync/src/ZipSyncCommandLineParser.ts +++ b/apps/zipsync/src/ZipSyncCommandLineParser.ts @@ -83,8 +83,8 @@ export class ZipSyncCommandLineParser extends CommandLineParser { parameterLongName: '--compression', parameterShortName: '-z', description: - 'Compression strategy when packing. "deflate" attempts DEFLATE for every file (keeps only if smaller); "auto" first skips likely-compressed types before attempting; "store" disables compression.', - alternatives: ['store', 'deflate', 'auto'], + 'Compression strategy when packing. 
"deflate" and "zlib" attempts compression for every file (keeps only if smaller); "auto" first skips likely-compressed types before attempting; "store" disables compression.', + alternatives: ['store', 'deflate', 'zstd', 'auto'], required: true }); } diff --git a/apps/zipsync/src/__snapshots__/index.test.ts.snap b/apps/zipsync/src/__snapshots__/index.test.ts.snap index cabaf56b437..8617fdf1af4 100644 --- a/apps/zipsync/src/__snapshots__/index.test.ts.snap +++ b/apps/zipsync/src/__snapshots__/index.test.ts.snap @@ -1,6 +1,6 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`zipSync tests basic pack test 1`] = ` +exports[`zipSync tests basic pack test (auto) 1`] = ` Object { "filesPacked": 21, "metadata": Object { @@ -91,7 +91,565 @@ Object { } `; -exports[`zipSync tests basic pack test 2`] = ` +exports[`zipSync tests basic pack test (auto) 2`] = ` +Object { + "filesDeleted": 0, + "filesExtracted": 12, + "filesSkipped": 8, + "foldersDeleted": 0, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, + "otherEntriesDeleted": 0, +} +`; + +exports[`zipSync tests basic 
pack test (deflate) 1`] = ` +Object { + "filesPacked": 21, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, +} +`; + +exports[`zipSync tests basic pack test (deflate) 2`] = ` +Object { + "filesDeleted": 0, + "filesExtracted": 12, + "filesSkipped": 8, + "foldersDeleted": 0, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": 
Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, + "otherEntriesDeleted": 0, +} +`; + +exports[`zipSync tests basic pack test (store) 1`] = ` +Object { + "filesPacked": 21, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": 
"36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, +} +`; + +exports[`zipSync tests basic pack test (store) 2`] = ` +Object { + "filesDeleted": 0, + "filesExtracted": 12, + "filesSkipped": 8, + "foldersDeleted": 0, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, + 
"otherEntriesDeleted": 0, +} +`; + +exports[`zipSync tests basic pack test (zstd) 1`] = ` +Object { + "filesPacked": 21, + "metadata": Object { + "files": Object { + "demo-data-1/subdir/file-0.txt": Object { + "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", + "size": 37, + }, + "demo-data-1/subdir/file-1.txt": Object { + "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", + "size": 37, + }, + "demo-data-1/subdir/file-2.txt": Object { + "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", + "size": 37, + }, + "demo-data-1/subdir/file-3.txt": Object { + "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", + "size": 37, + }, + "demo-data-1/subdir/file-4.txt": Object { + "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", + "size": 37, + }, + "demo-data-2/subdir/file-0.txt": Object { + "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", + "size": 37, + }, + "demo-data-2/subdir/file-1.txt": Object { + "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", + "size": 37, + }, + "demo-data-2/subdir/file-2.txt": Object { + "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", + "size": 37, + }, + "demo-data-2/subdir/file-3.txt": Object { + "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", + "size": 37, + }, + "demo-data-2/subdir/file-4.txt": Object { + "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", + "size": 37, + }, + "demo-data-3/subdir/file-0.txt": Object { + "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", + "size": 37, + }, + "demo-data-3/subdir/file-1.txt": Object { + "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", + "size": 37, + }, + "demo-data-3/subdir/file-2.txt": Object { + "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", + "size": 37, + }, + "demo-data-3/subdir/file-3.txt": Object { + "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", + "size": 37, + }, + "demo-data-3/subdir/file-4.txt": Object { + "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", + "size": 37, + }, + "nested/demo/dir/4/subdir/file-0.txt": Object { + "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-1.txt": Object { + "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-2.txt": Object { + "sha1Hash": "cd170868740762e98657d851724406b494581030", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-3.txt": Object { + "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", + "size": 43, + }, + "nested/demo/dir/4/subdir/file-4.txt": Object { + "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", + "size": 43, + }, + }, + "version": "1.0", + }, +} +`; + +exports[`zipSync tests basic pack test (zstd) 2`] = ` Object { "filesDeleted": 0, "filesExtracted": 12, diff --git a/apps/zipsync/src/__snapshots__/start.test.ts.snap b/apps/zipsync/src/__snapshots__/start.test.ts.snap index 2c8b0be1c63..1459c2e3afd 100644 --- a/apps/zipsync/src/__snapshots__/start.test.ts.snap +++ b/apps/zipsync/src/__snapshots__/start.test.ts.snap @@ -5,7 +5,7 @@ exports[`CLI Tool Tests should display help for "zipsync --help" 1`] = ` zipsync 0.0.0 - https://rushstack.io usage: zipsync [-h] [-d] [-v] -m {pack,unpack} -a ARCHIVE_PATH -t - TARGET_DIRECTORIES -b BASE_DIR -z {store,deflate,auto} + TARGET_DIRECTORIES -b BASE_DIR -z {store,deflate,zstd,auto} Optional arguments: @@ -22,11 +22,12 @@ Optional arguments: Target directories to pack or unpack -b BASE_DIR, --base-dir BASE_DIR Base directory for relative paths within the archive - -z {store,deflate,auto}, 
--compression {store,deflate,auto} - Compression strategy when packing. \\"deflate\\" attempts - DEFLATE for every file (keeps only if smaller); - \\"auto\\" first skips likely-compressed types before - attempting; \\"store\\" disables compression. + -z {store,deflate,zstd,auto}, --compression {store,deflate,zstd,auto} + Compression strategy when packing. \\"deflate\\" and + \\"zlib\\" attempts compression for every file (keeps + only if smaller); \\"auto\\" first skips + likely-compressed types before attempting; \\"store\\" + disables compression. For detailed help about a specific command, use: zipsync -h " diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index df9382768ac..0143441181a 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -10,8 +10,12 @@ import { createHash, randomUUID } from 'crypto'; import { NoOpTerminalProvider, Terminal } from '@rushstack/terminal'; +import type { ZipSyncOptionCompression } from './zipSyncUtils'; import { pack } from './pack'; import { unpack } from './unpack'; +import { getCompressionOptions } from './testUtils'; + +const compressionOptions = getCompressionOptions() satisfies ZipSyncOptionCompression[]; // create a tempdir and setup dummy files there for benchmarking const NUM_FILES = 1000; // number of files per subdir @@ -187,7 +191,7 @@ function bench(kind: string, commands: IBenchCommands): void { function benchZipSyncScenario( kind: string, - compression: 'store' | 'deflate' | 'auto', + compression: ZipSyncOptionCompression, existingFiles: 'all' | 'none' | 'partial' ): void { if (!tempDir) throw new Error('Temp directory is not set up.'); @@ -283,32 +287,13 @@ describe(`archive benchmarks (iterations=${ITERATIONS})`, () => { cleanBeforeUnpack: true }); }); - it('zipsync-store-all-existing', () => { - benchZipSyncScenario('zipsync-store-all-existing', 'store', 'all'); - }); - it('zipsync-store-none-existing', () => { - benchZipSyncScenario('zipsync-store-none-existing', 'store', 'none'); - }); - it('zipsync-store-partial-existing', () => { - benchZipSyncScenario('zipsync-store-partial-existing', 'store', 'partial'); - }); - it('zipsync-deflate-all-existing', () => { - benchZipSyncScenario('zipsync-deflate-all-existing', 'deflate', 'all'); - }); - it('zipsync-deflate-none-existing', () => { - benchZipSyncScenario('zipsync-deflate-none-existing', 'deflate', 'none'); - }); - it('zipsync-deflate-partial-existing', () => { - benchZipSyncScenario('zipsync-deflate-partial-existing', 'deflate', 'partial'); - }); - it('zipsync-auto-all-existing', () => { - benchZipSyncScenario('zipsync-auto-all-existing', 'auto', 'all'); - }); - it('zipsync-auto-none-existing', () => { - benchZipSyncScenario('zipsync-auto-none-existing', 'auto', 'none'); - }); - it('zipsync-auto-partial-existing', () => { - benchZipSyncScenario('zipsync-auto-partial-existing', 'auto', 'partial'); + const existingFileOptions: ['all', 'none', 'partial'] = ['all', 'none', 'partial']; + compressionOptions.forEach((compression) => { + existingFileOptions.forEach((existingFiles) => { + it(`zipsync-${compression}-${existingFiles}-existing`, () => { + benchZipSyncScenario(`zipsync-${compression}-${existingFiles}-existing`, compression, existingFiles); + }); + }); }); }); @@ -367,6 +352,9 @@ afterAll(() => { members: [ 'tar-gz', 'zip-deflate', + 'zipsync-zstd-all-existing', + 'zipsync-zstd-none-existing', + 'zipsync-zstd-partial-existing', 'zipsync-deflate-all-existing', 'zipsync-deflate-none-existing', 
'zipsync-deflate-partial-existing', diff --git a/apps/zipsync/src/compress.ts b/apps/zipsync/src/compress.ts index 89c1223b77b..1c89ef8c58e 100644 --- a/apps/zipsync/src/compress.ts +++ b/apps/zipsync/src/compress.ts @@ -84,10 +84,18 @@ export type IIncrementalZlib = Disposable & { update: (inputBuffer: Uint8Array) => void; }; +// zstd is available in Node 22+ +type IExtendedZlib = typeof zlib & { + createZstdCompress: (options?: zlib.ZlibOptions) => Transform; + createZstdDecompress: (options?: zlib.ZlibOptions) => Transform; +}; + +export type IncrementalZlibMode = 'deflate' | 'inflate' | 'zstd-compress' | 'zstd-decompress'; + export function createIncrementalZlib( outputBuffer: Uint8Array, handleOutputChunk: OutputChunkHandler, - mode: 'deflate' | 'inflate' + mode: IncrementalZlibMode ): IIncrementalZlib { // The zlib constructors all allocate a buffer of size chunkSize using Buffer.allocUnsafe // We want to ensure that that invocation doesn't allocate a buffer. @@ -100,15 +108,33 @@ export function createIncrementalZlib( try { //@ts-expect-error Buffer.allocUnsafe = () => outputBuffer; - if (mode === 'inflate') { - compressor = zlib.createInflateRaw({ - chunkSize: outputBuffer.byteLength - }) as unknown as Transform & IZlibInternals; - } else { - compressor = zlib.createDeflateRaw({ - chunkSize: outputBuffer.byteLength, - level: zlib.constants.Z_BEST_COMPRESSION - }) as unknown as Transform & IZlibInternals; + switch (mode) { + case 'inflate': + compressor = zlib.createInflateRaw({ + chunkSize: outputBuffer.byteLength + }) as unknown as Transform & IZlibInternals; + break; + case 'deflate': + compressor = zlib.createDeflateRaw({ + chunkSize: outputBuffer.byteLength, + level: zlib.constants.Z_BEST_COMPRESSION + }) as unknown as Transform & IZlibInternals; + break; + case 'zstd-compress': + // available in Node 22.15+ + compressor = (zlib as IExtendedZlib).createZstdCompress({ + chunkSize: outputBuffer.byteLength + }) as unknown as Transform & IZlibInternals; + break; + case 'zstd-decompress': + // available in Node 22.15+ + compressor = (zlib as IExtendedZlib).createZstdDecompress({ + chunkSize: outputBuffer.byteLength + }) as unknown as Transform & IZlibInternals; + break; + default: + // Unsupported mode (types currently restrict to 'deflate' | 'inflate') + break; } } finally { Buffer.allocUnsafe = savedAllocUnsafe; diff --git a/apps/zipsync/src/fs.ts b/apps/zipsync/src/fs.ts index 96da7164f15..ced3056a6c6 100644 --- a/apps/zipsync/src/fs.ts +++ b/apps/zipsync/src/fs.ts @@ -27,9 +27,10 @@ export function getDisposableFileHandle(path: string, openMode: OpenMode): IDisp return result; } -export function rmdirSync(dirPath: string): void { +export function rmdirSync(dirPath: string): boolean { try { fs.rmdirSync(dirPath); + return true; } catch (e) { if ((e as NodeJS.ErrnoException).code === 'ENOENT' || (e as NodeJS.ErrnoException).code === 'ENOTDIR') { // Not found, ignore @@ -37,11 +38,13 @@ export function rmdirSync(dirPath: string): void { throw e; } } + return false; } -export function unlinkSync(filePath: string): void { +export function unlinkSync(filePath: string): boolean { try { fs.unlinkSync(filePath); + return true; } catch (e) { if (e && (e as NodeJS.ErrnoException).code === 'ENOENT') { // Not found, ignore @@ -49,4 +52,5 @@ export function unlinkSync(filePath: string): void { throw e; } } + return false; } diff --git a/apps/zipsync/src/index.test.ts b/apps/zipsync/src/index.test.ts index e07b680773a..5dc2280a08c 100644 --- a/apps/zipsync/src/index.test.ts +++ 
b/apps/zipsync/src/index.test.ts @@ -7,49 +7,55 @@ import * as fs from 'node:fs'; import { NoOpTerminalProvider } from '@rushstack/terminal/lib/NoOpTerminalProvider'; import { Terminal } from '@rushstack/terminal/lib/Terminal'; -import { pack, unpack } from './index'; -import { getDemoDataDirectoryDisposable } from './testUtils'; +import { pack } from './pack'; +import { unpack } from './unpack'; +import { getCompressionOptions, getDemoDataDirectoryDisposable } from './testUtils'; +import type { ZipSyncOptionCompression } from './zipSyncUtils'; -describe('zipSync tests', () => { - it('basic pack test', () => { - using demoDataDisposable = getDemoDataDirectoryDisposable(5); - const { targetDirectories, baseDir } = demoDataDisposable; - - const terminal = new Terminal(new NoOpTerminalProvider()); - - const archivePath: string = path.join(baseDir, 'archive.zip'); - const packResult = pack({ - terminal: terminal, - compression: 'deflate', - baseDir, - targetDirectories, - archivePath - }); - - expect(packResult).toMatchSnapshot(); - - using unpackDemoDataDisposable = getDemoDataDirectoryDisposable(2); - const { baseDir: unpackBaseDir } = unpackDemoDataDisposable; +const compressionOptions = getCompressionOptions() satisfies ZipSyncOptionCompression[]; - const unpackResult = unpack({ - terminal: terminal, - archivePath, - baseDir: unpackBaseDir, - targetDirectories - }); - - expect(unpackResult).toMatchSnapshot(); - - // Verify files were extracted - for (const targetDirectory of targetDirectories) { - const sourceDir: string = path.join(baseDir, targetDirectory); - for (let i: number = 0; i < 5; ++i) { - const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); - const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); - expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( - fs.readFileSync(sourceFile, { encoding: 'utf-8' }) - ); +describe('zipSync tests', () => { + compressionOptions.forEach((compression: ZipSyncOptionCompression) => { + it(`basic pack test (${compression})`, () => { + using demoDataDisposable = getDemoDataDirectoryDisposable(5); + const { targetDirectories, baseDir } = demoDataDisposable; + + const terminal = new Terminal(new NoOpTerminalProvider()); + + const archivePath: string = path.join(baseDir, 'archive.zip'); + const packResult = pack({ + terminal: terminal, + compression, + baseDir, + targetDirectories, + archivePath + }); + + expect(packResult).toMatchSnapshot(); + + using unpackDemoDataDisposable = getDemoDataDirectoryDisposable(2); + const { baseDir: unpackBaseDir } = unpackDemoDataDisposable; + + const unpackResult = unpack({ + terminal: terminal, + archivePath, + baseDir: unpackBaseDir, + targetDirectories + }); + + expect(unpackResult).toMatchSnapshot(); + + // Verify files were extracted + for (const targetDirectory of targetDirectories) { + const sourceDir: string = path.join(baseDir, targetDirectory); + for (let i: number = 0; i < 5; ++i) { + const sourceFile: string = path.join(sourceDir, 'subdir', `file-${i}.txt`); + const destFile: string = path.join(unpackBaseDir, targetDirectory, 'subdir', `file-${i}.txt`); + expect(fs.readFileSync(destFile, { encoding: 'utf-8' })).toEqual( + fs.readFileSync(sourceFile, { encoding: 'utf-8' }) + ); + } } - } + }); }); }); diff --git a/apps/zipsync/src/pack.ts b/apps/zipsync/src/pack.ts index 8a8a25e57d7..4268301462a 100644 --- a/apps/zipsync/src/pack.ts +++ b/apps/zipsync/src/pack.ts @@ -10,13 +10,14 @@ import type { ITerminal } from 
'@rushstack/terminal/lib/ITerminal'; import { crc32Builder } from './crc32'; import { DISPOSE_SYMBOL, getDisposableFileHandle, type IDisposableFileHandle } from './fs'; -import { type IIncrementalZlib, createIncrementalZlib } from './compress'; +import { type IIncrementalZlib, type IncrementalZlibMode, createIncrementalZlib } from './compress'; import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; import { writeLocalFileHeader, writeDataDescriptor, writeCentralDirectoryHeader, writeEndOfCentralDirectory, + ZSTD_COMPRESSION, DEFLATE_COMPRESSION, STORE_COMPRESSION, type ZipMetaCompressionMethod, @@ -35,6 +36,19 @@ import { const LIKELY_COMPRESSED_EXTENSION_REGEX: RegExp = /\.(?:zip|gz|tgz|bz2|xz|7z|rar|jpg|jpeg|png|gif|webp|avif|mp4|m4v|mov|mkv|webm|mp3|ogg|aac|flac|pdf|woff|woff2)$/; +const zlibPackModes: Record<ZipMetaCompressionMethod, IncrementalZlibMode | undefined> = { + [ZSTD_COMPRESSION]: 'zstd-compress', + [DEFLATE_COMPRESSION]: 'deflate', + [STORE_COMPRESSION]: undefined +} as const; + +const zipSyncCompressionOptions: Record<ZipSyncOptionCompression, ZipMetaCompressionMethod> = { + store: STORE_COMPRESSION, + deflate: DEFLATE_COMPRESSION, + zstd: ZSTD_COMPRESSION, + auto: DEFLATE_COMPRESSION // 'auto' is handled specially in the code +} as const; + /** * @public * Options for zipsync */ @@ -182,7 +196,7 @@ export function pack({ }; let shouldCompress: boolean = false; - if (compression === 'deflate') { + if (compression === 'deflate' || compression === 'zstd') { shouldCompress = true; } else if (compression === 'auto') { // Heuristic: skip compression for small files or likely-already-compressed files @@ -196,8 +210,8 @@ } const compressionMethod: ZipMetaCompressionMethod = shouldCompress - ? DEFLATE_COMPRESSION - : STORE_COMPRESSION; + ? zipSyncCompressionOptions[compression] + : zipSyncCompressionOptions.store; const entry: IFileEntry = { filename: relativePath, @@ -217,14 +231,14 @@ let uncompressedSize: number = 0; let compressedSize: number = 0; - using deflateIncremental: IIncrementalZlib | undefined = shouldCompress + using incrementalZlib: IIncrementalZlib | undefined = shouldCompress ? createIncrementalZlib( outputBuffer, (chunk, lengthBytes) => { writeChunkToZip(chunk, lengthBytes); compressedSize += lengthBytes; }, - 'deflate' + zlibPackModes[compressionMethod]!
) : undefined; @@ -234,8 +248,8 @@ export function pack({ const slice: Buffer = inputBuffer.subarray(0, bytesInInputBuffer); sha1HashBuilder.update(slice); crc32 = crc32Builder(slice, crc32); - if (deflateIncremental) { - deflateIncremental.update(slice); + if (incrementalZlib) { + incrementalZlib.update(slice); } else { writeChunkToZip(slice, bytesInInputBuffer); } @@ -243,7 +257,7 @@ export function pack({ }); // finalize hashes, compression - deflateIncremental?.update(Buffer.alloc(0)); + incrementalZlib?.update(Buffer.alloc(0)); crc32 = crc32 >>> 0; const sha1Hash: string = sha1HashBuilder.digest('hex'); @@ -289,13 +303,13 @@ export function pack({ `Metadata size=${metadataBuffer.length} bytes, fileCount=${Object.keys(metadata.files).length}` ); - let metadataCompressionMethod: ZipMetaCompressionMethod = STORE_COMPRESSION; + let metadataCompressionMethod: ZipMetaCompressionMethod = zipSyncCompressionOptions.store; let metadataData: Buffer = metadataBuffer; let metadataCompressedSize: number = metadataBuffer.length; - if ((compression === 'deflate' || compression === 'auto') && metadataBuffer.length > 64) { + if (compression !== 'store' && metadataBuffer.length > 64) { const compressed: Buffer = zlib.deflateRawSync(metadataBuffer, { level: 9 }); if (compressed.length < metadataBuffer.length) { - metadataCompressionMethod = DEFLATE_COMPRESSION; + metadataCompressionMethod = zipSyncCompressionOptions.deflate; metadataData = compressed; metadataCompressedSize = compressed.length; terminal.writeDebugLine( diff --git a/apps/zipsync/src/testUtils.ts b/apps/zipsync/src/testUtils.ts index 489c14445a5..36138fbdc37 100644 --- a/apps/zipsync/src/testUtils.ts +++ b/apps/zipsync/src/testUtils.ts @@ -5,6 +5,7 @@ import { tmpdir } from 'node:os'; import * as path from 'node:path'; import * as fs from 'node:fs'; import * as crypto from 'node:crypto'; +import type { ZipSyncOptionCompression } from './zipSyncUtils'; export function getTempDir(): string { const randomId: string = crypto.randomUUID(); @@ -42,3 +43,13 @@ export function getDemoDataDirectoryDisposable(numFiles: number): { } }; } + +export function getCompressionOptions(): ZipSyncOptionCompression[] { + const options: ZipSyncOptionCompression[] = ['store', 'deflate', 'auto']; + // zstd is available in Node 22.15+ + const [major, minor] = process.versions.node.split('.').map((x) => parseInt(x, 10)); + if (major > 22 || (major === 22 && minor >= 15)) { + options.push('zstd'); + } + return options; +} diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts index f2a7c6b1dbe..48070bb962c 100644 --- a/apps/zipsync/src/unpack.ts +++ b/apps/zipsync/src/unpack.ts @@ -9,20 +9,28 @@ import { type IReadonlyPathTrieNode, LookupByPath } from '@rushstack/lookup-by-p import type { ITerminal } from '@rushstack/terminal'; import { getDisposableFileHandle, rmdirSync, unlinkSync, type IDisposableFileHandle } from './fs'; -import { type IIncrementalZlib, createIncrementalZlib } from './compress'; +import { type IIncrementalZlib, type IncrementalZlibMode, createIncrementalZlib } from './compress'; import { markStart, markEnd, getDuration, emitSummary, formatDuration } from './perf'; import { findEndOfCentralDirectory, parseCentralDirectoryHeader, getFileFromZip, + ZSTD_COMPRESSION, DEFLATE_COMPRESSION, STORE_COMPRESSION, type IEndOfCentralDirectory, - type ICentralDirectoryHeaderParseResult + type ICentralDirectoryHeaderParseResult, + type ZipMetaCompressionMethod } from './zipUtils'; import { computeFileHash } from './hash'; import { 
METADATA_FILENAME, METADATA_VERSION, type IDirQueueItem, type IMetadata } from './zipSyncUtils'; +const zlibUnpackModes: Record<ZipMetaCompressionMethod, IncrementalZlibMode | undefined> = { + [ZSTD_COMPRESSION]: 'zstd-decompress', + [DEFLATE_COMPRESSION]: 'inflate', + [STORE_COMPRESSION]: undefined +} as const; + /** * @public * Options for zipsync */ @@ -79,8 +87,11 @@ function extractFileFromZip( ); writeOffset += written; } - } else if (entry.header.compressionMethod === DEFLATE_COMPRESSION) { - using inflateIncremental: IIncrementalZlib = createIncrementalZlib( + } else if ( + entry.header.compressionMethod === DEFLATE_COMPRESSION || + entry.header.compressionMethod === ZSTD_COMPRESSION + ) { + using incrementalZlib: IIncrementalZlib = createIncrementalZlib( outputBuffer, (chunk, lengthBytes) => { let writeOffset: number = 0; @@ -90,10 +101,10 @@ writeOffset += written; } }, - 'inflate' + zlibUnpackModes[entry.header.compressionMethod]! ); - inflateIncremental.update(fileZipBuffer); - inflateIncremental.update(Buffer.alloc(0)); + incrementalZlib.update(fileZipBuffer); + incrementalZlib.update(Buffer.alloc(0)); } else { throw new Error( `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` ); @@ -265,8 +276,9 @@ export function unpack({ terminal.writeVerboseLine(`${padding}${item.name}`); if (!childNode?.value) { terminal.writeDebugLine(`Deleting file: ${relativePath}`); - unlinkSync(relativePath); - deletedFilesCount++; + if (unlinkSync(relativePath)) { + deletedFilesCount++; + } } } else if (item.isDirectory()) { terminal.writeVerboseLine(`${padding}${item.name}/`); @@ -276,17 +288,19 @@ } } else { terminal.writeVerboseLine(`${padding}${item.name} (not file or directory, deleting)`); - unlinkSync(relativePath); - deletedOtherCount++; + if (unlinkSync(relativePath)) { + deletedOtherCount++; + } } } } for (const dir of dirsToCleanup) { // Try to remove the directory. If it is not empty, this will throw and we can ignore the error.
- rmdirSync(dir); - terminal.writeDebugLine(`Deleted empty directory: ${dir}`); - deletedFoldersCount++; + if (rmdirSync(dir)) { + terminal.writeDebugLine(`Deleted empty directory: ${dir}`); + deletedFoldersCount++; + } } terminal.writeDebugLine(`Existing entries tracked: ${scanCount}`); diff --git a/apps/zipsync/src/zipSyncUtils.ts b/apps/zipsync/src/zipSyncUtils.ts index 99b2e36219a..e4a6cbc77ce 100644 --- a/apps/zipsync/src/zipSyncUtils.ts +++ b/apps/zipsync/src/zipSyncUtils.ts @@ -24,4 +24,4 @@ export interface IMetadata { export type IZipSyncMode = 'pack' | 'unpack'; -export type ZipSyncOptionCompression = 'store' | 'deflate' | 'auto'; +export type ZipSyncOptionCompression = 'store' | 'deflate' | 'zstd' | 'auto'; diff --git a/apps/zipsync/src/zipUtils.ts b/apps/zipsync/src/zipUtils.ts index 746fb9dd71e..eb00477d8a5 100644 --- a/apps/zipsync/src/zipUtils.ts +++ b/apps/zipsync/src/zipUtils.ts @@ -10,7 +10,11 @@ const DATA_DESCRIPTOR_SIGNATURE: number = 0x08074b50; export const STORE_COMPRESSION: 0 = 0; export const DEFLATE_COMPRESSION: 8 = 8; -export type ZipMetaCompressionMethod = typeof STORE_COMPRESSION | typeof DEFLATE_COMPRESSION; +export const ZSTD_COMPRESSION: 93 = 93; +export type ZipMetaCompressionMethod = + | typeof STORE_COMPRESSION + | typeof DEFLATE_COMPRESSION + | typeof ZSTD_COMPRESSION; export interface IFileEntry { filename: string; From bd3e70199644ca9fef451c8b66153b96709845c6 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:27:56 -0700 Subject: [PATCH 15/20] fixup --- apps/zipsync/src/unpack.ts | 148 +++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 79 deletions(-) diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts index 48070bb962c..d12bb13ab55 100644 --- a/apps/zipsync/src/unpack.ts +++ b/apps/zipsync/src/unpack.ts @@ -63,83 +63,6 @@ export interface IZipSyncUnpackResult { otherEntriesDeleted: number; } -const bufferSize: number = 1 << 25; // 32 MiB -const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); -function extractFileFromZip( - terminal: ITerminal, - targetPath: string, - zipBuffer: Buffer, - entry: ICentralDirectoryHeaderParseResult -): void { - terminal.writeDebugLine(`Extracting file: ${entry.filename}`); - const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); - let fileData: Buffer; - using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); - if (entry.header.compressionMethod === STORE_COMPRESSION) { - fileData = fileZipBuffer; - let writeOffset: number = 0; - while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { - const written: number = fs.writeSync( - fileHandle.fd, - fileData, - writeOffset, - fileData.length - writeOffset - ); - writeOffset += written; - } - } else if ( - entry.header.compressionMethod === DEFLATE_COMPRESSION || - entry.header.compressionMethod === ZSTD_COMPRESSION - ) { - using incrementalZlib: IIncrementalZlib = createIncrementalZlib( - outputBuffer, - (chunk, lengthBytes) => { - let writeOffset: number = 0; - while (lengthBytes > 0 && writeOffset < chunk.byteLength) { - const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); - lengthBytes -= written; - writeOffset += written; - } - }, - zlibUnpackModes[entry.header.compressionMethod]! 
- ); - incrementalZlib.update(fileZipBuffer); - incrementalZlib.update(Buffer.alloc(0)); - } else { - throw new Error( - `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` - ); - } -} - -function shouldExtract( - terminal: ITerminal, - targetPath: string, - entry: ICentralDirectoryHeaderParseResult, - metadata: IMetadata | undefined -): boolean { - if (metadata) { - const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; - - if (metadataFile) { - try { - using existingFile: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'r'); - const existingHash: string | false = computeFileHash(existingFile.fd); - if (existingHash === metadataFile.sha1Hash) { - return false; - } - } catch (e) { - if ((e as NodeJS.ErrnoException).code === 'ENOENT') { - terminal.writeDebugLine(`File does not exist, will extract: ${entry.filename}`); - } else { - throw e; - } - } - } - } - return true; -} - export function unpack({ archivePath, targetDirectories: rawTargetDirectories, @@ -308,6 +231,73 @@ export function unpack({ markStart('unpack.extract.loop'); + const bufferSize: number = 1 << 25; // 32 MiB + const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + function extractFileFromZip(targetPath: string, entry: ICentralDirectoryHeaderParseResult): void { + terminal.writeDebugLine(`Extracting file: ${entry.filename}`); + const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); + let fileData: Buffer; + using fileHandle: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'w'); + if (entry.header.compressionMethod === STORE_COMPRESSION) { + fileData = fileZipBuffer; + let writeOffset: number = 0; + while (writeOffset < fileData.length && !isNaN(fileHandle.fd)) { + const written: number = fs.writeSync( + fileHandle.fd, + fileData, + writeOffset, + fileData.length - writeOffset + ); + writeOffset += written; + } + } else if ( + entry.header.compressionMethod === DEFLATE_COMPRESSION || + entry.header.compressionMethod === ZSTD_COMPRESSION + ) { + using incrementalZlib: IIncrementalZlib = createIncrementalZlib( + outputBuffer, + (chunk, lengthBytes) => { + let writeOffset: number = 0; + while (lengthBytes > 0 && writeOffset < chunk.byteLength) { + const written: number = fs.writeSync(fileHandle.fd, chunk, writeOffset, lengthBytes); + lengthBytes -= written; + writeOffset += written; + } + }, + zlibUnpackModes[entry.header.compressionMethod]! 
+ ); + incrementalZlib.update(fileZipBuffer); + incrementalZlib.update(Buffer.alloc(0)); + } else { + throw new Error( + `Unsupported compression method: ${entry.header.compressionMethod} for ${entry.filename}` + ); + } + } + + function shouldExtract(targetPath: string, entry: ICentralDirectoryHeaderParseResult): boolean { + if (metadata) { + const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; + + if (metadataFile) { + try { + using existingFile: IDisposableFileHandle = getDisposableFileHandle(targetPath, 'r'); + const existingHash: string | false = computeFileHash(existingFile.fd); + if (existingHash === metadataFile.sha1Hash) { + return false; + } + } catch (e) { + if ((e as NodeJS.ErrnoException).code === 'ENOENT') { + terminal.writeDebugLine(`File does not exist, will extract: ${entry.filename}`); + } else { + throw e; + } + } + } + } + return true; + } + const dirsCreated: Set = new Set(); for (const entry of entries) { @@ -322,8 +312,8 @@ export function unpack({ dirsCreated.add(targetDir); } - if (shouldExtract(terminal, targetPath, entry, metadata)) { - extractFileFromZip(terminal, targetPath, zipBuffer, entry); + if (shouldExtract(targetPath, entry)) { + extractFileFromZip(targetPath, entry); extractedCount++; } else { skippedCount++; From c8c1bb7cfb8ec35ad0ea0d5273763e8af9fdf286 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:15:29 -0700 Subject: [PATCH 16/20] fix tests --- .../src/__snapshots__/index.test.ts.snap | 745 ------------------ .../__snapshots__/workerAsync.test.ts.snap | 187 ----- apps/zipsync/src/benchmark.test.ts | 10 +- apps/zipsync/src/index.test.ts | 30 +- apps/zipsync/src/testUtils.ts | 22 +- apps/zipsync/src/workerAsync.test.ts | 12 +- 6 files changed, 49 insertions(+), 957 deletions(-) delete mode 100644 apps/zipsync/src/__snapshots__/index.test.ts.snap delete mode 100644 apps/zipsync/src/__snapshots__/workerAsync.test.ts.snap diff --git a/apps/zipsync/src/__snapshots__/index.test.ts.snap b/apps/zipsync/src/__snapshots__/index.test.ts.snap deleted file mode 100644 index 8617fdf1af4..00000000000 --- a/apps/zipsync/src/__snapshots__/index.test.ts.snap +++ /dev/null @@ -1,745 +0,0 @@ -// Jest Snapshot v1, https://goo.gl/fbAQLP - -exports[`zipSync tests basic pack test (auto) 1`] = ` -Object { - "filesPacked": 21, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - 
"demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, -} -`; - -exports[`zipSync tests basic pack test (auto) 2`] = ` -Object { - "filesDeleted": 0, - "filesExtracted": 12, - "filesSkipped": 8, - "foldersDeleted": 0, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - 
"nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, - "otherEntriesDeleted": 0, -} -`; - -exports[`zipSync tests basic pack test (deflate) 1`] = ` -Object { - "filesPacked": 21, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, -} -`; - -exports[`zipSync tests basic pack test (deflate) 2`] = ` -Object { - "filesDeleted": 0, - "filesExtracted": 12, - "filesSkipped": 8, - "foldersDeleted": 0, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": 
Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, - "otherEntriesDeleted": 0, -} -`; - -exports[`zipSync tests basic pack test (store) 1`] = ` -Object { - "filesPacked": 21, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": 
"7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, -} -`; - -exports[`zipSync tests basic pack test (store) 2`] = ` -Object { - "filesDeleted": 0, - "filesExtracted": 12, - "filesSkipped": 8, - "foldersDeleted": 0, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": 
"70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, - "otherEntriesDeleted": 0, -} -`; - -exports[`zipSync tests basic pack test (zstd) 1`] = ` -Object { - "filesPacked": 21, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, -} -`; - -exports[`zipSync tests basic pack test (zstd) 2`] = ` -Object { - "filesDeleted": 0, - "filesExtracted": 12, - "filesSkipped": 8, - "foldersDeleted": 0, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", 
- "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, - "otherEntriesDeleted": 0, -} -`; diff --git a/apps/zipsync/src/__snapshots__/workerAsync.test.ts.snap b/apps/zipsync/src/__snapshots__/workerAsync.test.ts.snap deleted file mode 100644 index 9eec6c0bd40..00000000000 --- a/apps/zipsync/src/__snapshots__/workerAsync.test.ts.snap +++ /dev/null @@ -1,187 +0,0 @@ -// Jest Snapshot v1, https://goo.gl/fbAQLP - -exports[`zipSyncWorkerAsync tests basic pack test 1`] = ` -Object { - "filesPacked": 21, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": 
"423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": "fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, -} -`; - -exports[`zipSyncWorkerAsync tests basic pack test 2`] = ` -Object { - "filesDeleted": 0, - "filesExtracted": 12, - "filesSkipped": 8, - "foldersDeleted": 0, - "metadata": Object { - "files": Object { - "demo-data-1/subdir/file-0.txt": Object { - "sha1Hash": "a7f3d7debc1ad5f045f672be8a8e67d32c909fee", - "size": 37, - }, - "demo-data-1/subdir/file-1.txt": Object { - "sha1Hash": "748119e4bafabea3b45d3f790c9a9b676ebab405", - "size": 37, - }, - "demo-data-1/subdir/file-2.txt": Object { - "sha1Hash": "ace390b17191467c0a0480c264b6a2af0dff0d07", - "size": 37, - }, - "demo-data-1/subdir/file-3.txt": Object { - "sha1Hash": "c7e2d154422af74d8b9242565e4a5988d5361b8c", - "size": 37, - }, - "demo-data-1/subdir/file-4.txt": Object { - "sha1Hash": "f81f87be14a0c7d5e75ae0dd730526dc333fe1bd", - "size": 37, - }, - "demo-data-2/subdir/file-0.txt": Object { - "sha1Hash": "423a13f056c5a2a3c5aaba7d8097af61f49819d5", - "size": 37, - }, - "demo-data-2/subdir/file-1.txt": Object { - "sha1Hash": "0141eb229e83217fbf001e339ca58f0f0c820df1", - "size": 37, - }, - "demo-data-2/subdir/file-2.txt": Object { - "sha1Hash": "31fb9ce64e21441701e44a7b9d2cc50446d55462", - "size": 37, - }, - "demo-data-2/subdir/file-3.txt": Object { - "sha1Hash": "7e95ef682efc09c20871ae9e7abf8383b28b1fb1", - "size": 37, - }, - "demo-data-2/subdir/file-4.txt": Object { - "sha1Hash": "744ee77039600639a4c2d50b0248ad39e2bc10d5", - "size": 37, - }, - "demo-data-3/subdir/file-0.txt": Object { - "sha1Hash": "504cd07cb5b0d6dfc186247b34b2b960668a9977", - "size": 37, - }, - "demo-data-3/subdir/file-1.txt": Object { - "sha1Hash": "b7b12f3f33565792d8563c23edaccd09b0f324ab", - "size": 37, - }, - "demo-data-3/subdir/file-2.txt": Object { - "sha1Hash": 
"fab730bce23d9a87d3eb0b7bb30b992a47263505", - "size": 37, - }, - "demo-data-3/subdir/file-3.txt": Object { - "sha1Hash": "36a9851256e27975b707ec335c9099ee4f70aac9", - "size": 37, - }, - "demo-data-3/subdir/file-4.txt": Object { - "sha1Hash": "078d98ee46c668fefe397fa81b682ed24b70418b", - "size": 37, - }, - "nested/demo/dir/4/subdir/file-0.txt": Object { - "sha1Hash": "70bcbc5ec0a9e2394edf1760dc2b57bff4f2486f", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-1.txt": Object { - "sha1Hash": "0977052a86aa8ccd3ee550218d529e34550a62d7", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-2.txt": Object { - "sha1Hash": "cd170868740762e98657d851724406b494581030", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-3.txt": Object { - "sha1Hash": "6de572afa4315a597a0a43e5636a228573993da5", - "size": 43, - }, - "nested/demo/dir/4/subdir/file-4.txt": Object { - "sha1Hash": "c391c02c9bc1139bee8452c911bce484497169ba", - "size": 43, - }, - }, - "version": "1.0", - }, - "otherEntriesDeleted": 0, -} -`; diff --git a/apps/zipsync/src/benchmark.test.ts b/apps/zipsync/src/benchmark.test.ts index 0143441181a..fdbdaa67d6b 100644 --- a/apps/zipsync/src/benchmark.test.ts +++ b/apps/zipsync/src/benchmark.test.ts @@ -13,9 +13,8 @@ import { NoOpTerminalProvider, Terminal } from '@rushstack/terminal'; import type { ZipSyncOptionCompression } from './zipSyncUtils'; import { pack } from './pack'; import { unpack } from './unpack'; -import { getCompressionOptions } from './testUtils'; -const compressionOptions = getCompressionOptions() satisfies ZipSyncOptionCompression[]; +const compressionOptions = ['store', 'deflate', 'zstd', 'auto'] satisfies ZipSyncOptionCompression[]; // create a tempdir and setup dummy files there for benchmarking const NUM_FILES = 1000; // number of files per subdir @@ -289,6 +288,13 @@ describe(`archive benchmarks (iterations=${ITERATIONS})`, () => { }); const existingFileOptions: ['all', 'none', 'partial'] = ['all', 'none', 'partial']; compressionOptions.forEach((compression) => { + if (compression === 'zstd') { + const [major, minor] = process.versions.node.split('.').map((x) => parseInt(x, 10)); + if (major < 22 || (major === 22 && minor < 15)) { + console.warn(`Skipping zstd test on Node ${process.versions.node}`); + return; + } + } existingFileOptions.forEach((existingFiles) => { it(`zipsync-${compression}-${existingFiles}-existing`, () => { benchZipSyncScenario(`zipsync-${compression}-${existingFiles}-existing`, compression, existingFiles); diff --git a/apps/zipsync/src/index.test.ts b/apps/zipsync/src/index.test.ts index 5dc2280a08c..5fe98676aca 100644 --- a/apps/zipsync/src/index.test.ts +++ b/apps/zipsync/src/index.test.ts @@ -9,16 +9,24 @@ import { Terminal } from '@rushstack/terminal/lib/Terminal'; import { pack } from './pack'; import { unpack } from './unpack'; -import { getCompressionOptions, getDemoDataDirectoryDisposable } from './testUtils'; +import { getDemoDataDirectoryDisposable } from './testUtils'; import type { ZipSyncOptionCompression } from './zipSyncUtils'; -const compressionOptions = getCompressionOptions() satisfies ZipSyncOptionCompression[]; - describe('zipSync tests', () => { - compressionOptions.forEach((compression: ZipSyncOptionCompression) => { - it(`basic pack test (${compression})`, () => { + it(`basic pack test`, () => { + const compressionOptions = ['store', 'deflate', 'zstd', 'auto'] satisfies ZipSyncOptionCompression[]; + compressionOptions.forEach((compression) => { + if (compression === 'zstd') { + const [major, minor] = 
process.versions.node.split('.').map((x) => parseInt(x, 10)); + if (major < 22 || (major === 22 && minor < 15)) { + // eslint-disable-next-line no-console + console.warn(`Skipping zstd test on Node ${process.versions.node}`); + return; + } + } + using demoDataDisposable = getDemoDataDirectoryDisposable(5); - const { targetDirectories, baseDir } = demoDataDisposable; + const { targetDirectories, baseDir, metadata } = demoDataDisposable; const terminal = new Terminal(new NoOpTerminalProvider()); @@ -31,7 +39,7 @@ describe('zipSync tests', () => { archivePath }); - expect(packResult).toMatchSnapshot(); + expect(packResult).toMatchObject({ filesPacked: 21, metadata }); using unpackDemoDataDisposable = getDemoDataDirectoryDisposable(2); const { baseDir: unpackBaseDir } = unpackDemoDataDisposable; @@ -43,7 +51,13 @@ describe('zipSync tests', () => { targetDirectories }); - expect(unpackResult).toMatchSnapshot(); + expect(unpackResult).toMatchObject({ + filesDeleted: 0, + filesExtracted: 12, + filesSkipped: 8, + foldersDeleted: 0, + metadata + }); // Verify files were extracted for (const targetDirectory of targetDirectories) { diff --git a/apps/zipsync/src/testUtils.ts b/apps/zipsync/src/testUtils.ts index 36138fbdc37..ed1555c3cd8 100644 --- a/apps/zipsync/src/testUtils.ts +++ b/apps/zipsync/src/testUtils.ts @@ -5,7 +5,7 @@ import { tmpdir } from 'node:os'; import * as path from 'node:path'; import * as fs from 'node:fs'; import * as crypto from 'node:crypto'; -import type { ZipSyncOptionCompression } from './zipSyncUtils'; +import type { IMetadata } from './zipSyncUtils'; export function getTempDir(): string { const randomId: string = crypto.randomUUID(); @@ -17,10 +17,13 @@ export function getTempDir(): string { export function getDemoDataDirectoryDisposable(numFiles: number): { targetDirectories: string[]; baseDir: string; + metadata: IMetadata; [Symbol.dispose](): void; } { const baseDir: string = getTempDir(); + const metadata: IMetadata = { files: {}, version: '1.0' }; + const targetDirectories: string[] = ['demo-data-1', 'demo-data-2', 'demo-data-3', 'nested/demo/dir/4'].map( (folderName) => { const dataDir: string = path.join(baseDir, folderName); @@ -29,7 +32,11 @@ export function getDemoDataDirectoryDisposable(numFiles: number): { fs.mkdirSync(subdir); for (let i: number = 0; i < numFiles; ++i) { const filePath: string = path.join(subdir, `file-${i}.txt`); - fs.writeFileSync(filePath, `This is file ${i} in ${folderName}/subdir\n`, { encoding: 'utf-8' }); + const content: string = `This is file ${i} in ${folderName}/subdir\n`; + const sha1Hash: string = crypto.createHash('sha1').update(content).digest('hex'); + fs.writeFileSync(filePath, content, { encoding: 'utf-8' }); + const relativeFilePath: string = path.relative(baseDir, filePath).replace(/\\/g, '/'); + metadata.files[relativeFilePath] = { size: content.length, sha1Hash }; } return folderName; } @@ -38,18 +45,9 @@ export function getDemoDataDirectoryDisposable(numFiles: number): { return { targetDirectories, baseDir, + metadata, [Symbol.dispose]() { fs.rmSync(baseDir, { recursive: true, force: true }); } }; } - -export function getCompressionOptions(): ZipSyncOptionCompression[] { - const options: ZipSyncOptionCompression[] = ['store', 'deflate', 'auto']; - // zstd is available in Node 22.15+ - const [major, minor] = process.versions.node.split('.').map((x) => parseInt(x, 10)); - if (major > 22 || (major === 22 && minor >= 15)) { - options.push('zstd'); - } - return options; -} diff --git a/apps/zipsync/src/workerAsync.test.ts 
b/apps/zipsync/src/workerAsync.test.ts index 560ff1c5701..1392f51223f 100644 --- a/apps/zipsync/src/workerAsync.test.ts +++ b/apps/zipsync/src/workerAsync.test.ts @@ -11,7 +11,7 @@ import { getDemoDataDirectoryDisposable } from './testUtils'; describe('zipSyncWorkerAsync tests', () => { it('basic pack test', async () => { using demoDataDisposable = getDemoDataDirectoryDisposable(5); - const { targetDirectories, baseDir } = demoDataDisposable; + const { targetDirectories, baseDir, metadata } = demoDataDisposable; const archivePath: string = path.join(baseDir, 'archive.zip'); const { zipSyncReturn: packResult } = await packWorkerAsync({ @@ -21,7 +21,7 @@ describe('zipSyncWorkerAsync tests', () => { archivePath }); - expect(packResult).toMatchSnapshot(); + expect(packResult).toMatchObject({ filesPacked: 21, metadata }); using unpackDemoDataDisposable = getDemoDataDirectoryDisposable(2); const { baseDir: unpackBaseDir } = unpackDemoDataDisposable; @@ -32,7 +32,13 @@ describe('zipSyncWorkerAsync tests', () => { targetDirectories }); - expect(unpackResult).toMatchSnapshot(); + expect(unpackResult).toMatchObject({ + filesDeleted: 0, + filesExtracted: 12, + filesSkipped: 8, + foldersDeleted: 0, + metadata + }); // Verify files were extracted for (const targetDirectory of targetDirectories) { From 9433c9a826d2c9fbeb0e2ec3c2e16fcfbadb2572 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Fri, 26 Sep 2025 16:10:30 -0700 Subject: [PATCH 17/20] revert build cache integration --- .../bmiddha-zipsync-3_2025-09-16-00-42.json | 10 - .../build-tests-subspace/repo-state.json | 2 +- .../config/subspaces/default/pnpm-lock.yaml | 3 - common/reviews/api/rush-lib.api.md | 2 +- libraries/rush-lib/package.json | 1 - libraries/rush-lib/src/logic/RushConstants.ts | 2 +- .../logic/buildCache/OperationBuildCache.ts | 193 ++++++++---------- 7 files changed, 88 insertions(+), 125 deletions(-) delete mode 100644 common/changes/@microsoft/rush/bmiddha-zipsync-3_2025-09-16-00-42.json diff --git a/common/changes/@microsoft/rush/bmiddha-zipsync-3_2025-09-16-00-42.json b/common/changes/@microsoft/rush/bmiddha-zipsync-3_2025-09-16-00-42.json deleted file mode 100644 index 1dea9e02004..00000000000 --- a/common/changes/@microsoft/rush/bmiddha-zipsync-3_2025-09-16-00-42.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "changes": [ - { - "packageName": "@microsoft/rush", - "comment": "Add support for zipsync for build cache packing and unpacking.", - "type": "none" - } - ], - "packageName": "@microsoft/rush" -} \ No newline at end of file diff --git a/common/config/subspaces/build-tests-subspace/repo-state.json b/common/config/subspaces/build-tests-subspace/repo-state.json index 54a27474b51..5264367b934 100644 --- a/common/config/subspaces/build-tests-subspace/repo-state.json +++ b/common/config/subspaces/build-tests-subspace/repo-state.json @@ -2,5 +2,5 @@ { "pnpmShrinkwrapHash": "2ac01ba33e09661dc0e7d7faa36d215bb3d3b91e", "preferredVersionsHash": "550b4cee0bef4e97db6c6aad726df5149d20e7d9", - "packageJsonInjectedDependenciesHash": "73ac91f09601cd919fddc7f1d193a03eab37102a" + "packageJsonInjectedDependenciesHash": "79ac135cb61506457e8d49c7ec1342d419bde3e2" } diff --git a/common/config/subspaces/default/pnpm-lock.yaml b/common/config/subspaces/default/pnpm-lock.yaml index 936041daa66..834d96fe91a 100644 --- a/common/config/subspaces/default/pnpm-lock.yaml +++ b/common/config/subspaces/default/pnpm-lock.yaml @@ -3639,9 +3639,6 @@ importers: '@rushstack/ts-command-line': specifier: workspace:* 
version: link:../ts-command-line - '@rushstack/zipsync': - specifier: workspace:* - version: link:../../apps/zipsync '@yarnpkg/lockfile': specifier: ~1.0.2 version: 1.0.2 diff --git a/common/reviews/api/rush-lib.api.md b/common/reviews/api/rush-lib.api.md index b0ca862444f..58ddeefaf83 100644 --- a/common/reviews/api/rush-lib.api.md +++ b/common/reviews/api/rush-lib.api.md @@ -1424,7 +1424,7 @@ export class RushConstants { static readonly artifactoryFilename: 'artifactory.json'; static readonly browserApprovedPackagesFilename: 'browser-approved-packages.json'; static readonly buildCacheFilename: 'build-cache.json'; - static readonly buildCacheVersion: 2; + static readonly buildCacheVersion: 1; static readonly buildCommandName: 'build'; static readonly bulkCommandKind: 'bulk'; static readonly bypassPolicyFlagLongName: '--bypass-policy'; diff --git a/libraries/rush-lib/package.json b/libraries/rush-lib/package.json index 7d3f55556be..0ce5842b6a6 100644 --- a/libraries/rush-lib/package.json +++ b/libraries/rush-lib/package.json @@ -42,7 +42,6 @@ "@rushstack/stream-collator": "workspace:*", "@rushstack/terminal": "workspace:*", "@rushstack/ts-command-line": "workspace:*", - "@rushstack/zipsync": "workspace:*", "@yarnpkg/lockfile": "~1.0.2", "builtin-modules": "~3.1.0", "cli-table": "~0.3.1", diff --git a/libraries/rush-lib/src/logic/RushConstants.ts b/libraries/rush-lib/src/logic/RushConstants.ts index 75eea19bd32..e04c03d2f2e 100644 --- a/libraries/rush-lib/src/logic/RushConstants.ts +++ b/libraries/rush-lib/src/logic/RushConstants.ts @@ -226,7 +226,7 @@ export class RushConstants { * Build cache version number, incremented when the logic to create cache entries changes. * Changing this ensures that cache entries generated by an old version will no longer register as a cache hit. */ - public static readonly buildCacheVersion: 2 = 2; + public static readonly buildCacheVersion: 1 = 1; /** * Cobuild configuration file. 
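As context for the integration being reverted below, the core zipsync API exercised by the tests earlier in this patch reduces to a pair of synchronous calls. A minimal sketch follows; the directory names and the 'deflate' compression value are illustrative, and the imports mirror apps/zipsync/src/index.test.ts:

import * as path from 'node:path';
import { Terminal } from '@rushstack/terminal/lib/Terminal';
import { NoOpTerminalProvider } from '@rushstack/terminal/lib/NoOpTerminalProvider';
import { pack } from './pack';
import { unpack } from './unpack';

const terminal: Terminal = new Terminal(new NoOpTerminalProvider());
const baseDir: string = '/tmp/zipsync-demo'; // illustrative; the tests use a random temp directory
const targetDirectories: string[] = ['demo-data-1']; // illustrative
const archivePath: string = path.join(baseDir, 'archive.zip');

// Pack the target directories into a zip archive ('store' | 'deflate' | 'zstd' | 'auto')
const packResult = pack({ terminal, compression: 'deflate', baseDir, targetDirectories, archivePath });
terminal.writeLine(`Packed ${packResult.filesPacked} files`);

// Unpack into a (possibly dirty) directory tree; files whose size and SHA-1 already match are skipped
const unpackResult = unpack({ terminal, archivePath, baseDir, targetDirectories });
terminal.writeLine(`Extracted ${unpackResult.filesExtracted}, skipped ${unpackResult.filesSkipped}`);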
diff --git a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts index 31d4a2825cc..d603c094547 100644 --- a/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts +++ b/libraries/rush-lib/src/logic/buildCache/OperationBuildCache.ts @@ -3,17 +3,16 @@ import * as path from 'path'; import * as crypto from 'crypto'; -import * as fs from 'fs'; import { FileSystem, type FolderItem, InternalError, Async } from '@rushstack/node-core-library'; import type { ITerminal } from '@rushstack/terminal'; -import { packWorkerAsync, type IZipSyncPackWorkerResult } from '@rushstack/zipsync/lib/packWorkerAsync'; -import { unpackWorkerAsync, type IZipSyncUnpackWorkerResult } from '@rushstack/zipsync/lib/unpackWorkerAsync'; import type { RushConfigurationProject } from '../../api/RushConfigurationProject'; import type { BuildCacheConfiguration } from '../../api/BuildCacheConfiguration'; import type { ICloudBuildCacheProvider } from './ICloudBuildCacheProvider'; import type { FileSystemBuildCacheProvider } from './FileSystemBuildCacheProvider'; +import { TarExecutable } from '../../utilities/TarExecutable'; +import { EnvironmentVariableNames } from '../../api/EnvironmentConfiguration'; import type { IOperationExecutionResult } from '../operations/IOperationExecutionResult'; /** @@ -61,6 +60,8 @@ interface IPathsToCache { * @internal */ export class OperationBuildCache { + private static _tarUtilityPromise: Promise | undefined; + private readonly _project: RushConfigurationProject; private readonly _localBuildCacheProvider: FileSystemBuildCacheProvider; private readonly _cloudBuildCacheProvider: ICloudBuildCacheProvider | undefined; @@ -89,6 +90,14 @@ export class OperationBuildCache { this._cacheId = cacheId; } + private static _tryGetTarUtility(terminal: ITerminal): Promise { + if (!OperationBuildCache._tarUtilityPromise) { + OperationBuildCache._tarUtilityPromise = TarExecutable.tryInitializeAsync(terminal); + } + + return OperationBuildCache._tarUtilityPromise; + } + public get cacheId(): string | undefined { return this._cacheId; } @@ -167,52 +176,32 @@ export class OperationBuildCache { const projectFolderPath: string = this._project.projectFolder; + // Purge output folders + terminal.writeVerboseLine(`Clearing cached folders: ${this._projectOutputFolderNames.join(', ')}`); + await Promise.all( + this._projectOutputFolderNames.map((outputFolderName: string) => + FileSystem.deleteFolderAsync(`${projectFolderPath}/${outputFolderName}`) + ) + ); + + const tarUtility: TarExecutable | undefined = await OperationBuildCache._tryGetTarUtility(terminal); let restoreSuccess: boolean = false; - try { - const logFilePath: string = this._getLogFilePath(cacheId, 'unpack'); - let unpackWorkerResult: IZipSyncUnpackWorkerResult; - try { - unpackWorkerResult = await unpackWorkerAsync({ - archivePath: localCacheEntryPath!, - targetDirectories: this._projectOutputFolderNames, - baseDir: projectFolderPath - }); - } catch (e) { - const { zipSyncLogs } = e as { zipSyncLogs: string | undefined }; - if (zipSyncLogs) { - fs.writeFileSync(logFilePath, zipSyncLogs); - terminal.writeVerboseLine(`The zipsync log has been written to: ${logFilePath}`); - } - throw e; - } - const { - zipSyncReturn: { filesDeleted, filesExtracted, filesSkipped, foldersDeleted, otherEntriesDeleted }, - zipSyncLogs - } = unpackWorkerResult; - fs.writeFileSync(logFilePath, zipSyncLogs); - terminal.writeVerboseLine(`The zipsync log has been written to: ${logFilePath}`); - 
terminal.writeVerboseLine(`Restored ${filesExtracted + filesSkipped} files from cache.`); - if (filesExtracted > 0) { - terminal.writeVerboseLine(`Extracted ${filesExtracted} files to target folders.`); - } - if (filesSkipped > 0) { - terminal.writeVerboseLine(`Skipped ${filesSkipped} files that were already up to date.`); - } - if (filesDeleted > 0) { - terminal.writeVerboseLine(`Deleted ${filesDeleted} files from target folders.`); - } - if (foldersDeleted > 0) { - terminal.writeVerboseLine(`Deleted ${foldersDeleted} empty folders from target folders.`); - } - if (otherEntriesDeleted > 0) { - terminal.writeVerboseLine( - `Deleted ${otherEntriesDeleted} items (e.g. symbolic links) from target folders.` + if (tarUtility && localCacheEntryPath) { + const logFilePath: string = this._getTarLogFilePath(cacheId, 'untar'); + const tarExitCode: number = await tarUtility.tryUntarAsync({ + archivePath: localCacheEntryPath, + outputFolderPath: projectFolderPath, + logFilePath + }); + if (tarExitCode === 0) { + restoreSuccess = true; + terminal.writeLine('Successfully restored output from the build cache.'); + } else { + terminal.writeWarningLine( + 'Unable to restore output from the build cache. ' + + `See "${logFilePath}" for logs from the tar process.` ); } - restoreSuccess = true; - terminal.writeLine('Successfully restored output from the build cache.'); - } catch (e) { - terminal.writeWarningLine(`Unable to restore output from the build cache: ${e}`); } if (updateLocalCacheSuccess === false) { @@ -245,71 +234,59 @@ export class OperationBuildCache { let localCacheEntryPath: string | undefined; - const finalLocalCacheEntryPath: string = this._localBuildCacheProvider.getCacheEntryPath(cacheId); - - // Derive the temp file from the destination path to ensure they are on the same volume - // In the case of a shared network drive containing the build cache, we also need to make - // sure the the temp path won't be shared by two parallel rush builds. - const randomSuffix: string = crypto.randomBytes(8).toString('hex'); - const tempLocalCacheEntryPath: string = `${finalLocalCacheEntryPath}-${randomSuffix}.temp`; + const tarUtility: TarExecutable | undefined = await OperationBuildCache._tryGetTarUtility(terminal); + if (tarUtility) { + const finalLocalCacheEntryPath: string = this._localBuildCacheProvider.getCacheEntryPath(cacheId); + + // Derive the temp file from the destination path to ensure they are on the same volume + // In the case of a shared network drive containing the build cache, we also need to make + // sure the the temp path won't be shared by two parallel rush builds. 
+ const randomSuffix: string = crypto.randomBytes(8).toString('hex'); + const tempLocalCacheEntryPath: string = `${finalLocalCacheEntryPath}-${randomSuffix}.temp`; + + const logFilePath: string = this._getTarLogFilePath(cacheId, 'tar'); + const tarExitCode: number = await tarUtility.tryCreateArchiveFromProjectPathsAsync({ + archivePath: tempLocalCacheEntryPath, + paths: filesToCache.outputFilePaths, + project: this._project, + logFilePath + }); - terminal.writeVerboseLine(`Using zipsync to create cache archive.`); - try { - const logFilePath: string = this._getLogFilePath(cacheId, 'pack'); - let packWorkerResult: IZipSyncPackWorkerResult; - try { - packWorkerResult = await packWorkerAsync({ - compression: 'auto', - archivePath: tempLocalCacheEntryPath, - targetDirectories: this._projectOutputFolderNames, - baseDir: this._project.projectFolder - }); - } catch (e) { - const { zipSyncLogs } = e as { zipSyncLogs: string | undefined }; - if (zipSyncLogs) { - fs.writeFileSync(logFilePath, zipSyncLogs); - terminal.writeVerboseLine(`The zipsync log has been written to: ${logFilePath}`); - } - throw e; - } - const { - zipSyncReturn: { filesPacked }, - zipSyncLogs - } = packWorkerResult; - fs.writeFileSync(logFilePath, zipSyncLogs); - terminal.writeVerboseLine(`The zipsync log has been written to: ${logFilePath}`); - terminal.writeVerboseLine(`Packed ${filesPacked} files for caching.`); - - // Move after the archive is finished so that if the process is interrupted we aren't left with an invalid file - try { - await Async.runWithRetriesAsync({ - action: () => - new Promise((resolve, reject) => { - fs.rename(tempLocalCacheEntryPath, finalLocalCacheEntryPath, (err) => { - if (err) { - reject(err); - } else { - resolve(); - } - }); - }), - maxRetries: 2, - retryDelayMs: 500 - }); - } catch (moveError) { + if (tarExitCode === 0) { + // Move after the archive is finished so that if the process is interrupted we aren't left with an invalid file try { - await FileSystem.deleteFileAsync(tempLocalCacheEntryPath); - } catch (deleteError) { - // Ignored + await Async.runWithRetriesAsync({ + action: () => + FileSystem.moveAsync({ + sourcePath: tempLocalCacheEntryPath, + destinationPath: finalLocalCacheEntryPath, + overwrite: true + }), + maxRetries: 2, + retryDelayMs: 500 + }); + } catch (moveError) { + try { + await FileSystem.deleteFileAsync(tempLocalCacheEntryPath); + } catch (deleteError) { + // Ignored + } + throw moveError; } - throw moveError; + localCacheEntryPath = finalLocalCacheEntryPath; + } else { + terminal.writeWarningLine( + `"tar" exited with code ${tarExitCode} while attempting to create the cache entry. ` + + `See "${logFilePath}" for logs from the tar process.` + ); + return false; } - localCacheEntryPath = finalLocalCacheEntryPath; - } catch (e) { - await FileSystem.deleteFileAsync(tempLocalCacheEntryPath).catch(() => { - /* ignore delete error */ - }); - throw e; + } else { + terminal.writeWarningLine( + `Unable to locate "tar". 
Please ensure that "tar" is on your PATH environment variable, or set the ` + + `${EnvironmentVariableNames.RUSH_TAR_BINARY_PATH} environment variable to the full path to the "tar" binary.` + ); + return false; } let cacheEntryBuffer: Buffer | undefined; @@ -418,7 +395,7 @@ export class OperationBuildCache { }; } - private _getLogFilePath(cacheId: string, mode: 'pack' | 'unpack'): string { + private _getTarLogFilePath(cacheId: string, mode: 'tar' | 'untar'): string { return path.join(this._project.projectRushTempFolder, `${cacheId}.${mode}.log`); } From a0c3125ee15c3bf4a4efa420e109414b7e84e104 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Fri, 26 Sep 2025 16:59:11 -0700 Subject: [PATCH 18/20] docs --- apps/zipsync/README.md | 52 ++++++++++++++++------ apps/zipsync/src/pack.ts | 56 +++++++++++++++++++++-- apps/zipsync/src/unpack.ts | 17 +++++++ apps/zipsync/src/zipUtils.ts | 86 +++++++++++++++++++++++++++++++++--- 4 files changed, 189 insertions(+), 22 deletions(-) diff --git a/apps/zipsync/README.md b/apps/zipsync/README.md index a607dc2d67d..661629a6f94 100644 --- a/apps/zipsync/README.md +++ b/apps/zipsync/README.md @@ -1,22 +1,48 @@ # @rushstack/zipsync -zipsync is a tool to pack and unpack zip archives. It is designed as a single-purpose tool to pack and unpack build cache entries. +zipsync is a focused tool for packing and unpacking build cache entries using a constrained subset of the ZIP format for high performance. It optimizes the common scenario where most files already exist in the target location and are unchanged. -## Implementation +## Goals & Rationale -### Unpack +- **Optimize partial unpack**: Most builds reuse the majority of previously produced outputs. Skipping rewrites preserves filesystem and page cache state. +- **Only write when needed**: Fewer syscalls. +- **Integrated cleanup**: Removes the need for a separate `rm -rf` pass; extra files and empty directories are pruned automatically. +- **ZIP subset**: Compatibility with malware scanners. +- **Fast inspection**: The central directory can be enumerated without inflating the entire archive (unlike tar+gzip). -- Read the zip central directory record at the end of the zip file and enumerate zip entries -- Parse the zipsync metadata file in the archive. This contains the SHA-1 hashes of the files -- Enumerate the target directories, cleanup any files or folders that aren't in the archive -- If a file exists with matching size + SHA‑1, skip writing; else unpack it +## How It Works -### Pack +### Pack Flow -- Enumerate the target directories. -- For each file compute a SHA-1 hash for the zipsync metadata file, and the CRC32 (required by zip format), then compress it if needed. Write the headers and file contents to the zip archive. -- Write the metadata file to the zip archive and the zip central directory record. +``` +for each file F + write LocalFileHeader(F) + stream chunks: + read -> hash + crc + maybe compress -> write + finalize compressor + write DataDescriptor(F) +add metadata entry (same pattern) +write central directory records +``` -## Constraints +### Unpack Flow -Though archives created by zipsync can be used by other zip compatible programs, the opposite is not the case. zipsync only implements a subset of zip features to achieve greater performance. 
+``` +load archive -> parse central dir -> read metadata +scan filesystem & delete extraneous entries +for each entry (except metadata): + if unchanged (sha1 matches) => skip + else extract (decompress if needed) +``` + +## Why ZIP (vs tar + gzip) + +Pros for this scenario: + +- Central directory enables cheap listing without decompressing entire payload. +- Widely understood / tooling-friendly (system explorers, scanners, CI tooling). +- Per-file compression keeps selective unpack simple (no need to inflate all bytes). + +Trade-offs: + +- Tar+gzip can exploit cross-file redundancy for better compressed size in datasets with many similar files. diff --git a/apps/zipsync/src/pack.ts b/apps/zipsync/src/pack.ts index 4268301462a..80d15d83201 100644 --- a/apps/zipsync/src/pack.ts +++ b/apps/zipsync/src/pack.ts @@ -33,20 +33,31 @@ import { METADATA_FILENAME } from './zipSyncUtils'; +/** + * File extensions for which additional DEFLATE/ZSTD compression is unlikely to help. + * Used by the 'auto' compression heuristic to avoid wasting CPU on data that is already + * compressed (images, media, existing archives, fonts, etc.). + */ const LIKELY_COMPRESSED_EXTENSION_REGEX: RegExp = /\.(?:zip|gz|tgz|bz2|xz|7z|rar|jpg|jpeg|png|gif|webp|avif|mp4|m4v|mov|mkv|webm|mp3|ogg|aac|flac|pdf|woff|woff2)$/; +/** + * Map zip compression method code -> incremental zlib mode label + */ const zlibPackModes: Record = { [ZSTD_COMPRESSION]: 'zstd-compress', [DEFLATE_COMPRESSION]: 'deflate', [STORE_COMPRESSION]: undefined } as const; +/** + * Public facing CLI option -> actual zip method used for a file we decide to compress. + */ const zipSyncCompressionOptions: Record = { store: STORE_COMPRESSION, deflate: DEFLATE_COMPRESSION, zstd: ZSTD_COMPRESSION, - auto: DEFLATE_COMPRESSION // 'auto' is handled specially in the code + auto: DEFLATE_COMPRESSION } as const; /** @@ -82,6 +93,18 @@ export interface IZipSyncPackResult { metadata: IMetadata; } +/** + * Create a zipsync archive by enumerating target directories, then streaming each file into the + * output zip using the local file header + (optional compressed data) + data descriptor pattern. + * + * Performance characteristics: + * - Single pass per file (no read-then-compress-then-write buffering). CRC32 + SHA-1 are computed + * while streaming so the metadata JSON can later be used for selective unpack. + * - Data descriptor usage (bit 3) allows writing headers before we know sizes or CRC32. + * - A single timestamp (captured once) is applied to all entries for determinism. + * - Metadata entry is added as a normal zip entry at the end (before central directory) so legacy + * tools can still list/extract it, while zipsync can quickly parse file hashes. + */ export function pack({ archivePath, targetDirectories: rawTargetDirectories, @@ -95,7 +118,7 @@ export function pack({ markStart('pack.total'); terminal.writeDebugLine('Starting pack'); - // Pass 1: enumerate + // Pass 1: enumerate files with a queue to avoid deep recursion markStart('pack.enumerate'); const filePaths: string[] = []; @@ -140,7 +163,7 @@ export function pack({ terminal.writeLine(`Found ${filePaths.length} files to pack (enumerated)`); markEnd('pack.enumerate'); - // Pass 2: read + hash + compress + // Pass 2: stream each file: read chunks -> hash + (maybe) compress -> write local header + data descriptor. 
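As an aside on the hashing half of that pass: the chunked-read pattern is easy to miss in the streaming code, so here is a minimal, self-contained sketch of it. This is not the real `readInputInChunks`; it only shows how a file can be pushed through a reusable fixed-size scratch buffer while a SHA-1 digest and byte count are accumulated, which is the same shape the streaming pack code follows before feeding the compressor.

```ts
// Illustrative sketch only: stream a file through a reusable scratch buffer,
// accumulating a SHA-1 hash and total size without loading the whole file.
import * as fs from 'fs';
import * as crypto from 'crypto';

const SCRATCH_SIZE: number = 1 << 25; // 32 MiB, matching the buffer size used by pack/unpack

function hashFileInChunks(
  fullPath: string,
  scratch: Buffer = Buffer.allocUnsafeSlow(SCRATCH_SIZE)
): { sha1Hash: string; size: number } {
  const fd: number = fs.openSync(fullPath, 'r');
  const sha1: crypto.Hash = crypto.createHash('sha1');
  let size: number = 0;
  try {
    let bytesRead: number;
    // Each readSync call fills the scratch buffer; loop until EOF (0 bytes read).
    while ((bytesRead = fs.readSync(fd, scratch, 0, scratch.length, null)) > 0) {
      sha1.update(scratch.subarray(0, bytesRead));
      size += bytesRead;
    }
  } finally {
    fs.closeSync(fd);
  }
  return { sha1Hash: sha1.digest('hex'), size };
}
```

In the real implementation the same scratch buffer also feeds the CRC32 computation and the optional compressor, and it is allocated once and reused across all files in the archive.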
markStart('pack.prepareEntries'); const bufferSize: number = 1 << 25; // 32 MiB const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); @@ -150,6 +173,9 @@ export function pack({ using zipFile: IDisposableFileHandle = getDisposableFileHandle(archivePath, 'w'); let currentOffset: number = 0; // Use this function to do any write to the zip file, so that we can track the current offset. + /** + * Write a raw chunk to the archive file descriptor, updating current offset. + */ function writeChunkToZip(chunk: Uint8Array, lengthBytes: number = chunk.byteLength): void { let offset: number = 0; while (lengthBytes > 0 && offset < chunk.byteLength) { @@ -162,6 +188,7 @@ export function pack({ } currentOffset += offset; } + /** Convenience wrapper for writing multiple buffers sequentially. */ function writeChunksToZip(chunks: Uint8Array[]): void { for (const chunk of chunks) { writeChunkToZip(chunk); @@ -169,12 +196,27 @@ export function pack({ } const dosDateTimeNow: { time: number; date: number } = dosDateTime(new Date()); + /** + * Stream a single file into the archive. + * Steps: + * 1. Decide compression (based on user choice + heuristic). + * 2. Emit local file header (sizes/CRC zeroed because we use a data descriptor). + * 3. Read file in 32 MiB chunks: update SHA-1 + CRC32; optionally feed compressor or write raw. + * 4. Flush compressor (if any) and write trailing data descriptor containing sizes + CRC. + * 5. Return populated entry metadata for later central directory + JSON metadata. + */ function writeFileEntry(relativePath: string): IFileEntry { + /** + * Basic heuristic: skip re-compressing file types that are already compressed. + */ function isLikelyAlreadyCompressed(filename: string): boolean { return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); } const fullPath: string = path.join(baseDir, relativePath); + /** + * Read file in large fixed-size buffer; invoke callback for each filled chunk. + */ const readInputInChunks: (onChunk: (bytesInInputBuffer: number) => void) => void = ( onChunk: (bytesInInputBuffer: number) => void ): void => { @@ -231,6 +273,9 @@ export function pack({ let uncompressedSize: number = 0; let compressedSize: number = 0; + /** + * Compressor instance (deflate or zstd) created only if needed. + */ using incrementalZlib: IIncrementalZlib | undefined = shouldCompress ? createIncrementalZlib( outputBuffer, @@ -270,6 +315,7 @@ export function pack({ entry.crc32 = crc32; entry.sha1Hash = sha1Hash; + // Trailing data descriptor now that final CRC/sizes are known. writeChunkToZip(writeDataDescriptor(entry)); terminal.writeVerboseLine( @@ -284,6 +330,7 @@ export function pack({ } const entries: IFileEntry[] = []; + // Emit all file entries in enumeration order. for (const relativePath of filePaths) { entries.push(writeFileEntry(relativePath)); } @@ -293,6 +340,7 @@ export function pack({ markStart('pack.metadata.build'); const metadata: IMetadata = { version: METADATA_VERSION, files: {} }; + // Build metadata map used for selective unpack (size + SHA‑1 per file). for (const entry of entries) { metadata.files[entry.filename] = { size: entry.size, sha1Hash: entry.sha1Hash }; } @@ -306,6 +354,7 @@ export function pack({ let metadataCompressionMethod: ZipMetaCompressionMethod = zipSyncCompressionOptions.store; let metadataData: Buffer = metadataBuffer; let metadataCompressedSize: number = metadataBuffer.length; + // Compress metadata (deflate) iff user allowed compression and it helps (>64 bytes & smaller result). 
if (compression !== 'store' && metadataBuffer.length > 64) { const compressed: Buffer = zlib.deflateRawSync(metadataBuffer, { level: 9 }); if (compressed.length < metadataBuffer.length) { @@ -348,6 +397,7 @@ export function pack({ markStart('pack.write.centralDirectory'); const centralDirOffset: number = currentOffset; + // Emit central directory records. for (const entry of entries) { writeChunksToZip(writeCentralDirectoryHeader(entry)); } diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts index d12bb13ab55..a8eeaf0e90e 100644 --- a/apps/zipsync/src/unpack.ts +++ b/apps/zipsync/src/unpack.ts @@ -63,6 +63,9 @@ export interface IZipSyncUnpackResult { otherEntriesDeleted: number; } +/** + * Unpack a zipsync archive into the provided target directories. + */ export function unpack({ archivePath, targetDirectories: rawTargetDirectories, @@ -76,11 +79,13 @@ export function unpack({ markStart('unpack.total'); terminal.writeDebugLine('Starting unpackZip'); + // Read entire archive into memory (build cache entries are expected to be relatively small/medium). markStart('unpack.read.archive'); const zipBuffer: Buffer = fs.readFileSync(archivePath); terminal.writeDebugLine(`Archive size=${zipBuffer.length} bytes`); markEnd('unpack.read.archive'); + // Locate & parse central directory so we have random-access metadata for all entries. markStart('unpack.parse.centralDirectory'); const zipTree: LookupByPath = new LookupByPath(); const endOfCentralDir: IEndOfCentralDirectory = findEndOfCentralDirectory(zipBuffer); @@ -151,6 +156,7 @@ export function unpack({ terminal.writeLine(`Found ${entries.length} files in archive`); + // Ensure root target directories exist (they may be empty initially for cache misses). for (const targetDirectory of targetDirectories) { fs.mkdirSync(targetDirectory, { recursive: true }); terminal.writeDebugLine(`Ensured target directory: ${targetDirectory}`); @@ -165,6 +171,7 @@ export function unpack({ const dirsToCleanup: string[] = []; + // Phase: scan filesystem to delete entries not present in archive and record empty dirs for later removal. markStart('unpack.scan.existing'); const queue: IDirQueueItem[] = targetDirectories.map((dir) => ({ dir, @@ -218,6 +225,7 @@ export function unpack({ } } + // Try to delete now-empty directories (created in previous builds but not in this archive). for (const dir of dirsToCleanup) { // Try to remove the directory. If it is not empty, this will throw and we can ignore the error. if (rmdirSync(dir)) { @@ -233,6 +241,10 @@ export function unpack({ const bufferSize: number = 1 << 25; // 32 MiB const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + /** + * Stream-decompress (or copy) an individual file from the archive into place. + * We allocate a single large output buffer reused for all inflation operations to limit GC. + */ function extractFileFromZip(targetPath: string, entry: ICentralDirectoryHeaderParseResult): void { terminal.writeDebugLine(`Extracting file: ${entry.filename}`); const fileZipBuffer: Buffer = getFileFromZip(zipBuffer, entry); @@ -275,6 +287,10 @@ export function unpack({ } } + /** + * Decide whether a file needs extraction by comparing existing file SHA‑1 vs metadata. + * If file is missing or hash differs we extract; otherwise we skip to preserve existing inode/data. 
+ */ function shouldExtract(targetPath: string, entry: ICentralDirectoryHeaderParseResult): boolean { if (metadata) { const metadataFile: { size: number; sha1Hash: string } | undefined = metadata.files[entry.filename]; @@ -300,6 +316,7 @@ export function unpack({ const dirsCreated: Set = new Set(); + // Iterate all entries excluding metadata; create parent dirs lazily; selective extraction. for (const entry of entries) { if (entry.filename === METADATA_FILENAME) { continue; diff --git a/apps/zipsync/src/zipUtils.ts b/apps/zipsync/src/zipUtils.ts index eb00477d8a5..5dbd1d1f59c 100644 --- a/apps/zipsync/src/zipUtils.ts +++ b/apps/zipsync/src/zipUtils.ts @@ -1,12 +1,28 @@ // Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license. // See LICENSE in the project root for license information. -// zip spec: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT - -const LOCAL_FILE_HEADER_SIGNATURE: number = 0x04034b50; -const CENTRAL_DIR_HEADER_SIGNATURE: number = 0x02014b50; -const END_OF_CENTRAL_DIR_SIGNATURE: number = 0x06054b50; -const DATA_DESCRIPTOR_SIGNATURE: number = 0x08074b50; +/** + * Low-level ZIP structure helpers used by the zipsync pack/unpack pipeline. + * + * Spec reference: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + */ + +/** + * Local file header signature PK\x03\x04 + */ +const LOCAL_FILE_HEADER_SIGNATURE: number = 0x04034b50; // PK\x03\x04 +/** + * Central directory file header signature PK\x01\x02 + */ +const CENTRAL_DIR_HEADER_SIGNATURE: number = 0x02014b50; // PK\x01\x02 +/** + * End of central directory signature PK\x05\x06 + */ +const END_OF_CENTRAL_DIR_SIGNATURE: number = 0x06054b50; // PK\x05\x06 +/** + * Data descriptor signature PK\x07\x08 + */ +const DATA_DESCRIPTOR_SIGNATURE: number = 0x08074b50; // PK\x07\x08 export const STORE_COMPRESSION: 0 = 0; export const DEFLATE_COMPRESSION: 8 = 8; @@ -88,6 +104,10 @@ function readUInt16LE(buffer: Buffer, offset: number): number { return buffer.readUInt16LE(offset); } +/** + * Convert a JS Date into packed DOS time/date fields used by classic ZIP. + * Seconds are stored /2 (range 0-29 => 0-58s). Years are offset from 1980. + */ export function dosDateTime(date: Date): { time: number; date: number } { /* eslint-disable no-bitwise */ const time: number = @@ -102,7 +122,23 @@ export function dosDateTime(date: Date): { time: number; date: number } { return { time, date: dateVal }; } +/** + * Reusable scratch buffer for the fixed-length local file header (30 bytes). + * Using a single Buffer avoids per-file allocations; callers must copy/use synchronously. + */ const localFileHeaderBuffer: Buffer = Buffer.allocUnsafe(30); +/** + * Write the fixed portion of a local file header for an entry (with data descriptor flag set) and + * return the header buffer plus the variable-length filename buffer. + * + * Layout (little-endian): + * signature(4) versionNeeded(2) flags(2) method(2) modTime(2) modDate(2) + * crc32(4) compSize(4) uncompSize(4) nameLen(2) extraLen(2) + * + * Because we set bit 3 of the general purpose flag, crc32/compSize/uncompSize are zero here and the + * actual values appear later in a trailing data descriptor record. This enables streaming without + * buffering entire file contents beforehand. 
+ */ export function writeLocalFileHeader( entry: IFileEntry ): [fileHeaderWithoutVariableLengthData: Buffer, fileHeaderVariableLengthData: Buffer] { @@ -141,7 +177,15 @@ export function writeLocalFileHeader( return [localFileHeaderBuffer, filenameBuffer]; } +/** + * Reusable scratch buffer for central directory entries (fixed-length 46 bytes before filename) + */ const centralDirHeaderBuffer: Buffer = Buffer.allocUnsafe(46); +/** + * Write a central directory header referencing an already written local file entry. + * Central directory consolidates the final CRC + sizes (always present here) and provides a table + * for fast enumeration without scanning the archive sequentially. + */ export function writeCentralDirectoryHeader(entry: IFileEntry): Buffer[] { const filenameBuffer: Buffer = Buffer.from(entry.filename, 'utf8'); @@ -188,7 +232,14 @@ export function writeCentralDirectoryHeader(entry: IFileEntry): Buffer[] { return [centralDirHeaderBuffer, filenameBuffer]; } +/** + * Data descriptor: signature(4) crc32(4) compSize(4) uncompSize(4) + */ const dataDescriptorBuffer: Buffer = Buffer.allocUnsafe(16); +/** + * Write the trailing data descriptor for an entry. Only used because we set flag bit 3 in the + * local file header allowing deferred CRC/size calculation. + */ export function writeDataDescriptor(entry: IFileEntry): Buffer { let offset: number = 0; writeUInt32LE(dataDescriptorBuffer, DATA_DESCRIPTOR_SIGNATURE, offset); // signature PK\x07\x08 @@ -201,7 +252,14 @@ export function writeDataDescriptor(entry: IFileEntry): Buffer { return dataDescriptorBuffer; } +/** + * End of central directory (EOCD) record (22 bytes when comment length = 0) + */ const endOfCentralDirBuffer: Buffer = Buffer.allocUnsafe(22); +/** + * Write the EOCD record referencing the accumulated central directory. We omit archive comments + * and do not support ZIP64 (sufficient for build cache archive sizes today). + */ export function writeEndOfCentralDirectory( centralDirOffset: number, centralDirSize: number, @@ -232,6 +290,10 @@ interface ILocalFileHeaderParseResult { nextOffset: number; } +/** + * Parse a local file header at the provided offset. Minimal validation: signature check only. + * Returns header plus the offset pointing just past the variable-length name+extra field. + */ export function parseLocalFileHeader(buffer: Buffer, offset: number): ILocalFileHeaderParseResult { const signature: number = readUInt32LE(buffer, offset); if (signature !== LOCAL_FILE_HEADER_SIGNATURE) { @@ -265,6 +327,10 @@ export interface ICentralDirectoryHeaderParseResult { nextOffset: number; } +/** + * Parse a central directory header at the given offset (within a sliced central directory buffer). + * Returns header, filename string, and nextOffset pointing to the next structure. + */ export function parseCentralDirectoryHeader( buffer: Buffer, offset: number @@ -306,6 +372,10 @@ export function parseCentralDirectoryHeader( }; } +/** + * Locate the EOCD record by reverse scanning. Since we never write a comment the EOCD will be the + * first matching signature encountered scanning backwards from the end. 
+ */ export function findEndOfCentralDirectory(buffer: Buffer): IEndOfCentralDirectory { for (let i: number = buffer.length - 22; i >= 0; i--) { if (readUInt32LE(buffer, i) === END_OF_CENTRAL_DIR_SIGNATURE) { @@ -325,6 +395,10 @@ export function findEndOfCentralDirectory(buffer: Buffer): IEndOfCentralDirector throw new Error('End of central directory not found'); } +/** + * Slice out the (possibly compressed) file data bytes for a central directory entry. + * Caller will decompress if needed based on entry.header.compressionMethod. + */ export function getFileFromZip(zipBuffer: Buffer, entry: ICentralDirectoryHeaderParseResult): Buffer { const { header: localFileHeader } = parseLocalFileHeader(zipBuffer, entry.header.localHeaderOffset); const localDataOffset: number = From a89719125f97e5e3d17ce6b70453d379154ef6b0 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Fri, 26 Sep 2025 17:05:12 -0700 Subject: [PATCH 19/20] pr feedback --- apps/zipsync/src/ZipSyncCommandLineParser.ts | 2 +- apps/zipsync/src/__snapshots__/start.test.ts.snap | 4 ++-- apps/zipsync/src/pack.ts | 7 ++++--- apps/zipsync/src/unpack.ts | 5 +++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/apps/zipsync/src/ZipSyncCommandLineParser.ts b/apps/zipsync/src/ZipSyncCommandLineParser.ts index cf11145f224..922ea25fff5 100644 --- a/apps/zipsync/src/ZipSyncCommandLineParser.ts +++ b/apps/zipsync/src/ZipSyncCommandLineParser.ts @@ -83,7 +83,7 @@ export class ZipSyncCommandLineParser extends CommandLineParser { parameterLongName: '--compression', parameterShortName: '-z', description: - 'Compression strategy when packing. "deflate" and "zlib" attempts compression for every file (keeps only if smaller); "auto" first skips likely-compressed types before attempting; "store" disables compression.', + 'Compression strategy when packing. "deflate" and "zlib" attempts compression for every file (keeps only if smaller); "auto" first skips likely-compressed types before attempting "deflate" compression; "store" disables compression.', alternatives: ['store', 'deflate', 'zstd', 'auto'], required: true }); diff --git a/apps/zipsync/src/__snapshots__/start.test.ts.snap b/apps/zipsync/src/__snapshots__/start.test.ts.snap index 1459c2e3afd..6c68a44b411 100644 --- a/apps/zipsync/src/__snapshots__/start.test.ts.snap +++ b/apps/zipsync/src/__snapshots__/start.test.ts.snap @@ -26,8 +26,8 @@ Optional arguments: Compression strategy when packing. \\"deflate\\" and \\"zlib\\" attempts compression for every file (keeps only if smaller); \\"auto\\" first skips - likely-compressed types before attempting; \\"store\\" - disables compression. + likely-compressed types before attempting \\"deflate\\" + compression; \\"store\\" disables compression. For detailed help about a specific command, use: zipsync -h " diff --git a/apps/zipsync/src/pack.ts b/apps/zipsync/src/pack.ts index 80d15d83201..583d9d7d9d4 100644 --- a/apps/zipsync/src/pack.ts +++ b/apps/zipsync/src/pack.ts @@ -93,6 +93,10 @@ export interface IZipSyncPackResult { metadata: IMetadata; } +const bufferSize: number = 1 << 25; // 32 MiB +const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); +const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + /** * Create a zipsync archive by enumerating target directories, then streaming each file into the * output zip using the local file header + (optional compressed data) + data descriptor pattern. 
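To make the clarified `--compression` description above concrete, here is a rough sketch of the per-file decision that "auto" implies: skip extensions that are almost certainly already compressed, otherwise attempt DEFLATE and keep the result only if it is smaller. This is illustrative only; the real `pack()` streams data through an incremental compressor rather than compressing a whole buffer in one call, and the abridged regex below merely stands in for `LIKELY_COMPRESSED_EXTENSION_REGEX`.

```ts
import * as zlib from 'zlib';

// Abridged stand-in for LIKELY_COMPRESSED_EXTENSION_REGEX in pack.ts
const LIKELY_COMPRESSED: RegExp = /\.(?:zip|gz|tgz|jpg|jpeg|png|mp4|woff2?)$/;

function chooseCompression(
  filename: string,
  data: Buffer
): { method: 'store' | 'deflate'; payload: Buffer } {
  // 'auto': don't spend CPU on data that is almost certainly already compressed.
  if (LIKELY_COMPRESSED.test(filename.toLowerCase())) {
    return { method: 'store', payload: data };
  }
  const deflated: Buffer = zlib.deflateRawSync(data);
  // Keep the compressed form only if it actually saves space.
  return deflated.length < data.length
    ? { method: 'deflate', payload: deflated }
    : { method: 'store', payload: data };
}
```

The metadata entry follows the same keep-only-if-smaller rule, with the additional >64 byte threshold shown earlier.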
@@ -165,9 +169,6 @@ export function pack({ // Pass 2: stream each file: read chunks -> hash + (maybe) compress -> write local header + data descriptor. markStart('pack.prepareEntries'); - const bufferSize: number = 1 << 25; // 32 MiB - const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); terminal.writeDebugLine(`Opening archive for write: ${archivePath}`); using zipFile: IDisposableFileHandle = getDisposableFileHandle(archivePath, 'w'); diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts index a8eeaf0e90e..09cd5ad511a 100644 --- a/apps/zipsync/src/unpack.ts +++ b/apps/zipsync/src/unpack.ts @@ -63,6 +63,9 @@ export interface IZipSyncUnpackResult { otherEntriesDeleted: number; } +const bufferSize: number = 1 << 25; // 32 MiB +const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); + /** * Unpack a zipsync archive into the provided target directories. */ @@ -239,8 +242,6 @@ export function unpack({ markStart('unpack.extract.loop'); - const bufferSize: number = 1 << 25; // 32 MiB - const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); /** * Stream-decompress (or copy) an individual file from the archive into place. * We allocate a single large output buffer reused for all inflation operations to limit GC. From f7164c627d4ca2c324d71a96c38f1e0ecbbebd70 Mon Sep 17 00:00:00 2001 From: Bharat Middha <5100938+bmiddha@users.noreply.github.com> Date: Fri, 26 Sep 2025 17:44:28 -0700 Subject: [PATCH 20/20] pr feedback --- apps/zipsync/src/pack.ts | 40 ++++++++++++++++++-------------- apps/zipsync/src/packWorker.ts | 12 +++++++++- apps/zipsync/src/unpack.ts | 22 ++++++++++++------ apps/zipsync/src/unpackWorker.ts | 8 ++++++- apps/zipsync/src/zipSyncUtils.ts | 2 ++ 5 files changed, 58 insertions(+), 26 deletions(-) diff --git a/apps/zipsync/src/pack.ts b/apps/zipsync/src/pack.ts index 583d9d7d9d4..177fc6fec6c 100644 --- a/apps/zipsync/src/pack.ts +++ b/apps/zipsync/src/pack.ts @@ -30,7 +30,8 @@ import { type IMetadata, type IDirQueueItem, METADATA_VERSION, - METADATA_FILENAME + METADATA_FILENAME, + defaultBufferSize } from './zipSyncUtils'; /** @@ -41,6 +42,13 @@ import { const LIKELY_COMPRESSED_EXTENSION_REGEX: RegExp = /\.(?:zip|gz|tgz|bz2|xz|7z|rar|jpg|jpeg|png|gif|webp|avif|mp4|m4v|mov|mkv|webm|mp3|ogg|aac|flac|pdf|woff|woff2)$/; +/** + * Basic heuristic: skip re-compressing file types that are already compressed. + */ +function isLikelyAlreadyCompressed(filename: string): boolean { + return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); +} + /** * Map zip compression method code -> incremental zlib mode label */ @@ -66,7 +74,7 @@ const zipSyncCompressionOptions: Record; /** @@ -86,6 +94,14 @@ export interface IZipSyncPackOptions { * produces a smaller result; otherwise it will fall back to 'store' per-file. */ compression: ZipSyncOptionCompression; + /** + * Optional buffer that can be provided to avoid internal allocations. + */ + inputBuffer?: Buffer; + /** + * Optional buffer that can be provided to avoid internal allocations. 
+ */ + outputBuffer?: Buffer; } export interface IZipSyncPackResult { @@ -93,10 +109,6 @@ export interface IZipSyncPackResult { metadata: IMetadata; } -const bufferSize: number = 1 << 25; // 32 MiB -const inputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); -const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - /** * Create a zipsync archive by enumerating target directories, then streaming each file into the * output zip using the local file header + (optional compressed data) + data descriptor pattern. @@ -114,7 +126,9 @@ export function pack({ targetDirectories: rawTargetDirectories, baseDir: rawBaseDir, compression, - terminal + terminal, + inputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize), + outputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize) }: IZipSyncPackOptions): IZipSyncPackResult { const baseDir: string = path.resolve(rawBaseDir); const targetDirectories: string[] = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); @@ -173,7 +187,6 @@ export function pack({ terminal.writeDebugLine(`Opening archive for write: ${archivePath}`); using zipFile: IDisposableFileHandle = getDisposableFileHandle(archivePath, 'w'); let currentOffset: number = 0; - // Use this function to do any write to the zip file, so that we can track the current offset. /** * Write a raw chunk to the archive file descriptor, updating current offset. */ @@ -207,12 +220,6 @@ export function pack({ * 5. Return populated entry metadata for later central directory + JSON metadata. */ function writeFileEntry(relativePath: string): IFileEntry { - /** - * Basic heuristic: skip re-compressing file types that are already compressed. - */ - function isLikelyAlreadyCompressed(filename: string): boolean { - return LIKELY_COMPRESSED_EXTENSION_REGEX.test(filename.toLowerCase()); - } const fullPath: string = path.join(baseDir, relativePath); /** @@ -288,8 +295,7 @@ export function pack({ ) : undefined; - // Also capture content if we might need it (for compression decision or storing raw data). - // We'll accumulate into an array of buffers to avoid repeated concatenations for large files. + // Read input file in chunks, update hashes, and either compress or write raw. 
readInputInChunks((bytesInInputBuffer: number) => { const slice: Buffer = inputBuffer.subarray(0, bytesInInputBuffer); sha1HashBuilder.update(slice); diff --git a/apps/zipsync/src/packWorker.ts b/apps/zipsync/src/packWorker.ts index fccff8231c5..39e82fe8177 100644 --- a/apps/zipsync/src/packWorker.ts +++ b/apps/zipsync/src/packWorker.ts @@ -7,6 +7,7 @@ import { Terminal } from '@rushstack/terminal/lib/Terminal'; import { StringBufferTerminalProvider } from '@rushstack/terminal/lib/StringBufferTerminalProvider'; import { type IZipSyncPackOptions, type IZipSyncPackResult, pack } from './pack'; +import { defaultBufferSize } from './zipSyncUtils'; export { type IZipSyncPackOptions, type IZipSyncPackResult } from './pack'; @@ -49,6 +50,9 @@ if (!rawParentPort) { } const parentPort: MessagePort = rawParentPort; +let inputBuffer: Buffer | undefined = undefined; +let outputBuffer: Buffer | undefined = undefined; + function handleMessage(message: IHostToWorkerMessage | false): void { if (message === false) { parentPort.removeAllListeners(); @@ -63,12 +67,18 @@ function handleMessage(message: IHostToWorkerMessage | false): void { switch (message.type) { case 'zipsync-pack': { const { options } = message; + if (!inputBuffer) { + inputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize); + } + if (!outputBuffer) { + outputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize); + } const successMessage: IZipSyncSuccessMessage = { type: message.type, id: message.id, result: { - zipSyncReturn: pack({ ...options, terminal }), + zipSyncReturn: pack({ ...options, terminal, inputBuffer, outputBuffer }), zipSyncLogs: terminalProvider.getOutput() } }; diff --git a/apps/zipsync/src/unpack.ts b/apps/zipsync/src/unpack.ts index 09cd5ad511a..f4592c34d16 100644 --- a/apps/zipsync/src/unpack.ts +++ b/apps/zipsync/src/unpack.ts @@ -23,7 +23,13 @@ import { type ZipMetaCompressionMethod } from './zipUtils'; import { computeFileHash } from './hash'; -import { METADATA_FILENAME, METADATA_VERSION, type IDirQueueItem, type IMetadata } from './zipSyncUtils'; +import { + defaultBufferSize, + METADATA_FILENAME, + METADATA_VERSION, + type IDirQueueItem, + type IMetadata +} from './zipSyncUtils'; const zlibUnpackModes: Record = { [ZSTD_COMPRESSION]: 'zstd-decompress', @@ -37,7 +43,7 @@ const zlibUnpackModes: Record; /** * Base directory for relative paths within the archive (defaults to common parent of targetDirectories) */ baseDir: string; + /** + * Optional buffer that can be provided to avoid internal allocations. + */ + outputBuffer?: Buffer; } export interface IZipSyncUnpackResult { @@ -63,9 +73,6 @@ export interface IZipSyncUnpackResult { otherEntriesDeleted: number; } -const bufferSize: number = 1 << 25; // 32 MiB -const outputBuffer: Buffer = Buffer.allocUnsafeSlow(bufferSize); - /** * Unpack a zipsync archive into the provided target directories. 
*/ @@ -73,7 +80,8 @@ export function unpack({ archivePath, targetDirectories: rawTargetDirectories, baseDir: rawBaseDir, - terminal + terminal, + outputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize) }: IZipSyncUnpackOptions): IZipSyncUnpackResult { const baseDir: string = path.resolve(rawBaseDir); const targetDirectories: string[] = rawTargetDirectories.map((dir) => path.join(baseDir, dir)); diff --git a/apps/zipsync/src/unpackWorker.ts b/apps/zipsync/src/unpackWorker.ts index d08e2e7ae20..b8ddedb5fa5 100644 --- a/apps/zipsync/src/unpackWorker.ts +++ b/apps/zipsync/src/unpackWorker.ts @@ -7,6 +7,7 @@ import { Terminal } from '@rushstack/terminal/lib/Terminal'; import { StringBufferTerminalProvider } from '@rushstack/terminal/lib/StringBufferTerminalProvider'; import { type IZipSyncUnpackOptions, type IZipSyncUnpackResult, unpack } from './unpack'; +import { defaultBufferSize } from './zipSyncUtils'; export { type IZipSyncUnpackOptions, type IZipSyncUnpackResult } from './unpack'; @@ -49,6 +50,8 @@ if (!rawParentPort) { } const parentPort: MessagePort = rawParentPort; +let outputBuffer: Buffer | undefined = undefined; + function handleMessage(message: IHostToWorkerMessage | false): void { if (message === false) { parentPort.removeAllListeners(); @@ -63,12 +66,15 @@ function handleMessage(message: IHostToWorkerMessage | false): void { switch (message.type) { case 'zipsync-unpack': { const { options } = message; + if (!outputBuffer) { + outputBuffer = Buffer.allocUnsafeSlow(defaultBufferSize); + } const successMessage: IZipSyncUnpackSuccessMessage = { type: message.type, id: message.id, result: { - zipSyncReturn: unpack({ ...options, terminal }), + zipSyncReturn: unpack({ ...options, terminal, outputBuffer }), zipSyncLogs: terminalProvider.getOutput() } }; diff --git a/apps/zipsync/src/zipSyncUtils.ts b/apps/zipsync/src/zipSyncUtils.ts index e4a6cbc77ce..3e03062eb84 100644 --- a/apps/zipsync/src/zipSyncUtils.ts +++ b/apps/zipsync/src/zipSyncUtils.ts @@ -25,3 +25,5 @@ export interface IMetadata { export type IZipSyncMode = 'pack' | 'unpack'; export type ZipSyncOptionCompression = 'store' | 'deflate' | 'zstd' | 'auto'; + +export const defaultBufferSize: number = 1 << 25; // 32 MiB
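A brief usage note on these new optional buffer parameters: the intent, as the worker changes above show, is that a long-lived host allocates the 32 MiB scratch buffers once and passes them into every call, so repeated pack/unpack operations do not re-allocate large buffers. A minimal sketch of such a caller, assuming the same relative module paths used inside the package and mirroring the worker's terminal setup:

```ts
// Sketch of a long-lived caller reusing scratch buffers across pack() calls.
// Module paths and option names follow the diff above; the terminal setup mirrors packWorker.ts.
import { Terminal } from '@rushstack/terminal/lib/Terminal';
import { StringBufferTerminalProvider } from '@rushstack/terminal/lib/StringBufferTerminalProvider';

import { pack } from './pack';
import { defaultBufferSize } from './zipSyncUtils';

const terminalProvider: StringBufferTerminalProvider = new StringBufferTerminalProvider(true);
const terminal: Terminal = new Terminal(terminalProvider);

// Allocate once; every subsequent pack() call reuses the same 32 MiB scratch space.
const inputBuffer: Buffer = Buffer.allocUnsafeSlow(defaultBufferSize);
const outputBuffer: Buffer = Buffer.allocUnsafeSlow(defaultBufferSize);

export function packCacheEntry(archivePath: string, outputFolders: string[], baseDir: string): void {
  pack({
    archivePath,
    targetDirectories: outputFolders,
    baseDir,
    compression: 'auto',
    terminal,
    inputBuffer,
    outputBuffer
  });
}
```

The workers take the same approach but allocate lazily on the first message they receive, so an idle worker never pays for the buffers.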