From f035eac839cf0b598e058ec185ade29619bf6adb Mon Sep 17 00:00:00 2001 From: rayangler <27821750+rayangler@users.noreply.github.com> Date: Thu, 20 Mar 2025 13:04:29 -0400 Subject: [PATCH 1/5] Remove github user --- modules/persistence/index.ts | 16 ++++----- .../src/services/metadata/index.ts | 4 +-- .../persistence/src/services/pages/index.ts | 34 ++++++------------- 3 files changed, 20 insertions(+), 34 deletions(-) diff --git a/modules/persistence/index.ts b/modules/persistence/index.ts index 433be411c..baf08f82d 100644 --- a/modules/persistence/index.ts +++ b/modules/persistence/index.ts @@ -17,8 +17,9 @@ import { upsertAssets } from './src/services/assets'; interface ModuleArgs { path: string; - githubUser: string; - jobId: string; + // Leaving around for backwards compatibility, in case a builder attempts to add the variable + githubUser?: string; + jobId?: string; strict: string; [props: string | number | symbol]: unknown; } @@ -31,22 +32,21 @@ const missingPathMessage = 'No path specified in arguments - please specify a bu // Load command line args into a parameterized argv const argv: ModuleArgs = minimist(process.argv.slice(2)); -const app = async (path: string, githubUser: string, jobId: string) => { +const app = async (path: string, jobId?: string) => { try { if (!path) throw missingPathMessage; - const user = githubUser || 'docs-builder-bot'; const zip = new AdmZip(path); // Safely convert jobId in case of empty string - const autobuilderJobId = jobId || undefined; + const autobuilderJobId = jobId; // atomic buildId for all artifacts read by this module - fundamental assumption // that only one build will be used per run of this module. const buildId = new mongodb.ObjectId(autobuilderJobId); - const metadata = await metadataFromZip(zip, user); + const metadata = await metadataFromZip(zip); // initialize db connections to handle shared connections await snootyDb(); await poolDb(); - await Promise.all([insertAndUpdatePages(buildId, zip, user), insertMetadata(buildId, metadata), upsertAssets(zip)]); + await Promise.all([insertAndUpdatePages(buildId, zip), insertMetadata(buildId, metadata), upsertAssets(zip)]); await insertMergedMetadataEntries(buildId, metadata); // DOP-3447 clean up stale metadata await deleteStaleMetadata(metadata); @@ -59,7 +59,7 @@ const app = async (path: string, githubUser: string, jobId: string) => { } }; -app(argv['path'], argv['githubUser'], argv['jobId']).catch(() => { +app(argv['path'], argv['jobId']).catch(() => { console.error('Persistence Module Failure. Ending build.'); process.exit(1); }); diff --git a/modules/persistence/src/services/metadata/index.ts b/modules/persistence/src/services/metadata/index.ts index ef35a9b81..29f0e9767 100644 --- a/modules/persistence/src/services/metadata/index.ts +++ b/modules/persistence/src/services/metadata/index.ts @@ -14,19 +14,17 @@ export interface Metadata { associated_products?: AssociatedProduct[]; toctree: ToC; toctreeOrder: any[]; - github_username?: string; [key: string]: any; } // Service responsible for memoization of metadata entries. // Any extraneous logic performed on metadata entries as part of upload should be added here // or within subfolders of this module -export const metadataFromZip = async (zip: AdmZip, githubUser: string) => { +export const metadataFromZip = async (zip: AdmZip) => { const zipEntries = zip.getEntries(); const metadata = zipEntries .filter((entry) => entry.entryName === 'site.bson') .map((entry) => deserialize(entry.getData()))[0] as Metadata; await verifyMetadata(metadata); - metadata.github_username = githubUser; return metadata; }; diff --git a/modules/persistence/src/services/pages/index.ts b/modules/persistence/src/services/pages/index.ts index 5bfccd56d..fabf41026 100644 --- a/modules/persistence/src/services/pages/index.ts +++ b/modules/persistence/src/services/pages/index.ts @@ -29,7 +29,6 @@ export interface Page { filename: string; ast: PageAst; static_assets: UpdatedAsset[]; - github_username: string; facets?: Facet[]; } @@ -53,13 +52,12 @@ const UPDATED_AST_COLL_NAME = 'updated_documents'; // Service responsible for memoization of page level documents. // Any extraneous logic performed on page level documents as part of upload should be added here // or within subfolders of this module -const pagesFromZip = (zip: AdmZip, githubUser: string): Page[] => { +const pagesFromZip = (zip: AdmZip): Page[] => { const zipPages = zip.getEntries(); return zipPages .filter((entry) => entry.entryName?.startsWith('documents/')) .map((entry) => { const document = deserialize(entry.getData()) as Page; - document.github_username = githubUser; return document; }); }; @@ -72,11 +70,10 @@ const pagesFromZip = (zip: AdmZip, githubUser: string): Page[] => { * @param pageIdPrefix - Includes the Snooty project name, user (docsworker-xlarge), and branch * @param collection - The collection to perform the find query on */ -const findPrevPageDocs = async (pageIdPrefix: string, collection: string, githubUser: string) => { +const findPrevPageDocs = async (pageIdPrefix: string, collection: string) => { const dbSession = await db(); const findQuery = { page_id: { $regex: new RegExp(`^${pageIdPrefix}/`) }, - github_username: githubUser, deleted: false, }; const projection = { @@ -119,21 +116,13 @@ class UpdatedPagesManager { prevPageDocsMapping: PreviousPageMapping; prevPageIds: Set; updateTime: Date; - githubUser: string; buildId: ObjectId; - constructor( - prevPageDocsMapping: PreviousPageMapping, - prevPagesIds: Set, - pages: Page[], - githubUser: string, - buildId: ObjectId - ) { + constructor(prevPageDocsMapping: PreviousPageMapping, prevPagesIds: Set, pages: Page[], buildId: ObjectId) { this.currentPages = pages; this.operations = []; this.prevPageDocsMapping = prevPageDocsMapping; this.prevPageIds = prevPagesIds; - this.githubUser = githubUser; this.buildId = buildId; this.updateTime = new Date(); @@ -162,7 +151,7 @@ class UpdatedPagesManager { if (!isEqual(page.ast, prevPageData?.ast) || !isEqual(page.facets, prevPageData?.facets)) { const operation = { updateOne: { - filter: { page_id: currentPageId, github_username: page.github_username }, + filter: { page_id: currentPageId }, update: { $set: { page_id: currentPageId, @@ -243,7 +232,7 @@ class UpdatedPagesManager { this.prevPageIds.forEach((unseenPageId) => { const operation = { updateOne: { - filter: { page_id: unseenPageId, github_username: this.githubUser }, + filter: { page_id: unseenPageId }, update: { $set: { deleted: true, @@ -270,7 +259,7 @@ class UpdatedPagesManager { * @param pages * @param collection */ -const updatePages = async (pages: Page[], collection: string, githubUser: string, buildId: ObjectId) => { +const updatePages = async (pages: Page[], collection: string, buildId: ObjectId) => { if (pages.length === 0) { return; } @@ -282,12 +271,12 @@ const updatePages = async (pages: Page[], collection: string, githubUser: string // Find all pages that share the same project name + branch. Expects page IDs // to include these two properties after parse const pageIdPrefix = pages[0].page_id.split('/').slice(0, 3).join('/'); - const previousPagesCursor = await findPrevPageDocs(pageIdPrefix, collection, githubUser); + const previousPagesCursor = await findPrevPageDocs(pageIdPrefix, collection); const { mapping: prevPageDocsMapping, pageIds: prevPageIds } = await createPageAstMapping(previousPagesCursor); const diffsTimerLabel = 'finding page differences'; console.time(diffsTimerLabel); - const updatedPagesManager = new UpdatedPagesManager(prevPageDocsMapping, prevPageIds, pages, githubUser, buildId); + const updatedPagesManager = new UpdatedPagesManager(prevPageDocsMapping, prevPageIds, pages, buildId); const operations = updatedPagesManager.getOperations(); console.timeEnd(diffsTimerLabel); @@ -309,13 +298,12 @@ const updatePages = async (pages: Page[], collection: string, githubUser: string } }; -export const insertAndUpdatePages = async (buildId: ObjectId, zip: AdmZip, githubUser: string) => { +export const insertAndUpdatePages = async (buildId: ObjectId, zip: AdmZip) => { try { - // TEMPORARY FIX FOR NETLIFY BUILDS // TODO: DOP-5405 remove parser user from page id altogether - const pages = pagesFromZip(zip, githubUser).map((page: Page) => { + const pages = pagesFromZip(zip).map((page: Page) => { page.page_id = page.page_id.replace('buildbot', 'docsworker-xlarge'); return page; }); @@ -324,7 +312,7 @@ export const insertAndUpdatePages = async (buildId: ObjectId, zip: AdmZip, githu const featureEnabled = process.env.FEATURE_FLAG_UPDATE_PAGES; if (featureEnabled && featureEnabled.toUpperCase() === 'TRUE') { - ops.push(updatePages(pages, UPDATED_AST_COLL_NAME, githubUser, buildId)); + ops.push(updatePages(pages, UPDATED_AST_COLL_NAME, buildId)); } return Promise.all(ops); From d5344cc44850de8db4f900b2e9385a66c61abaf8 Mon Sep 17 00:00:00 2001 From: rayangler <27821750+rayangler@users.noreply.github.com> Date: Thu, 20 Mar 2025 13:13:39 -0400 Subject: [PATCH 2/5] Perhaps unnecessary backwards compatibility --- modules/persistence/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/persistence/index.ts b/modules/persistence/index.ts index baf08f82d..38f073666 100644 --- a/modules/persistence/index.ts +++ b/modules/persistence/index.ts @@ -17,8 +17,6 @@ import { upsertAssets } from './src/services/assets'; interface ModuleArgs { path: string; - // Leaving around for backwards compatibility, in case a builder attempts to add the variable - githubUser?: string; jobId?: string; strict: string; [props: string | number | symbol]: unknown; From cfc126724ef1ccfaf7bdb05474c443021077e5b5 Mon Sep 17 00:00:00 2001 From: rayangler <27821750+rayangler@users.noreply.github.com> Date: Fri, 21 Mar 2025 15:21:29 -0400 Subject: [PATCH 3/5] Clean up readme and tests --- modules/persistence/README.md | 2 -- modules/persistence/tests/metadata/metadata.test.ts | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/modules/persistence/README.md b/modules/persistence/README.md index 0e4105545..1c2de3027 100644 --- a/modules/persistence/README.md +++ b/modules/persistence/README.md @@ -22,14 +22,12 @@ Compiles the module using `tsc`. By default, compiles to the `./dist` directory. Runs the contents of the `./dist` directory. Requires usage of `-- -path` argument, eg `npm run start -- --path ./build/artifacts.zip`. -An optional argument to specify the github user and link the build to the user will require usage of `--githubUser `. Recommended command for running this module in higher than local environments. Requires parser output artifacts to be present in specified directory and zip file at `--path` value specified. ### `npm run dev` Cleans dist, compiles, and runs with arguments `-path ./build/artifacts.zip`. -Optionally, add the usage of this argument to the command to link build to a desired github user `-- --githubUser `. Requires parser output artifacts to be present in specified directory and zip file at `./build/artifacts.zip`. ## Available Arguments diff --git a/modules/persistence/tests/metadata/metadata.test.ts b/modules/persistence/tests/metadata/metadata.test.ts index ec064d752..1a47c8368 100644 --- a/modules/persistence/tests/metadata/metadata.test.ts +++ b/modules/persistence/tests/metadata/metadata.test.ts @@ -48,9 +48,8 @@ describe('metadata module', () => { describe('metadataFromZip', () => { it('should get metadata from site.bson', async () => { - const githubUser = 'gritty'; - const metaFromZip = await _metadataFromZip(zip, githubUser); - expect(metaFromZip).toEqual({ ...meta, github_username: githubUser }); + const metaFromZip = await _metadataFromZip(zip); + expect(metaFromZip).toEqual({ ...meta }); }); }); @@ -58,7 +57,7 @@ describe('metadata module', () => { const buildId = new ObjectId(); it('should insert metadata docs into metadata collection', async () => { try { - const metaFromZip = await _metadataFromZip(zip, 'gritty'); + const metaFromZip = await _metadataFromZip(zip); await insertMetadata(buildId, metaFromZip); } catch (e) { console.log(e); @@ -85,7 +84,7 @@ describe('metadata module', () => { it('removes copies of metadata for same project-branch, keeping the most recent ones', async () => { await mockDb.collection('metadata').insertMany(testData); - const metaFromZip = await _metadataFromZip(zip, 'gritty'); + const metaFromZip = await _metadataFromZip(zip); await deleteStaleMetadata(metaFromZip); const res = await mockDb .collection('metadata') From 16f909039e994f42ee411fafc9403e5f92880fc0 Mon Sep 17 00:00:00 2001 From: rayangler <27821750+rayangler@users.noreply.github.com> Date: Fri, 21 Mar 2025 15:48:21 -0400 Subject: [PATCH 4/5] Clean up more --- .../persistence/tests/services/pages.test.ts | 45 ++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/modules/persistence/tests/services/pages.test.ts b/modules/persistence/tests/services/pages.test.ts index 2bf6ca20e..35976255f 100644 --- a/modules/persistence/tests/services/pages.test.ts +++ b/modules/persistence/tests/services/pages.test.ts @@ -49,14 +49,12 @@ describe('pages module', () => { filename: 'page0.txt', ast: { foo: 'foo', bar: { foo: 'foo' } }, static_assets: [], - github_username: GH_USER, }, { page_id: `${pagePrefix}/page1.txt`, filename: 'page1.txt', ast: { foo: 'foo', bar: { foo: 'bar' } }, static_assets: [], - github_username: GH_USER, }, ]; }; @@ -76,10 +74,9 @@ describe('pages module', () => { filename: 'includes/included-file.rst', ast: { foo: 'foo', bar: { foo: 'foo' } }, static_assets: [], - github_username: GH_USER, }; pages.push(rstFile); - await _updatePages(pages, collection, GH_USER, new ObjectID()); + await _updatePages(pages, collection, new ObjectID()); res = await mockDb.collection(collection).find(findQuery).toArray(); expect(res).toHaveLength(2); @@ -90,7 +87,7 @@ describe('pages module', () => { it('should update modified pages', async () => { const pagePrefix = generatePagePrefix(); const pages = generatePages(pagePrefix); - await _updatePages(pages, collection, GH_USER, new ObjectID()); + await _updatePages(pages, collection, new ObjectID()); const findQuery = { page_id: { $regex: new RegExp(`^${pagePrefix}`) }, @@ -107,17 +104,15 @@ describe('pages module', () => { filename: 'page0.txt', ast: { foo: 'foo', bar: { foo: 'foo' } }, static_assets: [], - github_username: GH_USER, }, { page_id: `${pagePrefix}/page1.txt`, filename: 'page1.txt', ast: { foo: 'foo', bar: { foo: 'baz' } }, static_assets: [], - github_username: GH_USER, }, ]; - await _updatePages(updatedPages, collection, GH_USER, new ObjectID()); + await _updatePages(updatedPages, collection, new ObjectID()); res = await mockDb.collection(collection).find(findQuery).toArray(); expect(res).toHaveLength(2); @@ -130,7 +125,7 @@ describe('pages module', () => { it('should update pages with modified facets', async () => { const pagePrefix = generatePagePrefix(); const pages = generatePages(pagePrefix); - await _updatePages(pages, collection, GH_USER, new ObjectID()); + await _updatePages(pages, collection, new ObjectID()); // Applying facet updates to both pages const updatedPages = generatePages(pagePrefix); updatedPages[0].facets = [ @@ -152,7 +147,7 @@ describe('pages module', () => { const findQuery = { page_id: { $regex: new RegExp(`^${pagePrefix}`) }, }; - await _updatePages(updatedPages, collection, GH_USER, new ObjectID()); + await _updatePages(updatedPages, collection, new ObjectID()); const res = await mockDb.collection(collection).find(findQuery).toArray(); expect(res).toHaveLength(2); expect(res.every(({ facets }) => facets && facets.length)).toBeTruthy(); @@ -162,7 +157,7 @@ describe('pages module', () => { const removedFacetUpdates = generatePages(pagePrefix).slice(0, 1); delete removedFacetUpdates[0].facets; - await _updatePages(removedFacetUpdates, collection, GH_USER, new ObjectID()); + await _updatePages(removedFacetUpdates, collection, new ObjectID()); const findRemovalQuery = { page_id: { $regex: new RegExp(`^${pagePrefix}/page0.txt`) }, }; @@ -173,7 +168,7 @@ describe('pages module', () => { it('should mark pages for deletion', async () => { const pagePrefix = generatePagePrefix(); const pages = generatePages(pagePrefix); - await _updatePages(pages, collection, GH_USER, new ObjectID()); + await _updatePages(pages, collection, new ObjectID()); const findQuery = { page_id: { $regex: new RegExp(`^${pagePrefix}`) }, @@ -188,10 +183,9 @@ describe('pages module', () => { filename: 'page1.txt', ast: { foo: 'foo', bar: { foo: 'bar' } }, static_assets: [], - github_username: GH_USER, }, ]; - await _updatePages(updatedPages, collection, GH_USER, new ObjectID()); + await _updatePages(updatedPages, collection, new ObjectID()); // There should be 1 page marked as deleted res = await mockDb.collection(collection).find(findQuery).toArray(); @@ -199,7 +193,7 @@ describe('pages module', () => { expect(res[0]).toHaveProperty('filename', 'page0.txt'); // Re-adding the deleted page should lead to no deleted pages - await _updatePages(pages, collection, GH_USER, new ObjectID()); + await _updatePages(pages, collection, new ObjectID()); res = await mockDb.collection(collection).find(findQuery).toArray(); expect(res).toHaveLength(0); }); @@ -216,7 +210,6 @@ describe('pages module', () => { filename: 'page0.txt', ast: { foo: 'foo', bar: { foo: 'foo' } }, static_assets: [], - github_username: GH_USER, }; if (withAssets) { @@ -230,7 +223,7 @@ describe('pages module', () => { // Setup for empty static assets const pagePrefix = generatePagePrefix(); const page = createSamplePage(pagePrefix); - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); const findQuery = { page_id: page.page_id }; let res = await mockDb.collection(collection).findOne(findQuery); @@ -239,7 +232,7 @@ describe('pages module', () => { // Simulate update in page page.ast.foo = 'foobar'; - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); res = await mockDb.collection(collection).findOne(findQuery); expect(res).toBeTruthy(); // Should still be 0 @@ -250,13 +243,13 @@ describe('pages module', () => { // Setup for empty static assets const pagePrefix = generatePagePrefix(); const page = createSamplePage(pagePrefix); - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); // Modify page with new AST; a change in static_assets implies a change in AST page.ast.foo = 'new assets'; page.static_assets = sampleStaticAssets; const numStaticAssets = page.static_assets.length; - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); // Check that both assets were added const findQuery = { page_id: page.page_id }; @@ -270,7 +263,7 @@ describe('pages module', () => { it('should keep assets the same when no assets are changed', async () => { const pagePrefix = generatePagePrefix(); const page = createSamplePage(pagePrefix, true); - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); // Check that static assets were saved const findQuery = { page_id: page.page_id }; @@ -280,7 +273,7 @@ describe('pages module', () => { // Simulate change in AST but not in static assets page.ast.foo = 'no change in assets'; - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); // Check to make sure no changes in static assets res = await mockDb.collection(collection).findOne(findQuery); @@ -293,7 +286,7 @@ describe('pages module', () => { const page = createSamplePage(pagePrefix, true); const originalKey = page.static_assets[1].key; const numStaticAssets = page.static_assets.length; - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); // Check to make sure asset we plan to change was successfully added const findQuery = { page_id: page.page_id }; @@ -306,7 +299,7 @@ describe('pages module', () => { page.ast.foo = 'change in one asset'; const changedKey = '/images/changed-asset-name.svg'; page.static_assets[1].key = changedKey; - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); // Make sure changed asset is different from original asset res = await mockDb.collection(collection).findOne(findQuery); @@ -324,11 +317,11 @@ describe('pages module', () => { // Setup for single static asset const pagePrefix = generatePagePrefix(); const page = createSamplePage(pagePrefix, true); - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); page.ast.foo = 'deleted assets'; page.static_assets = []; - await _updatePages([page], collection, GH_USER, new ObjectID()); + await _updatePages([page], collection, new ObjectID()); const findQuery = { page_id: page.page_id }; const res = await mockDb.collection(collection).findOne(findQuery); From eb991613abcb8f4cd2eabb08c66492a8bd57bc5e Mon Sep 17 00:00:00 2001 From: rayangler <27821750+rayangler@users.noreply.github.com> Date: Mon, 24 Mar 2025 10:10:19 -0400 Subject: [PATCH 5/5] Clean up variables --- modules/persistence/index.ts | 5 +---- modules/persistence/tests/services/pages.test.ts | 2 -- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/modules/persistence/index.ts b/modules/persistence/index.ts index 38f073666..9d2921aa3 100644 --- a/modules/persistence/index.ts +++ b/modules/persistence/index.ts @@ -34,12 +34,9 @@ const app = async (path: string, jobId?: string) => { try { if (!path) throw missingPathMessage; const zip = new AdmZip(path); - - // Safely convert jobId in case of empty string - const autobuilderJobId = jobId; // atomic buildId for all artifacts read by this module - fundamental assumption // that only one build will be used per run of this module. - const buildId = new mongodb.ObjectId(autobuilderJobId); + const buildId = new mongodb.ObjectId(jobId); const metadata = await metadataFromZip(zip); // initialize db connections to handle shared connections await snootyDb(); diff --git a/modules/persistence/tests/services/pages.test.ts b/modules/persistence/tests/services/pages.test.ts index 35976255f..d8e52c0b4 100644 --- a/modules/persistence/tests/services/pages.test.ts +++ b/modules/persistence/tests/services/pages.test.ts @@ -3,8 +3,6 @@ import { Page, UpdatedPage, _updatePages } from '../../src/services/pages'; import { closeDb, setMockDB } from '../utils'; import { ObjectID } from 'bson'; -const GH_USER = 'foo_user'; - // Ignore non-null assertion warnings for this file. Non-null assertions are most likely // to be used for responses that we guarantee are not null /* eslint-disable @typescript-eslint/no-non-null-assertion */