Skip to content
This repository was archived by the owner on Aug 6, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions modules/persistence/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@ Compiles the module using `tsc`. By default, compiles to the `./dist` directory.

Runs the contents of the `./dist` directory.
Requires usage of the `--path` argument, e.g. `npm run start -- --path ./build/artifacts.zip`.
An optional argument to specify the github user and link the build to the user will require usage of `--githubUser <github_username>`.
This is the recommended command for running this module in environments beyond local development.
Requires parser output artifacts to be present in the specified directory, in the zip file at the path given by `--path`.

### `npm run dev`

Cleans dist, compiles, and runs with arguments `--path ./build/artifacts.zip`.
Optionally, add the usage of this argument to the command to link build to a desired github user `-- --githubUser <github_username>`.
Requires parser output artifacts to be present in the specified directory, in the zip file at `./build/artifacts.zip`.

## Available Arguments
Expand Down
17 changes: 6 additions & 11 deletions modules/persistence/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ import { upsertAssets } from './src/services/assets';

interface ModuleArgs {
path: string;
githubUser: string;
jobId: string;
jobId?: string;
strict: string;
[props: string | number | symbol]: unknown;
}
Expand All @@ -31,22 +30,18 @@ const missingPathMessage = 'No path specified in arguments - please specify a bu
// Load command line args into a parameterized argv
const argv: ModuleArgs = minimist(process.argv.slice(2));

const app = async (path: string, githubUser: string, jobId: string) => {
const app = async (path: string, jobId?: string) => {
try {
if (!path) throw missingPathMessage;
const user = githubUser || 'docs-builder-bot';
const zip = new AdmZip(path);

// Safely convert jobId in case of empty string
const autobuilderJobId = jobId || undefined;
// atomic buildId for all artifacts read by this module - fundamental assumption
// that only one build will be used per run of this module.
const buildId = new mongodb.ObjectId(autobuilderJobId);
const metadata = await metadataFromZip(zip, user);
const buildId = new mongodb.ObjectId(jobId);
const metadata = await metadataFromZip(zip);
// initialize db connections to handle shared connections
await snootyDb();
await poolDb();
await Promise.all([insertAndUpdatePages(buildId, zip, user), insertMetadata(buildId, metadata), upsertAssets(zip)]);
await Promise.all([insertAndUpdatePages(buildId, zip), insertMetadata(buildId, metadata), upsertAssets(zip)]);
await insertMergedMetadataEntries(buildId, metadata);
// DOP-3447 clean up stale metadata
await deleteStaleMetadata(metadata);
Expand All @@ -59,7 +54,7 @@ const app = async (path: string, githubUser: string, jobId: string) => {
}
};

app(argv['path'], argv['githubUser'], argv['jobId']).catch(() => {
app(argv['path'], argv['jobId']).catch(() => {
console.error('Persistence Module Failure. Ending build.');
process.exit(1);
});
4 changes: 1 addition & 3 deletions modules/persistence/src/services/metadata/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,17 @@ export interface Metadata {
associated_products?: AssociatedProduct[];
toctree: ToC;
toctreeOrder: any[];
github_username?: string;
[key: string]: any;
}
// Service responsible for memoization of metadata entries.
// Any extraneous logic performed on metadata entries as part of upload should be added here
// or within subfolders of this module
export const metadataFromZip = async (zip: AdmZip, githubUser: string) => {
export const metadataFromZip = async (zip: AdmZip) => {
const zipEntries = zip.getEntries();
const metadata = zipEntries
.filter((entry) => entry.entryName === 'site.bson')
.map((entry) => deserialize(entry.getData()))[0] as Metadata;
await verifyMetadata(metadata);
metadata.github_username = githubUser;
return metadata;
};

Expand Down
34 changes: 11 additions & 23 deletions modules/persistence/src/services/pages/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ export interface Page {
filename: string;
ast: PageAst;
static_assets: UpdatedAsset[];
github_username: string;
facets?: Facet[];
}

Expand All @@ -53,13 +52,12 @@ const UPDATED_AST_COLL_NAME = 'updated_documents';
// Service responsible for memoization of page level documents.
// Any extraneous logic performed on page level documents as part of upload should be added here
// or within subfolders of this module
const pagesFromZip = (zip: AdmZip, githubUser: string): Page[] => {
const pagesFromZip = (zip: AdmZip): Page[] => {
const zipPages = zip.getEntries();
return zipPages
.filter((entry) => entry.entryName?.startsWith('documents/'))
.map((entry) => {
const document = deserialize(entry.getData()) as Page;
document.github_username = githubUser;
return document;
});
};
Expand All @@ -72,11 +70,10 @@ const pagesFromZip = (zip: AdmZip, githubUser: string): Page[] => {
* @param pageIdPrefix - Includes the Snooty project name, user (docsworker-xlarge), and branch
* @param collection - The collection to perform the find query on
*/
const findPrevPageDocs = async (pageIdPrefix: string, collection: string, githubUser: string) => {
const findPrevPageDocs = async (pageIdPrefix: string, collection: string) => {
const dbSession = await db();
const findQuery = {
page_id: { $regex: new RegExp(`^${pageIdPrefix}/`) },
github_username: githubUser,
deleted: false,
};
const projection = {
Expand Down Expand Up @@ -119,21 +116,13 @@ class UpdatedPagesManager {
prevPageDocsMapping: PreviousPageMapping;
prevPageIds: Set<string>;
updateTime: Date;
githubUser: string;
buildId: ObjectId;

constructor(
prevPageDocsMapping: PreviousPageMapping,
prevPagesIds: Set<string>,
pages: Page[],
githubUser: string,
buildId: ObjectId
) {
constructor(prevPageDocsMapping: PreviousPageMapping, prevPagesIds: Set<string>, pages: Page[], buildId: ObjectId) {
this.currentPages = pages;
this.operations = [];
this.prevPageDocsMapping = prevPageDocsMapping;
this.prevPageIds = prevPagesIds;
this.githubUser = githubUser;
this.buildId = buildId;

this.updateTime = new Date();
Expand Down Expand Up @@ -162,7 +151,7 @@ class UpdatedPagesManager {
if (!isEqual(page.ast, prevPageData?.ast) || !isEqual(page.facets, prevPageData?.facets)) {
const operation = {
updateOne: {
filter: { page_id: currentPageId, github_username: page.github_username },
filter: { page_id: currentPageId },
update: {
$set: {
page_id: currentPageId,
Expand Down Expand Up @@ -243,7 +232,7 @@ class UpdatedPagesManager {
this.prevPageIds.forEach((unseenPageId) => {
const operation = {
updateOne: {
filter: { page_id: unseenPageId, github_username: this.githubUser },
filter: { page_id: unseenPageId },
update: {
$set: {
deleted: true,
Expand All @@ -270,7 +259,7 @@ class UpdatedPagesManager {
* @param pages
* @param collection
*/
const updatePages = async (pages: Page[], collection: string, githubUser: string, buildId: ObjectId) => {
const updatePages = async (pages: Page[], collection: string, buildId: ObjectId) => {
if (pages.length === 0) {
return;
}
Expand All @@ -282,12 +271,12 @@ const updatePages = async (pages: Page[], collection: string, githubUser: string
// Find all pages that share the same project name + branch. Expects page IDs
// to include these two properties after parse
const pageIdPrefix = pages[0].page_id.split('/').slice(0, 3).join('/');
const previousPagesCursor = await findPrevPageDocs(pageIdPrefix, collection, githubUser);
const previousPagesCursor = await findPrevPageDocs(pageIdPrefix, collection);
const { mapping: prevPageDocsMapping, pageIds: prevPageIds } = await createPageAstMapping(previousPagesCursor);

const diffsTimerLabel = 'finding page differences';
console.time(diffsTimerLabel);
const updatedPagesManager = new UpdatedPagesManager(prevPageDocsMapping, prevPageIds, pages, githubUser, buildId);
const updatedPagesManager = new UpdatedPagesManager(prevPageDocsMapping, prevPageIds, pages, buildId);
const operations = updatedPagesManager.getOperations();
console.timeEnd(diffsTimerLabel);

Expand All @@ -309,13 +298,12 @@ const updatePages = async (pages: Page[], collection: string, githubUser: string
}
};

export const insertAndUpdatePages = async (buildId: ObjectId, zip: AdmZip, githubUser: string) => {
export const insertAndUpdatePages = async (buildId: ObjectId, zip: AdmZip) => {
try {

// TEMPORARY FIX FOR NETLIFY BUILDS
// TODO: DOP-5405 remove parser user from page id altogether

const pages = pagesFromZip(zip, githubUser).map((page: Page) => {
const pages = pagesFromZip(zip).map((page: Page) => {
page.page_id = page.page_id.replace('buildbot', 'docsworker-xlarge');
return page;
});
Expand All @@ -324,7 +312,7 @@ export const insertAndUpdatePages = async (buildId: ObjectId, zip: AdmZip, githu

const featureEnabled = process.env.FEATURE_FLAG_UPDATE_PAGES;
if (featureEnabled && featureEnabled.toUpperCase() === 'TRUE') {
ops.push(updatePages(pages, UPDATED_AST_COLL_NAME, githubUser, buildId));
ops.push(updatePages(pages, UPDATED_AST_COLL_NAME, buildId));
}

return Promise.all(ops);
Expand Down
9 changes: 4 additions & 5 deletions modules/persistence/tests/metadata/metadata.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,16 @@ describe('metadata module', () => {

describe('metadataFromZip', () => {
it('should get metadata from site.bson', async () => {
const githubUser = 'gritty';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol i'll miss it

const metaFromZip = await _metadataFromZip(zip, githubUser);
expect(metaFromZip).toEqual({ ...meta, github_username: githubUser });
const metaFromZip = await _metadataFromZip(zip);
expect(metaFromZip).toEqual({ ...meta });
});
});

describe('insertMetadata', () => {
const buildId = new ObjectId();
it('should insert metadata docs into metadata collection', async () => {
try {
const metaFromZip = await _metadataFromZip(zip, 'gritty');
const metaFromZip = await _metadataFromZip(zip);
await insertMetadata(buildId, metaFromZip);
} catch (e) {
console.log(e);
Expand All @@ -85,7 +84,7 @@ describe('metadata module', () => {

it('removes copies of metadata for same project-branch, keeping the most recent ones', async () => {
await mockDb.collection('metadata').insertMany(testData);
const metaFromZip = await _metadataFromZip(zip, 'gritty');
const metaFromZip = await _metadataFromZip(zip);
await deleteStaleMetadata(metaFromZip);
const res = await mockDb
.collection('metadata')
Expand Down
Loading
Loading