From 886d752f4ae36356e99cecd7bd103e311139087c Mon Sep 17 00:00:00 2001 From: takegue Date: Sun, 7 May 2023 22:15:54 +0900 Subject: [PATCH 1/3] Initial implement: import commands --- src/commands/cli.ts | 69 ++++++- src/commands/import.ts | 187 ++++++++++++++++++ src/commands/pull.ts | 21 +- .../__snapshots__/import.spec.ts.snap | 48 +++++ tests/scenario/import.spec.ts | 107 ++++++++++ 5 files changed, 424 insertions(+), 8 deletions(-) create mode 100644 src/commands/import.ts create mode 100644 tests/scenario/__snapshots__/import.spec.ts.snap create mode 100644 tests/scenario/import.spec.ts diff --git a/src/commands/cli.ts b/src/commands/cli.ts index d750a7a..ab473ac 100644 --- a/src/commands/cli.ts +++ b/src/commands/cli.ts @@ -4,6 +4,7 @@ import { formatLocalfiles } from '../../src/commands/fix.js'; import { pushLocalFilesToBigQuery } from '../../src/commands/push.js'; import { createBundleSQL } from '../../src/commands/bundle.js'; import { pullBigQueryResources } from '../../src/commands/pull.js'; +import { importBigQueryResources } from '../../src/commands/import.js'; import { buildThrottledBigQueryClient } from '../../src/bigquery.js'; import type { Query } from '@google-cloud/bigquery'; @@ -47,8 +48,8 @@ export function createCLI() { .option( '-p, --parameter ', `Either a file containing a JSON list of query parameters, or a query parameter in the form "name:type:value". ` + - `An empty name produces a positional parameter. The type may be omitted to assume STRING: name::value or ::value. ` + - `The value "NULL" produces a null value. repeat this option to specify a list of values`, + `An empty name produces a positional parameter. The type may be omitted to assume STRING: name::value or ::value. ` + + `The value "NULL" produces a null value. repeat this option to specify a list of values`, ) .option( '--maximum_bytes_billed ', @@ -247,10 +248,74 @@ export function createCLI() { } }); + const importCommand = new Command('import') + .description( + 'Import other dataset UDF into specified dataset', + ) + .argument('') + .argument('[targets...]') + .action( + async (destination: string, cmdTargets: string[] | undefined, _, cmd) => { + const cmdOptions = cmd.optsWithGlobals(); + const rootDir = cmdOptions.rootPath; + if (!rootDir) { + console.error('CLI Error'); + return; + } + + const bqClient = buildThrottledBigQueryClient( + parseInt(cmdOptions.threads), + 500, + ); + + // Parse targets 'bqutils.fn.sure_nonnull' into [{'project': 'bqutils', dataset: 'fn', routine_id: 'sure_nonnull'}] + const targets = cmdTargets?.map((target) => { + const elms = target.split('.'); + if (elms.length !== 3) { + throw new Error(`Invalid target: ${target}`); + } + return { + project: elms[0] as string, + dataset: elms[1] as string, + routine_id: elms[2] as string, + }; + }) ?? 
[]; + + let paramDestination: { project: string; dataset: string }; + const [projectOrDataset, destinationDataset] = destination.split('.'); + if (destinationDataset) { + paramDestination = { + project: destinationDataset, + dataset: destinationDataset, + }; + } else if (projectOrDataset) { + paramDestination = { + project: '@default', + dataset: projectOrDataset, + }; + } else { + throw new Error(`Invalid destination: ${destination}`); + } + + const ctx = { + bigQuery: bqClient, + rootPath: rootDir, + destination: paramDestination, + importTargets: targets, + options: { + is_update: true, + }, + }; + + await importBigQueryResources(ctx); + }, + ); + program.addCommand(pushCommand); program.addCommand(pullCommand); program.addCommand(formatCommmand); program.addCommand(bundleCommand); + program.addCommand(importCommand); return program; } diff --git a/src/commands/import.ts b/src/commands/import.ts new file mode 100644 index 0000000..d916cb8 --- /dev/null +++ b/src/commands/import.ts @@ -0,0 +1,187 @@ +import type { + BigQuery, + // DatasetOptions, + // GetDatasetsOptions, + Routine, +} from '@google-cloud/bigquery'; + +import { pullMetadataTaskBuilder } from '../../src/commands/pull.js'; +import { Task } from '../../src/tasks/base.js'; +import { ReporterMap } from '../../src/reporter/index.js'; + +type ImportContext = { + bigQuery: BigQuery; + rootPath: string; + destination: { + project: string; + dataset: string; + }; + importTargets: { + project: string; + dataset: string; + routine_id: string; + }[]; + options: { + is_update: boolean; + }; +}; + +type NormalProject = { + kind: 'normal'; + value: string; +}; +type SpecialProject = { + kind: 'special'; + value: '@default'; + resolved_value: string; +}; + +type BQPPRojectID = NormalProject | SpecialProject; + +const parseProjectID = async ( + ctx: ImportContext, + projectID: string, +): Promise => { + if (projectID === '@default') { + return { + kind: 'special', + value: '@default', + resolved_value: await ctx.bigQuery.getProjectId(), + }; + } else { + return { + kind: 'normal', + value: projectID, + }; + } +}; + +const importRoutine = async ( + client: BigQuery, + destination: { project: string; dataset: string }, + routine: Routine, +) => { + /* + * This function will fetch routine from BigQuery and return it as a string. + * + * This procses is done by following steps: + * 1. Fetch routine DDL from BigQuery and write a file to local. + * 2. Replace routine dataset or project ids in routine with destination + * 3. Deploy routines to destinatino + * 4. 
Update metadata in local + */ + const [metadata] = await routine.getMetadata(); + + const [imported, _] = await client.dataset(destination.dataset, { + projectId: destination.project, + }) + .createRoutine( + metadata.routineReference.routineId, + { + arguments: metadata.arguments, + definitionBody: metadata.definitionBody, + description: metadata.description, + determinismLevel: metadata.determinismLevel, + language: metadata.language, + returnType: metadata.returnType, + routineType: metadata.routineType, + }, + ); + + return imported; +}; + +async function importBigQueryResources( + ctx: ImportContext, +): Promise { + const tasks: Task[] = []; + + const genPullTask = pullMetadataTaskBuilder( + { + BQIDs: [], + BigQuery: ctx.bigQuery, + rootPath: ctx.rootPath, + withDDL: true, + forceAll: false, + reporter: 'json', + }, + async (bqId: string) => { + const [datasetAndProject, routineId] = bqId.split('.'); + if (!datasetAndProject) { + return undefined; + } + const [projectId, datasetId] = datasetAndProject.split(':'); + if (!projectId || !datasetId || !routineId) { + return undefined; + } + + const [metadata] = await ctx.bigQuery.dataset(datasetId, { projectId }) + .routine(routineId) + .getMetadata(); + + if (metadata.routineReference === undefined) { + return undefined; + } + return [ + `create or replace function ${projectId}.${datasetId}.${routineId}` + + `(${metadata.arguments.map((arg: any) => + `${arg.name} ${arg.dataType ?? arg.argumentKind.replace('ANY_TYPE', 'ANY TYPE') + }` + ) + .join( + ', ', + ) + })`, + metadata.language == 'js' ? `language ${metadata.language}` : '', + metadata.returnType ? `return ${metadata.returnType}` : '', + `as (${metadata.definitionBody})`, + ] + .join('\n'); + }, + ); + + const jobs = ctx.importTargets.map(async (target) => { + const parsed: BQPPRojectID = await parseProjectID( + ctx, + ctx.destination.project, + ); + const parsedDestination = { + project: (parsed.kind == 'special' ? parsed.resolved_value : undefined) ?? + parsed.value, + dataset: ctx.destination.dataset, + }; + const importedRoutine: Routine = await importRoutine( + ctx.bigQuery, + parsedDestination, + ctx.bigQuery + .dataset(target.dataset, { projectId: target.project }) + .routine(target.routine_id), + ); + const task = await genPullTask(importedRoutine); + task.run(); + tasks.push(task); + return 0; + }); + await Promise.all(jobs); + + // const reporterType: BuiltInReporters = 'console'; + const reporter = new ReporterMap['json'](); + try { + reporter.onInit(tasks); + tasks.forEach((t) => t.run()); + while (tasks.some((t) => !t.done())) { + reporter.onUpdate(); + await new Promise((resolve) => setTimeout(resolve, 100)); + } + reporter.onUpdate(); + } catch (e: unknown) { + } finally { + reporter.onFinished(); + } + + const failedTasks = + tasks.filter((t) => t.result().status !== 'success').length; + return failedTasks; +} + +export { importBigQueryResources }; diff --git a/src/commands/pull.ts b/src/commands/pull.ts index 27a3793..066aea4 100644 --- a/src/commands/pull.ts +++ b/src/commands/pull.ts @@ -234,11 +234,20 @@ const fsWriter = async ( return retFiles; } - if (!ctx.withDDL || !bqObj.metadata?.id || !bqObj.id || !ddlReader) { + const bqObjID = + (bqObj.metadata.datasetReference != undefined ? bqObj.id : undefined) ?? + (bqObj.metadata.tableReference != undefined ? bqObj.id : undefined) ?? + (bqObj.metadata.routineReference != undefined + ? 
`${bqObj.metadata.routineReference.projectId}:${bqObj.metadata.routineReference.datasetId}.${bqObj.metadata.routineReference.routineId}` + : undefined) ?? + (bqObj.metadata.modelReference != undefined + ? `${bqObj.metadata.modelReference.projectId}:${bqObj.metadata.modelReference.datasetId}.${bqObj.metadata.modelReference.modelId}` + : undefined); + + if (!ctx.withDDL || !bqObjID || !ddlReader) { return retFiles; } - - const ddlStatement = await ddlReader(bqObj.metadata?.id ?? bqObj.id); + const ddlStatement = await ddlReader(bqObjID); if (!ddlStatement) { return retFiles; } @@ -332,7 +341,7 @@ async function* crawlBigQueryDataset( } const p = Promise.allSettled(promises); - const pool = async function* () { + const pool = async function*() { while (true) { try { await Promise.race([ @@ -372,7 +381,7 @@ const pullMetadataTaskBuilder = ( const bqId = bq2path( bqObj as BigQueryResource, projectId === undefined || - projectId === await ctx.BigQuery.getProjectId(), + projectId === await ctx.BigQuery.getProjectId(), ); const task = new Task( @@ -537,4 +546,4 @@ async function pullBigQueryResources( return failedTasks; } -export { pullBigQueryResources }; +export { pullBigQueryResources, pullMetadataTaskBuilder }; diff --git a/tests/scenario/__snapshots__/import.spec.ts.snap b/tests/scenario/__snapshots__/import.spec.ts.snap new file mode 100644 index 0000000..22fff3a --- /dev/null +++ b/tests/scenario/__snapshots__/import.spec.ts.snap @@ -0,0 +1,48 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`CLI: import > import _testImport bqutil.fn.sure_nonnull > Imported Files: Contents 1`] = ` +Map { + "@default/_testImport/@routines/sure_nonnull/README.md" => "If non-NULL argument is passed, +the function returns input \`value\` as-is; however if NULL +value is passed, it throws an error. +", + "@default/_testImport/@routines/sure_nonnull/ddl.sql" => "create or replace function takegue._testImport.sure_nonnull(value ANY TYPE) + + +as (IF( + value IS NOT NULL, + value, + ERROR(\\"\`bqutil.fn.sure_nonnull\`: Value must be nonull\\") + ))", + "@default/_testImport/@routines/sure_nonnull/metadata.json" => "{ + \\"routineType\\": \\"SCALAR_FUNCTION\\", + \\"language\\": \\"SQL\\", + \\"arguments\\": [ + { + \\"name\\": \\"value\\", + \\"argumentKind\\": \\"ANY_TYPE\\" + } + ] +}", +} +`; + +exports[`CLI: import > import _testImport bqutil.fn.sure_nonnull > Imported Files: List 1`] = `{}`; + +exports[`CLI: import > import _testImport bqutil.fn.sure_nonnull 1`] = `[]`; + +exports[`CLI: import > import --help 1`] = `Map {}`; + +exports[`CLI: import > import --help 2`] = `[]`; + +exports[`CLI: import > import --help 3`] = ` +[ + "Usage: program import [options] [targets...] 
+ +Import other dataset UDF into specified dataset + +Options: + -h, --help display help for command +", +] +`; diff --git a/tests/scenario/import.spec.ts b/tests/scenario/import.spec.ts new file mode 100644 index 0000000..e0188e0 --- /dev/null +++ b/tests/scenario/import.spec.ts @@ -0,0 +1,107 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { tmpdir } from 'node:os'; +import * as path from 'node:path'; +import * as fs from 'node:fs'; + +import { Command } from 'commander'; +import { BigQuery } from '@google-cloud/bigquery'; + +import { createCLI } from '../../src/commands/cli.js'; +import { walk } from '../../src/util.js'; + +interface CLITestContext { + cli: Command; + out: string[]; + err: string[]; + rootPath: string; +} + +describe('CLI: import', () => { + const tempDir = path.join(tmpdir(), `bqport-`); + const setupCommanderForTest = (c: Command, out: string[], err: string[]) => { + c + .exitOverride((e: Error) => { + throw e; + }) + .configureOutput({ + writeOut: (s) => out.push(s), + writeErr: (s) => err.push(s), + }); + + // Fix up display column wdith for comamnder.js + if (process.stdout.isTTY) { + process.stdout.columns = 120; + process.stderr.columns = 120; + } + }; + + const crawlFs = async (root: string) => { + const file2content: Map = new Map(); + for (const f of await walk(root)) { + const content = await fs.promises.readFile(f, 'utf-8'); + file2content.set(path.relative(root, f), content); + } + return file2content; + }; + + beforeEach(async (ctx) => { + const out: string[] = []; + const err: string[] = []; + const cli = createCLI(); + setupCommanderForTest(cli, out, err); + for (const c of cli.commands) { + setupCommanderForTest(c, out, err); + } + + vi.spyOn(console, 'log') + .mockImplementation((s: string) => { + out.push(s); + }); + vi.spyOn(console, 'error') + .mockImplementation((s: string) => { + err.push(s); + }); + + ctx.cli = cli; + ctx.out = out; + ctx.err = err; + ctx.rootPath = path.resolve(fs.mkdtempSync(tempDir)); + }); + + afterEach(async () => { + vi.resetAllMocks(); + }); + + it( + `import --help`, + async ({ meta, cli, out, err, rootPath }) => { + await expect( + cli.parseAsync(meta.name.split(' '), { from: 'user' }), + ).rejects.toThrow(); + expect(await crawlFs(rootPath)).toMatchSnapshot(); + expect(err).toMatchSnapshot(); + expect(out).toMatchSnapshot(); + }, + ); + + it( + `import _testImport bqutil.fn.sure_nonnull`, + async ({ meta, cli, err, rootPath }) => { + const dataset = new BigQuery().dataset('_testImport'); + try { + await dataset.delete({ force: true }); + } catch (e) { + } + await dataset.create(); + + await cli.parseAsync([...meta.name.split(' '), ...['-C', rootPath]], { + from: 'user', + }); + const files = await crawlFs(path.join(rootPath)); + expect(files.keys()).toMatchSnapshot('Imported Files: List'); + expect(files).toMatchSnapshot('Imported Files: Contents'); + expect(err).toMatchSnapshot(); + }, + ); +}); From a0030efc8b99223b6809b7b0b4e2726b5f212dbd Mon Sep 17 00:00:00 2001 From: takegue Date: Mon, 8 May 2023 08:33:19 +0900 Subject: [PATCH 2/3] Impl. 
FullResourceID extractor --- src/bigquery.ts | 27 ++++++++++++++++++++++++++- src/test.ts | 10 ++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 src/test.ts diff --git a/src/bigquery.ts b/src/bigquery.ts index 64fc11f..cf325c8 100644 --- a/src/bigquery.ts +++ b/src/bigquery.ts @@ -60,7 +60,30 @@ function getProjectId( throw new Error(`Cannot find projectId ${bqObj}`); } -export { getProjectId }; +function getFullResourceId(dataset: Dataset): string; +function getFullResourceId(model: Model): string; +function getFullResourceId(table: Table): string; +function getFullResourceId(routine: Routine): string; +function getFullResourceId(bqObj: Dataset | Table | Routine | Model): string { + if (bqObj instanceof Model) { + return `${bqObj.dataset.projectId}:${bqObj.dataset.id}.${bqObj.id}`; + } + + if (bqObj instanceof Table) { + return `${bqObj.dataset.projectId}:${bqObj.dataset.id}.${bqObj.id}`; + } + + if (bqObj instanceof Routine) { + const dataset = bqObj.parent as Dataset; + return `${dataset.projectId}:${dataset.id}.${bqObj.id}`; + } + + if (bqObj instanceof Dataset) { + return `${bqObj.projectId}:${bqObj.id}`; + } + + throw new Error(`Cannot find projectId ${bqObj}`); +} const buildThrottledBigQueryClient = ( concurrency: number, @@ -287,6 +310,8 @@ export { buildThrottledBigQueryClient, extractBigQueryDependencies, extractBigQueryDestinations, + getFullResourceId, + getProjectId, normalizedBQPath, normalizeShardingTableId, path2bq, diff --git a/src/test.ts b/src/test.ts new file mode 100644 index 0000000..a383c95 --- /dev/null +++ b/src/test.ts @@ -0,0 +1,10 @@ +import { BigQuery } from '@google-cloud/bigquery'; + +const main = async () => { + const bq = new BigQuery(); + const d = bq.dataset('test_listing'); + console.log(d); + console.log('get', await d.get()); + console.log('getMetadata', await d.getMetadata()); +}; +await main(); From 49458e21a571ef1e3e78eea0dd9caa4f5fb987f4 Mon Sep 17 00:00:00 2001 From: takegue Date: Wed, 10 May 2023 05:29:02 +0900 Subject: [PATCH 3/3] Impl. _import.json metadata for managing packages --- src/bigquery.ts | 28 +- src/commands/import.ts | 132 +++++---- src/commands/pull.ts | 15 +- src/metadata.ts | 2 +- tests/basic.spec.ts | 607 ++++++++++++++++++++++------------------- 5 files changed, 427 insertions(+), 357 deletions(-) diff --git a/src/bigquery.ts b/src/bigquery.ts index cf325c8..fec0fef 100644 --- a/src/bigquery.ts +++ b/src/bigquery.ts @@ -32,10 +32,9 @@ function getProjectId(dataset: Dataset): string; function getProjectId(model: Model): string; function getProjectId(table: Table): string; function getProjectId(routine: Routine): string; -function getProjectId(bigquery: BigQuery): string; function getProjectId( - bqObj: Dataset | Table | Routine | Model | BigQuery, + bqObj: Dataset | Table | Routine | Model, ): string { if (bqObj?.projectId) { return bqObj.projectId; @@ -304,10 +303,35 @@ const extractBigQueryDestinations = async ( return refs.map((r) => JSON.parse(r)); }; +const constructDDLfromBigQueryObject = async ( + bqObj: Routine, +): Promise => { + const [metadata, _] = await bqObj.getMetadata(); + const id = getFullResourceId(bqObj).replace(':', '.'); + + const _argumentsString = metadata.arguments + ? metadata.arguments.map((arg: any) => + `${arg.name} ${arg.dataType ?? arg.argumentKind.replace('ANY_TYPE', 'ANY TYPE') + }` + ) + .join( + ', ', + ) + : ''; + + return [ + `create or replace function \`${id}\`(${_argumentsString})`, + metadata.language == 'js' ? 
`language ${metadata.language}` : '', + metadata.returnType ? `return ${metadata.returnType}` : '', + `as (${metadata.definitionBody})`, + ].filter((s) => s).join('\n'); +}; + export { BigQueryResource, bq2path, buildThrottledBigQueryClient, + constructDDLfromBigQueryObject, extractBigQueryDependencies, extractBigQueryDestinations, getFullResourceId, diff --git a/src/commands/import.ts b/src/commands/import.ts index d916cb8..962a042 100644 --- a/src/commands/import.ts +++ b/src/commands/import.ts @@ -5,9 +5,16 @@ import type { Routine, } from '@google-cloud/bigquery'; +import { + BigQueryResource, + bq2path, + constructDDLfromBigQueryObject, +} from '../../src/bigquery.js'; import { pullMetadataTaskBuilder } from '../../src/commands/pull.js'; import { Task } from '../../src/tasks/base.js'; import { ReporterMap } from '../../src/reporter/index.js'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; type ImportContext = { bigQuery: BigQuery; @@ -57,37 +64,54 @@ const parseProjectID = async ( }; const importRoutine = async ( + ctx: ImportContext, client: BigQuery, destination: { project: string; dataset: string }, routine: Routine, ) => { - /* - * This function will fetch routine from BigQuery and return it as a string. - * - * This procses is done by following steps: - * 1. Fetch routine DDL from BigQuery and write a file to local. - * 2. Replace routine dataset or project ids in routine with destination - * 3. Deploy routines to destinatino - * 4. Update metadata in local - */ const [metadata] = await routine.getMetadata(); - const [imported, _] = await client.dataset(destination.dataset, { - projectId: destination.project, - }) - .createRoutine( - metadata.routineReference.routineId, - { - arguments: metadata.arguments, - definitionBody: metadata.definitionBody, - description: metadata.description, - determinismLevel: metadata.determinismLevel, - language: metadata.language, - returnType: metadata.returnType, - routineType: metadata.routineType, - }, - ); + let imported; + try { + const _imported = client.dataset(destination.dataset, { + projectId: destination.project, + }).routine(metadata.routineReference.routineId); + await _imported.get(); + imported = _imported; + } catch (e) { + const [_imported, _] = await client.dataset(destination.dataset, { + projectId: destination.project, + }) + .createRoutine( + metadata.routineReference.routineId, + { + arguments: metadata.arguments, + definitionBody: metadata.definitionBody, + description: metadata.description, + determinismLevel: metadata.determinismLevel, + language: metadata.language, + returnType: metadata.returnType, + routineType: metadata.routineType, + }, + ); + imported = _imported; + } + + const parsed: BQPPRojectID = await parseProjectID( + ctx, + ctx.destination.project, + ); + const d = bq2path( + imported as BigQueryResource, + parsed.kind === 'special', + ); + fs.mkdirSync(path.dirname(d), { recursive: true }); + const importPath = path.join(ctx.rootPath, d, '_imported.json'); + fs.promises.writeFile( + importPath, + JSON.stringify(routine.metadata.routineReference, null, 2), + ); return imported; }; @@ -115,32 +139,14 @@ async function importBigQueryResources( return undefined; } - const [metadata] = await ctx.bigQuery.dataset(datasetId, { projectId }) - .routine(routineId) - .getMetadata(); + const routine = ctx.bigQuery.dataset(datasetId, { projectId }) + .routine(routineId); - if (metadata.routineReference === undefined) { - return undefined; - } - return [ - `create or replace function 
${projectId}.${datasetId}.${routineId}` + - `(${metadata.arguments.map((arg: any) => - `${arg.name} ${arg.dataType ?? arg.argumentKind.replace('ANY_TYPE', 'ANY TYPE') - }` - ) - .join( - ', ', - ) - })`, - metadata.language == 'js' ? `language ${metadata.language}` : '', - metadata.returnType ? `return ${metadata.returnType}` : '', - `as (${metadata.definitionBody})`, - ] - .join('\n'); + return constructDDLfromBigQueryObject(routine); }, ); - const jobs = ctx.importTargets.map(async (target) => { + for (const target of ctx.importTargets) { const parsed: BQPPRojectID = await parseProjectID( ctx, ctx.destination.project, @@ -150,21 +156,27 @@ async function importBigQueryResources( parsed.value, dataset: ctx.destination.dataset, }; - const importedRoutine: Routine = await importRoutine( - ctx.bigQuery, - parsedDestination, - ctx.bigQuery - .dataset(target.dataset, { projectId: target.project }) - .routine(target.routine_id), + const task1 = new Task( + `${ctx.destination.project}/${ctx.destination.dataset}/(import)/${target.project}.${target.dataset}.${target.routine_id}`, + async () => { + const importedRoutine: Routine = await importRoutine( + ctx, + ctx.bigQuery, + parsedDestination, + ctx.bigQuery + .dataset(target.dataset, { projectId: target.project }) + .routine(target.routine_id), + ); + + const task = await genPullTask(importedRoutine); + task.run(); + tasks.push(task); + return 'success'; + }, ); - const task = await genPullTask(importedRoutine); - task.run(); - tasks.push(task); - return 0; - }); - await Promise.all(jobs); - - // const reporterType: BuiltInReporters = 'console'; + tasks.push(task1); + } + const reporter = new ReporterMap['json'](); try { reporter.onInit(tasks); diff --git a/src/commands/pull.ts b/src/commands/pull.ts index 066aea4..ab4a120 100644 --- a/src/commands/pull.ts +++ b/src/commands/pull.ts @@ -14,6 +14,7 @@ import { syncMetadata } from '../../src/metadata.js'; import { BigQueryResource, bq2path, + getFullResourceId, getProjectId, normalizeShardingTableId, } from '../../src/bigquery.js'; @@ -234,20 +235,10 @@ const fsWriter = async ( return retFiles; } - const bqObjID = - (bqObj.metadata.datasetReference != undefined ? bqObj.id : undefined) ?? - (bqObj.metadata.tableReference != undefined ? bqObj.id : undefined) ?? - (bqObj.metadata.routineReference != undefined - ? `${bqObj.metadata.routineReference.projectId}:${bqObj.metadata.routineReference.datasetId}.${bqObj.metadata.routineReference.routineId}` - : undefined) ?? - (bqObj.metadata.modelReference != undefined - ? 
`${bqObj.metadata.modelReference.projectId}:${bqObj.metadata.modelReference.datasetId}.${bqObj.metadata.modelReference.modelId}` - : undefined); - - if (!ctx.withDDL || !bqObjID || !ddlReader) { + if (!ctx.withDDL || !ddlReader) { return retFiles; } - const ddlStatement = await ddlReader(bqObjID); + const ddlStatement = await ddlReader(getFullResourceId(bqObj)); if (!ddlStatement) { return retFiles; } diff --git a/src/metadata.ts b/src/metadata.ts index 30f7be8..0adee1c 100644 --- a/src/metadata.ts +++ b/src/metadata.ts @@ -123,7 +123,7 @@ const syncMetadata = async ( snapshotDefinition: metadata?.snapshotDefinition, // cloneDefinition attribute - cloneDefinition: metadata?.snapshotDefinition, + cloneDefinition: metadata?.cloneDefinition, // Dataset attribute access: metadata?.access, diff --git a/tests/basic.spec.ts b/tests/basic.spec.ts index 60c67fd..3e2affc 100644 --- a/tests/basic.spec.ts +++ b/tests/basic.spec.ts @@ -11,37 +11,40 @@ import { import { BigQueryResource, bq2path, + getFullResourceId, normalizedBQPath, path2bq, } from '../src/bigquery.js'; +import { BigQuery } from '@google-cloud/bigquery'; + describe('util test: toposort', () => { const cases: Array<{ input: Relation[]; expected: string[]; }> = [ - { - input: [ - ['a', 'b'], - ['b', 'c'], - ], - expected: ['c', 'b', 'a'], - }, - { - input: [ - ['a', 'b'], - ['c', 'b'], - ], - expected: ['b', 'a', 'c'], - }, - { - input: [ - ['c', 'b'], - ['b', 'a'], - ], - expected: ['a', 'b', 'c'], - }, - ]; + { + input: [ + ['a', 'b'], + ['b', 'c'], + ], + expected: ['c', 'b', 'a'], + }, + { + input: [ + ['a', 'b'], + ['c', 'b'], + ], + expected: ['b', 'a', 'c'], + }, + { + input: [ + ['c', 'b'], + ['b', 'a'], + ], + expected: ['a', 'b', 'c'], + }, + ]; it.concurrent.each(cases)('topological sort', async (args) => { const { input, expected } = args; expect(topologicalSort(input)) @@ -64,97 +67,97 @@ describe('util test: sql extraction ', () => { ][]; expectedReferences: string[]; }> = [ - { - input: `create table \`child_table\` as select * + { + input: `create table \`child_table\` as select * from \`dataset.parent1_table\`, \`dataset.parent2_table\``, - expectedDestinations: [['`child_table`', 'TABLE', 'DDL_CREATE']], - expectedReferences: [ - '`dataset.parent1_table`', - '`dataset.parent2_table`', - ], - }, - { - input: `with cte as (select * from \`child_table\`) select * from cte`, - expectedDestinations: [], - expectedReferences: ['`child_table`'], - }, - { - input: `select 1`, - expectedDestinations: [], - expectedReferences: [], - }, - { - input: `select * from \`dataset.table_*\``, - expectedDestinations: [], - expectedReferences: ['`dataset.table_*`'], - }, - { - input: `select * from \`dataset.table_20221210\``, - expectedDestinations: [], - expectedReferences: ['`dataset.table_*`'], - }, - { - input: `create or replace table \`sandbox.sample_20221210\` + expectedDestinations: [['`child_table`', 'TABLE', 'DDL_CREATE']], + expectedReferences: [ + '`dataset.parent1_table`', + '`dataset.parent2_table`', + ], + }, + { + input: `with cte as (select * from \`child_table\`) select * from cte`, + expectedDestinations: [], + expectedReferences: ['`child_table`'], + }, + { + input: `select 1`, + expectedDestinations: [], + expectedReferences: [], + }, + { + input: `select * from \`dataset.table_*\``, + expectedDestinations: [], + expectedReferences: ['`dataset.table_*`'], + }, + { + input: `select * from \`dataset.table_20221210\``, + expectedDestinations: [], + expectedReferences: ['`dataset.table_*`'], + }, + { + input: `create or 
replace table \`sandbox.sample_20221210\` as select 1 as a `, - expectedDestinations: [['`sandbox.sample_*`', 'TABLE', 'DDL_CREATE']], - expectedReferences: [], - }, - { - input: - `create or replace procedure \`sandbox.sample_proc\`(in argument int64) + expectedDestinations: [['`sandbox.sample_*`', 'TABLE', 'DDL_CREATE']], + expectedReferences: [], + }, + { + input: + `create or replace procedure \`sandbox.sample_proc\`(in argument int64) options(description="test") begin select 1; end`, - expectedDestinations: [[ - '`sandbox.sample_proc`', - 'ROUTINE', - 'DDL_CREATE', - ]], - expectedReferences: [], - }, - { - input: - `create or replace procedure \`sandbox.sample_proc\`(in argument int64) + expectedDestinations: [[ + '`sandbox.sample_proc`', + 'ROUTINE', + 'DDL_CREATE', + ]], + expectedReferences: [], + }, + { + input: + `create or replace procedure \`sandbox.sample_proc\`(in argument int64) begin call \`sandbox.reference_proc\`(); end`, - expectedDestinations: [[ - '`sandbox.sample_proc`', - 'ROUTINE', - 'DDL_CREATE', - ]], - expectedReferences: ['`sandbox.reference_proc`'], - }, - { - input: 'create schema `awesome_dataset`;', - expectedDestinations: [['`awesome_dataset`', 'SCHEMA', 'DDL_CREATE']], - expectedReferences: [], - }, - { - input: 'CREATE MODEL `awesome_dataset.mymodel`', - expectedDestinations: [[ - '`awesome_dataset.mymodel`', - 'MODEL', - 'DDL_CREATE', - ]], - expectedReferences: [], - }, - { - input: 'create temp table `tmp_table` as select 1;', - expectedDestinations: [['`tmp_table`', 'TEMPORARY_TABLE', 'DDL_CREATE']], - expectedReferences: [], - }, - { - input: `create temp function \`temp_function\`() as (1)`, - expectedDestinations: [[ - '`temp_function`', - 'TEMPORARY_ROUTINE', - 'DDL_CREATE', - ]], - expectedReferences: [], - }, - ]; + expectedDestinations: [[ + '`sandbox.sample_proc`', + 'ROUTINE', + 'DDL_CREATE', + ]], + expectedReferences: ['`sandbox.reference_proc`'], + }, + { + input: 'create schema `awesome_dataset`;', + expectedDestinations: [['`awesome_dataset`', 'SCHEMA', 'DDL_CREATE']], + expectedReferences: [], + }, + { + input: 'CREATE MODEL `awesome_dataset.mymodel`', + expectedDestinations: [[ + '`awesome_dataset.mymodel`', + 'MODEL', + 'DDL_CREATE', + ]], + expectedReferences: [], + }, + { + input: 'create temp table `tmp_table` as select 1;', + expectedDestinations: [['`tmp_table`', 'TEMPORARY_TABLE', 'DDL_CREATE']], + expectedReferences: [], + }, + { + input: `create temp function \`temp_function\`() as (1)`, + expectedDestinations: [[ + '`temp_function`', + 'TEMPORARY_ROUTINE', + 'DDL_CREATE', + ]], + expectedReferences: [], + }, + ]; it.concurrent.each(cases)( 'identifier extraction: destinations (%#)', async (args) => { @@ -178,63 +181,63 @@ describe('bigquery: path2bq', () => { input: [string, string, string]; expected: string; }> = [ - { - input: [ - 'bigquery-porter/bigquery/@default/v0/ddl.sql', - 'bigquery-porter/bigquery', - 'my-project', - ], - expected: 'my-project.v0', - }, - { - input: [ - 'bigquery-porter/bigquery/@default/v0/@ignored/query.sql', - 'bigquery-porter/bigquery', - 'my-project', - ], - expected: 'my-project.v0', - }, - { - input: [ - 'bigquery-porter/bigquery/hoge/v0/@ignored/query.sql', - 'bigquery-porter/bigquery', - 'my-project', - ], - expected: 'hoge.v0', - }, - { - input: [ - 'bigquery-porter/bigquery/@default/v0/@routine/some_routine/ddl.sql', - 'bigquery-porter/bigquery', - 'my-project', - ], - expected: 'my-project.v0.some_routine', - }, - { - input: [ - 
'bigquery-porter/bigquery/@default/v0/@models/some_model/ddl.sql', - 'bigquery-porter/bigquery', - 'my-project', - ], - expected: 'my-project.v0.some_model', - }, - { - input: [ - 'bigquery-porter/bigquery/@default/v0/some_table/ddl.sql', - 'bigquery-porter/bigquery', - 'my-project', - ], - expected: 'my-project.v0.some_table', - }, - { - input: [ - 'bigquery-porter/bigquery/@default/@special/some.sql', - 'bigquery-porter/bigquery', - 'my-project', - ], - expected: 'my-project', - }, - ]; + { + input: [ + 'bigquery-porter/bigquery/@default/v0/ddl.sql', + 'bigquery-porter/bigquery', + 'my-project', + ], + expected: 'my-project.v0', + }, + { + input: [ + 'bigquery-porter/bigquery/@default/v0/@ignored/query.sql', + 'bigquery-porter/bigquery', + 'my-project', + ], + expected: 'my-project.v0', + }, + { + input: [ + 'bigquery-porter/bigquery/hoge/v0/@ignored/query.sql', + 'bigquery-porter/bigquery', + 'my-project', + ], + expected: 'hoge.v0', + }, + { + input: [ + 'bigquery-porter/bigquery/@default/v0/@routine/some_routine/ddl.sql', + 'bigquery-porter/bigquery', + 'my-project', + ], + expected: 'my-project.v0.some_routine', + }, + { + input: [ + 'bigquery-porter/bigquery/@default/v0/@models/some_model/ddl.sql', + 'bigquery-porter/bigquery', + 'my-project', + ], + expected: 'my-project.v0.some_model', + }, + { + input: [ + 'bigquery-porter/bigquery/@default/v0/some_table/ddl.sql', + 'bigquery-porter/bigquery', + 'my-project', + ], + expected: 'my-project.v0.some_table', + }, + { + input: [ + 'bigquery-porter/bigquery/@default/@special/some.sql', + 'bigquery-porter/bigquery', + 'my-project', + ], + expected: 'my-project', + }, + ]; it.concurrent.each(cases)('path2bq test', async (args) => { const { input, expected } = args; expect(path2bq(...input)).toMatchObject(expected); @@ -257,80 +260,80 @@ describe('bigquery: bq2path', () => { input: [BigQueryResource, boolean]; expected: string; }> = [ - { - input: [client, false], - expected: 'awesome-project', - }, - { - input: [client, true], - expected: '@default', - }, - { - input: [dataset, false], - expected: 'awesome-project/sandbox', - }, - { - input: [{ - baseUrl: '/tables', - projectId: 'awesome-project', - id: 'table_id', - parent: dataset, - }, false], - expected: 'awesome-project/sandbox/table_id', - }, - { - input: [{ - baseUrl: '/routines', - id: 'routine_id', - parent: dataset, - }, false], - expected: 'awesome-project/sandbox/@routines/routine_id', - }, - { - input: [{ - baseUrl: '/models', - projectId: 'awesome-project', - id: 'model_id', - parent: dataset, - }, false], - expected: 'awesome-project/sandbox/@models/model_id', - }, - { - input: [{ - baseUrl: '/unknown', - projectId: 'awesome-project', - id: 'unknown_id', - parent: dataset, - }, false], - expected: 'awesome-project/sandbox/@unknown/unknown_id', - }, - { - input: [{ - baseUrl: '/tables', - projectId: 'awesome-project', - id: 'table_id', - parent: dataset, - }, true], - expected: '@default/sandbox/table_id', - }, - { - input: [{ - baseUrl: '/tables', - projectId: 'awesome-project', - id: 'table_200221210', - parent: dataset, - }, true], - expected: '@default/sandbox/table_@', - }, - { - input: [{ - baseUrl: '/routines', - id: 'routine_id', - parent: dataset, - }, true], - expected: '@default/sandbox/@routines/routine_id', - }, - ]; + { + input: [client, false], + expected: 'awesome-project', + }, + { + input: [client, true], + expected: '@default', + }, + { + input: [dataset, false], + expected: 'awesome-project/sandbox', + }, + { + input: [{ + baseUrl: '/tables', + projectId: 
'awesome-project', + id: 'table_id', + parent: dataset, + }, false], + expected: 'awesome-project/sandbox/table_id', + }, + { + input: [{ + baseUrl: '/routines', + id: 'routine_id', + parent: dataset, + }, false], + expected: 'awesome-project/sandbox/@routines/routine_id', + }, + { + input: [{ + baseUrl: '/models', + projectId: 'awesome-project', + id: 'model_id', + parent: dataset, + }, false], + expected: 'awesome-project/sandbox/@models/model_id', + }, + { + input: [{ + baseUrl: '/unknown', + projectId: 'awesome-project', + id: 'unknown_id', + parent: dataset, + }, false], + expected: 'awesome-project/sandbox/@unknown/unknown_id', + }, + { + input: [{ + baseUrl: '/tables', + projectId: 'awesome-project', + id: 'table_id', + parent: dataset, + }, true], + expected: '@default/sandbox/table_id', + }, + { + input: [{ + baseUrl: '/tables', + projectId: 'awesome-project', + id: 'table_200221210', + parent: dataset, + }, true], + expected: '@default/sandbox/table_@', + }, + { + input: [{ + baseUrl: '/routines', + id: 'routine_id', + parent: dataset, + }, true], + expected: '@default/sandbox/@routines/routine_id', + }, + ]; it.concurrent.each(cases)('bq2path', async (args) => { const { input, expected } = args; expect(bq2path(...input)).toMatchObject(expected); @@ -342,55 +345,95 @@ describe('biquery: normalizedBQPath', () => { input: [string, string?, boolean?]; expected: string; }> = [ - { - input: ['`project_id.sbx.hoge`'], - expected: 'project_id.sbx.hoge', - }, - { - input: ['project_id.sbx.hoge'], - expected: 'project_id.sbx.hoge', - }, - { - input: ['project_id.sbx.hoge', '@default'], - expected: 'project_id.sbx.hoge', - }, - { - input: ['sbx.hoge', '@default'], - expected: '@default.sbx.hoge', - }, - { - input: ['sbx', '@default', true], - expected: '@default.sbx', - }, - { - input: ['`sbx`', '@default', true], - expected: '@default.sbx', - }, - ]; + { + input: ['`project_id.sbx.hoge`'], + expected: 'project_id.sbx.hoge', + }, + { + input: ['project_id.sbx.hoge'], + expected: 'project_id.sbx.hoge', + }, + { + input: ['project_id.sbx.hoge', '@default'], + expected: 'project_id.sbx.hoge', + }, + { + input: ['sbx.hoge', '@default'], + expected: '@default.sbx.hoge', + }, + { + input: ['sbx', '@default', true], + expected: '@default.sbx', + }, + { + input: ['`sbx`', '@default', true], + expected: '@default.sbx', + }, + ]; it.concurrent.each(cases)('normalized bigquery path', async (args) => { const { input, expected } = args; expect(normalizedBQPath(...input)).toMatchObject(expected); }); }); +describe('biquery: getFullResourceId', () => { + it('Model', async () => { + const bqObj = new BigQuery() + .dataset('example_schema', { projectId: 'example_project' }) + .model('model_id'); + expect(getFullResourceId(bqObj)).toBe( + 'example_project:example_schema.model_id', + ); + }); + + it('Routine', async () => { + const bqObj = new BigQuery() + .dataset('example_schema', { projectId: 'example_project' }) + .routine('routine_id'); + + expect(getFullResourceId(bqObj)).toBe( + 'example_project:example_schema.routine_id', + ); + }); + + it('Table', async () => { + const bqObj = new BigQuery() + .dataset('example_schema', { projectId: 'example_project' }) + .table('table_id'); + + expect(getFullResourceId(bqObj)).toBe( + 'example_project:example_schema.table_id', + ); + }); + + it('Dataset', async () => { + const bqObj = new BigQuery() + .dataset('example_schema', { projectId: 'example_project' }); + + expect(getFullResourceId(bqObj)).toBe( + 'example_project:example_schema', + ); + }); +}); + 
describe('util: humanFileSize', () => { const cases: Array<{ input: [number, boolean?, number?]; expected: string; }> = [ - { - input: [1e9, true], - expected: '1.0 GB', - }, - { - input: [1e6, true], - expected: '1.0 MB', - }, - { - input: [100, true], - expected: '100B', - }, - ]; + { + input: [1e9, true], + expected: '1.0 GB', + }, + { + input: [1e6, true], + expected: '1.0 MB', + }, + { + input: [100, true], + expected: '100B', + }, + ]; it.concurrent.each(cases)('normalized bigquery path', async (args) => { const { input, expected } = args; expect(humanFileSize(...input)).toMatchObject(expected); @@ -402,15 +445,15 @@ describe('util: msToTime', () => { input: [number]; expected: string; }> = [ - { - input: [1000], - expected: ' 1s', - }, - { - input: [300000], - expected: ' 5m 0s', - }, - ]; + { + input: [1000], + expected: ' 1s', + }, + { + input: [300000], + expected: ' 5m 0s', + }, + ]; it.concurrent.each(cases)('normalized bigquery path', async (args) => { const { input, expected } = args; expect(msToTime(...input)).toMatchObject(expected);
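
Editor's note: for reference, the two identifier formats this series relies on. Import targets on the CLI are dotted paths ("project.dataset.routine") parsed in src/commands/cli.ts, while getFullResourceId() from patch 2 renders a resource as "project:dataset.id", which patch 3's pull.ts uses as the DDL lookup key. The sketch below is illustrative only and not part of the patches; the helper names parseTarget and fullResourceId are hypothetical.

    // Mirrors the target parsing in src/commands/cli.ts and the id format
    // produced by getFullResourceId() in src/bigquery.ts (illustrative only).
    type ImportTarget = { project: string; dataset: string; routine_id: string };

    // 'bqutil.fn.sure_nonnull' -> { project: 'bqutil', dataset: 'fn', routine_id: 'sure_nonnull' }
    const parseTarget = (target: string): ImportTarget => {
      const elms = target.split('.');
      if (elms.length !== 3) {
        throw new Error(`Invalid target: ${target}`);
      }
      return {
        project: elms[0] as string,
        dataset: elms[1] as string,
        routine_id: elms[2] as string,
      };
    };

    // Full resource ids join project and dataset with ':' and the resource with '.',
    // e.g. 'bqutil:fn.sure_nonnull' -- the key pull.ts passes to its ddlReader.
    const fullResourceId = (t: ImportTarget): string =>
      `${t.project}:${t.dataset}.${t.routine_id}`;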