Skip to content

Commit 6670dee

Browse files
0xVoronovflopez7portuu3
authored
[Job Launcher] GCV moderation: Batches processing (#3081)
* Implemented batch processing logic * Added entity, repository and migration * Updated job moderation service * Implemented unit tests * Refactor content moderation module: rename and restructure entities, update environment variables, and remove deprecated job moderation tasks. * Add @faker-js/faker dependency, improve test cases, and clean up job service tests * Improved content moderation tests * Faker usage for Content Moderation tests * Refactor GCVContentModerationService tests to use mockResolvedValueOnce for better control over mock behavior * Fix GCV content moderation tests * Refactor GCS URL conversion and validation functions to remove unnecessary console logs and ensure consistent output format * Refactor content moderation enums by moving ContentModerationLevel to a new gcv.ts file and updating imports accordingly * Refactor categorize methods and clean some useless code --------- Co-authored-by: Francisco López <francislopez977@gmail.com> Co-authored-by: portuu3 <adrian.portugues.mas@gmail.com>
1 parent 6867431 commit 6670dee

33 files changed

Lines changed: 1839 additions & 1153 deletions

packages/apps/job-launcher/client/src/components/Jobs/Create/CvatJobRequestForm.tsx

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ export const CvatJobRequestForm = () => {
199199
const dataRegions =
200200
values.dataProvider === StorageProviders.AWS ? AWSRegions : GCSRegions;
201201

202+
const bpRegions =
203+
values.bpProvider === StorageProviders.AWS ? AWSRegions : GCSRegions;
204+
202205
const gtRegions =
203206
values.gtProvider === StorageProviders.AWS ? AWSRegions : GCSRegions;
204207

@@ -469,7 +472,11 @@ export const CvatJobRequestForm = () => {
469472
}
470473
onBlur={handleBlur}
471474
>
472-
<MenuItem value={StorageProviders.AWS}>AWS</MenuItem>
475+
{Object.values(StorageProviders).map((provider) => (
476+
<MenuItem key={provider} value={provider}>
477+
{provider.toUpperCase()}
478+
</MenuItem>
479+
))}
473480
</Select>
474481
</FormControl>
475482
</Grid>
@@ -577,7 +584,11 @@ export const CvatJobRequestForm = () => {
577584
error={touched.bpProvider && Boolean(errors.bpProvider)}
578585
onBlur={handleBlur}
579586
>
580-
<MenuItem value={StorageProviders.AWS}>AWS</MenuItem>
587+
{Object.values(StorageProviders).map((provider) => (
588+
<MenuItem key={provider} value={provider}>
589+
{provider.toUpperCase()}
590+
</MenuItem>
591+
))}
581592
</Select>
582593
</FormControl>
583594
</Grid>
@@ -597,7 +608,7 @@ export const CvatJobRequestForm = () => {
597608
error={touched.bpRegion && Boolean(errors.bpRegion)}
598609
onBlur={handleBlur}
599610
>
600-
{Object.values(dataRegions).map((region) => (
611+
{Object.values(bpRegions).map((region) => (
601612
<MenuItem key={`bpset-${region}`} value={region}>
602613
{region}
603614
</MenuItem>
@@ -688,7 +699,11 @@ export const CvatJobRequestForm = () => {
688699
error={touched.gtProvider && Boolean(errors.gtProvider)}
689700
onBlur={handleBlur}
690701
>
691-
<MenuItem value={StorageProviders.AWS}>AWS</MenuItem>
702+
{Object.values(StorageProviders).map((provider) => (
703+
<MenuItem key={provider} value={provider}>
704+
{provider.toUpperCase()}
705+
</MenuItem>
706+
))}
692707
</Select>
693708
</FormControl>
694709
</Grid>

packages/apps/job-launcher/server/.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ SENDGRID_API_KEY=
7373
GOOGLE_PROJECT_ID=
7474
GOOGLE_PRIVATE_KEY=
7575
GOOGLE_CLIENT_EMAIL=
76-
GCS_TEMP_ASYNC_RESULTS_BUCKET=
76+
GCV_MODERATION_RESULTS_FILES_PATH=
7777
GCS_MODERATION_RESULTS_BUCKET=
7878

7979
# Slack

packages/apps/job-launcher/server/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
"zxcvbn": "^4.4.2"
6969
},
7070
"devDependencies": {
71+
"@faker-js/faker": "^9.5.0",
7172
"@golevelup/ts-jest": "^0.6.1",
7273
"@nestjs/cli": "^10.3.2",
7374
"@nestjs/schematics": "^10.1.3",

packages/apps/job-launcher/server/src/common/config/env-schema.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ export const envValidator = Joi.object({
8787
GOOGLE_PROJECT_ID: Joi.string().required(),
8888
GOOGLE_PRIVATE_KEY: Joi.string().required(),
8989
GOOGLE_CLIENT_EMAIL: Joi.string().required(),
90-
GCS_TEMP_ASYNC_RESULTS_BUCKET: Joi.string().required(),
91-
GCS_MODERATION_RESULTS_BUCKET: Joi.string().required(),
90+
GCV_MODERATION_RESULTS_FILES_PATH: Joi.string().required(),
91+
GCV_MODERATION_RESULTS_BUCKET: Joi.string().required(),
9292
// Slack
9393
SLACK_ABUSE_NOTIFICATION_WEBHOOK_URL: Joi.string().required(),
9494
});

packages/apps/job-launcher/server/src/common/config/vision-config.service.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ export class VisionConfigService {
66
constructor(private configService: ConfigService) {}
77

88
/**
9-
* The Google Cloud Storage (GCS) bucket name where temporary async moderation results will be saved.
9+
* The Google Cloud Storage (GCS) path name where temporary async moderation results will be saved.
1010
* Required
1111
*/
12-
get tempAsyncResultsBucket(): string {
12+
get moderationResultsFilesPath(): string {
1313
return this.configService.getOrThrow<string>(
14-
'GCS_TEMP_ASYNC_RESULTS_BUCKET',
14+
'GCV_MODERATION_RESULTS_FILES_PATH',
1515
);
1616
}
1717

@@ -21,7 +21,7 @@ export class VisionConfigService {
2121
*/
2222
get moderationResultsBucket(): string {
2323
return this.configService.getOrThrow<string>(
24-
'GCS_MODERATION_RESULTS_BUCKET',
24+
'GCV_MODERATION_RESULTS_BUCKET',
2525
);
2626
}
2727

packages/apps/job-launcher/server/src/common/constants/errors.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ export enum ErrorJob {
2828
/**
2929
* Represents error messages associated with a job moderation.
3030
*/
31-
export enum ErrorJobModeration {
31+
export enum ErrorContentModeration {
3232
ErrorProcessingDataset = 'Error processing dataset',
3333
InappropriateContent = 'Job cannot be processed due to inappropriate content',
3434
ContentModerationFailed = 'Job cannot be processed due to failure in content moderation',
@@ -38,6 +38,8 @@ export enum ErrorJobModeration {
3838
NoResultsFound = 'No results found',
3939
ResultsParsingFailed = 'Results parsing failed',
4040
JobModerationFailed = 'Job moderation failed',
41+
ProcessContentModerationRequestFailed = 'Process content moderation request failed',
42+
CompleteContentModerationFailed = 'Complete content moderation failed',
4143
}
4244

4345
/**

packages/apps/job-launcher/server/src/common/constants/index.ts

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -71,18 +71,6 @@ export const E2E_TEST_ENV = 'test-e2e';
7171

7272
export const MUTEX_TIMEOUT = 2000; //ms
7373

74-
export const CONTENT_MODERATION_LEVEL = {
75-
VERY_LIKELY: 'VERY_LIKELY',
76-
LIKELY: 'LIKELY',
77-
POSSIBLE: 'POSSIBLE',
78-
};
79-
80-
export const CONTENT_MODERATION_FEATURE = {
81-
SAFE_SEARCH_DETECTION: 'SAFE_SEARCH_DETECTION',
82-
};
83-
8474
export const GS_PROTOCOL = 'gs://';
85-
export const JOB_MODERATION_BATCH_SIZE = 16;
86-
export const JOB_MODERATION_ASYNC_BATCH_SIZE = 100;
87-
export const JOB_MODERATION_MAX_REQUESTS_PER_MINUTE = 1800;
88-
export const ONE_MINUTE_IN_MS = 60000;
75+
export const GCV_CONTENT_MODERATION_ASYNC_BATCH_SIZE = 100;
76+
export const GCV_CONTENT_MODERATION_BATCH_SIZE_PER_TASK = 2000;
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
export enum ContentModerationRequestStatus {
2+
PENDING = 'pending',
3+
PROCESSED = 'processed',
4+
POSITIVE_ABUSE = 'positive_abuse',
5+
PASSED = 'passed',
6+
FAILED = 'failed',
7+
}

packages/apps/job-launcher/server/src/common/enums/cron-job.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
export enum CronJobType {
2-
JobModeration = 'job-moderation',
3-
ParseJobModerationResults = 'parse-job-moderation-results',
2+
ContentModeration = 'content-moderation',
43
CreateEscrow = 'create-escrow',
54
SetupEscrow = 'setup-escrow',
65
FundEscrow = 'fund-escrow',
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
export enum ContentModerationLevel {
2+
VERY_LIKELY = 'VERY_LIKELY',
3+
LIKELY = 'LIKELY',
4+
POSSIBLE = 'POSSIBLE',
5+
}
6+
7+
export enum ContentModerationFeature {
8+
SAFE_SEARCH_DETECTION = 'SAFE_SEARCH_DETECTION',
9+
}

0 commit comments

Comments
 (0)