@@ -317,7 +317,7 @@ export abstract class C2DEngine {
317317 CORE_LOGGER . error ( 'Failed to get running jobs:' + e . message )
318318 }
319319
320- const envResourceIds = new Set ( ( env . resources || [ ] ) . map ( ( r ) => r . id ) )
320+ const envResourceMap = new Map ( ( env . resources || [ ] ) . map ( ( r ) => [ r . id , r ] ) )
321321
322322 let totalJobs = 0
323323 let totalFreeJobs = 0
@@ -354,9 +354,12 @@ export abstract class C2DEngine {
354354
355355 if ( isRunning ) {
356356 for ( const resource of job . resources ) {
357- if ( envResourceIds . has ( resource . id ) ) {
358- const isCpu = resource . id === 'cpu'
359- if ( isCpu && ! isThisEnv ) continue // CPU is partitioned, skip other envs
357+ const envRes = envResourceMap . get ( resource . id )
358+ if ( envRes ) {
359+ // GPUs are shared-exclusive: inUse tracked globally across all envs
360+ // Everything else (cpu, ram, disk) is per-env exclusive
361+ const isSharedExclusive = envRes . type === 'gpu'
362+ if ( ! isSharedExclusive && ! isThisEnv ) continue
360363 if ( ! ( resource . id in usedResources ) ) usedResources [ resource . id ] = 0
361364 usedResources [ resource . id ] += resource . amount
362365 if ( job . isFree ) {
@@ -381,12 +384,40 @@ export abstract class C2DEngine {
381384 }
382385 }
383386
/**
 * Checks that a requested amount of a resource fits into what is left when
 * capacity is summed over every supplied environment.
 *
 * For each environment the resource is looked up by id: in the environment's
 * `free.resources` when `isFree` is true (environments without a `free`
 * section contribute nothing), otherwise in its regular `resources`.
 * The `total` and `inUse` of every match are accumulated; falsy values
 * count as 0.
 *
 * @param allEnvironments - environments whose capacity is aggregated
 * @param resourceId - id of the resource being requested
 * @param amount - quantity being requested
 * @param isFree - aggregate the free-tier resources instead of the regular ones
 * @throws Error when the aggregated remainder (total minus in use) is lower than `amount`
 */
private checkGlobalResourceAvailability(
  allEnvironments: ComputeEnvironment[],
  resourceId: string,
  amount: number,
  isFree: boolean
) {
  let capacity = 0
  let consumed = 0

  for (const environment of allEnvironments) {
    // Pick the resource list that matches the requested tier.
    let entry
    if (!isFree) {
      entry = this.getResource(environment.resources, resourceId)
    } else if (environment.free) {
      entry = this.getResource(environment.free.resources, resourceId)
    }

    // Environments that do not expose this resource are ignored.
    if (!entry) continue

    capacity += entry.total || 0
    consumed += entry.inUse || 0
  }

  const stillAvailable = capacity - consumed
  if (stillAvailable < amount) {
    const message = `Not enough available ${resourceId} globally (remaining: ${stillAvailable} , requested: ${amount} )`
    throw new Error(message)
  }
}
413+
384414 // overridden by each engine if required
385415 // eslint-disable-next-line require-await
386416 public async checkIfResourcesAreAvailable (
387417 resourcesRequest : ComputeResourceRequest [ ] ,
388418 env : ComputeEnvironment ,
389- isFree : boolean
419+ isFree : boolean ,
420+ allEnvironments ?: ComputeEnvironment [ ]
390421 ) {
391422 // Filter out resources with amount 0 as they're not actually being requested
392423 const activeResources = resourcesRequest . filter ( ( r ) => r . amount > 0 )
@@ -396,12 +427,33 @@ export abstract class C2DEngine {
396427 if ( ! envResource ) throw new Error ( `No such resource ${ request . id } ` )
397428 if ( envResource . total - envResource . inUse < request . amount )
398429 throw new Error ( `Not enough available ${ request . id } ` )
430+
431+ // Global check for non-GPU resources (cpu, ram, disk are per-env exclusive)
432+ // GPUs are shared-exclusive so their inUse already reflects global usage
433+ if ( allEnvironments && envResource . type !== 'gpu' ) {
434+ this . checkGlobalResourceAvailability (
435+ allEnvironments ,
436+ request . id ,
437+ request . amount ,
438+ false
439+ )
440+ }
441+
399442 if ( isFree ) {
400443 if ( ! env . free ) throw new Error ( `No free resources` )
401444 envResource = this . getResource ( env . free ?. resources , request . id )
402445 if ( ! envResource ) throw new Error ( `No such free resource ${ request . id } ` )
403446 if ( envResource . total - envResource . inUse < request . amount )
404447 throw new Error ( `Not enough available ${ request . id } for free` )
448+
449+ if ( allEnvironments && envResource . type !== 'gpu' ) {
450+ this . checkGlobalResourceAvailability (
451+ allEnvironments ,
452+ request . id ,
453+ request . amount ,
454+ true
455+ )
456+ }
405457 }
406458 }
407459 if ( 'maxJobs' in env && env . maxJobs && env . runningJobs + 1 > env . maxJobs ) {
0 commit comments