Skip to content

Commit 60084ad

Browse files
authored
fix(smrt-core): prefer huggingface transformers for embeddings (#1109)
* fix(smrt-core): prefer huggingface transformers for embeddings
* fix(core): address review feedback and ci timeouts
1 parent e03a536 commit 60084ad

7 files changed

Lines changed: 136 additions & 27 deletions

File tree

.changeset/fair-squids-tickle.md

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
---
2+
"@happyvertical/smrt-core": patch
3+
---
4+
5+
Prefer `@huggingface/transformers` for local embeddings and fall back to
6+
`@xenova/transformers` only when the newer package is not installed. This
7+
avoids the stale `sharp@0.32.x` runtime path on Node 24 while preserving
8+
compatibility for older consumers.

packages/assets/vitest.config.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ export default defineConfig({
1818
include: ['src/**/*.{test,spec}.ts'],
1919
setupFiles: [smrtVitestSetupPath],
2020
testTimeout: 30000,
21+
hookTimeout: 30000,
2122
fileParallelism: false,
2223
pool: 'forks',
2324
poolOptions: {

packages/core/package.json

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,16 +78,21 @@
7878
"prepack": "npm run build:fresh"
7979
},
8080
"peerDependencies": {
81+
"@huggingface/transformers": ">=3.0.0 <4.0.0",
8182
"@xenova/transformers": "^2.17.0"
8283
},
8384
"peerDependenciesMeta": {
85+
"@huggingface/transformers": {
86+
"optional": true
87+
},
8488
"@xenova/transformers": {
8589
"optional": true
8690
}
8791
},
8892
"devDependencies": {
8993
"@faker-js/faker": "^10.2.0",
90-
"@types/node": "25.0.9",
94+
"@huggingface/transformers": "^3.8.1",
95+
"@types/node": "24.10.9",
9196
"@types/pluralize": "^0.0.33",
9297
"@xenova/transformers": "^2.17.2",
9398
"vite": "7.3.1",
Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,58 @@
1+
import { describe, expect, it, vi } from 'vitest';
2+
3+
import { resolveLocalTransformersModule } from '../embeddings/provider';
4+
5+
describe('EmbeddingProvider transformer resolution', () => {
6+
it('prefers @huggingface/transformers when both packages are available', async () => {
7+
const importModule = vi.fn(async (moduleName: string) => ({
8+
packageName: moduleName,
9+
}));
10+
11+
const resolution = await resolveLocalTransformersModule(importModule);
12+
13+
expect(resolution.packageName).toBe('@huggingface/transformers');
14+
expect(importModule).toHaveBeenCalledTimes(1);
15+
expect(importModule).toHaveBeenCalledWith('@huggingface/transformers');
16+
});
17+
18+
it('falls back to @xenova/transformers when Hugging Face transformers is not installed', async () => {
19+
const importModule = vi.fn(async (moduleName: string) => {
20+
if (moduleName === '@huggingface/transformers') {
21+
throw new Error(
22+
"Cannot find package '@huggingface/transformers' imported from test",
23+
);
24+
}
25+
26+
return { packageName: moduleName };
27+
});
28+
29+
const resolution = await resolveLocalTransformersModule(importModule);
30+
31+
expect(resolution.packageName).toBe('@xenova/transformers');
32+
expect(importModule).toHaveBeenNthCalledWith(
33+
1,
34+
'@huggingface/transformers',
35+
);
36+
expect(importModule).toHaveBeenNthCalledWith(2, '@xenova/transformers');
37+
});
38+
39+
it('returns a helpful error when neither transformers package is installed', async () => {
40+
const importModule = vi.fn(async (moduleName: string) => {
41+
throw new Error(`Cannot find package '${moduleName}' imported from test`);
42+
});
43+
44+
await expect(resolveLocalTransformersModule(importModule)).rejects.toThrow(
45+
'Local embeddings require one of: @huggingface/transformers, @xenova/transformers.',
46+
);
47+
});
48+
49+
it('does not hide non-module runtime errors from the preferred package', async () => {
50+
const importModule = vi.fn(async () => {
51+
throw new Error('sharp native module failed to load');
52+
});
53+
54+
await expect(resolveLocalTransformersModule(importModule)).rejects.toThrow(
55+
'sharp native module failed to load',
56+
);
57+
});
58+
});

packages/core/src/embeddings/provider.ts

Lines changed: 59 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
* Embedding Provider
33
*
44
* Unified interface for generating embeddings using local models or AI APIs.
5-
* Supports @xenova/transformers for local inference and @happyvertical/ai for cloud.
5+
* Supports transformers.js packages for local inference and @happyvertical/ai for cloud.
66
*/
77

88
import type { EmbeddingProviderType, ProjectEmbeddingConfig } from './types';
@@ -17,6 +17,55 @@ async function importOptional(moduleName: string): Promise<any> {
1717
return import(/* @vite-ignore */ name);
1818
}
1919

20+
export type OptionalModuleImporter = (moduleName: string) => Promise<any>;
21+
22+
export const LOCAL_TRANSFORMERS_PACKAGES = [
23+
'@huggingface/transformers',
24+
'@xenova/transformers',
25+
] as const;
26+
27+
export type LocalTransformersPackage =
28+
(typeof LOCAL_TRANSFORMERS_PACKAGES)[number];
29+
30+
export interface TransformersModuleResolution {
31+
module: any;
32+
packageName: LocalTransformersPackage;
33+
}
34+
35+
function isModuleNotFoundError(error: unknown, moduleName: string): boolean {
36+
return (
37+
error instanceof Error &&
38+
(error.message.includes(`Cannot find module '${moduleName}'`) ||
39+
error.message.includes(`Cannot find package '${moduleName}'`))
40+
);
41+
}
42+
43+
function formatTransformersResolutionError(
44+
attemptedPackages: readonly string[],
45+
): Error {
46+
return new Error(
47+
`Local embeddings require one of: ${attemptedPackages.join(', ')}. ` +
48+
`Install one of them to use provider: "local", or switch to provider: "ai".`,
49+
);
50+
}
51+
52+
export async function resolveLocalTransformersModule(
53+
importModule: OptionalModuleImporter = importOptional,
54+
): Promise<TransformersModuleResolution> {
55+
for (const packageName of LOCAL_TRANSFORMERS_PACKAGES) {
56+
try {
57+
const module = await importModule(packageName);
58+
return { module, packageName };
59+
} catch (error) {
60+
if (!isModuleNotFoundError(error, packageName)) {
61+
throw error;
62+
}
63+
}
64+
}
65+
66+
throw formatTransformersResolutionError(LOCAL_TRANSFORMERS_PACKAGES);
67+
}
68+
2069
/**
2170
* Interface for AI client that can generate embeddings
2271
*/
@@ -28,7 +77,7 @@ interface EmbeddingCapableAI {
2877
}
2978

3079
/**
31-
* Pipeline type for @xenova/transformers
80+
* Pipeline type for local transformers packages
3281
*/
3382
type FeatureExtractionPipeline = (
3483
texts: string[],
@@ -98,7 +147,7 @@ export class EmbeddingProvider {
98147
}
99148

100149
/**
101-
* Generate embeddings using local model (@xenova/transformers)
150+
* Generate embeddings using a local transformers model
102151
*/
103152
private async embedLocal(texts: string[]): Promise<number[][]> {
104153
const pipeline = await this.getLocalPipeline();
@@ -133,30 +182,15 @@ export class EmbeddingProvider {
133182
* Initialize the local embedding pipeline
134183
*/
135184
private async initLocalPipeline(): Promise<FeatureExtractionPipeline> {
136-
try {
137-
// Dynamic import for optional dependency
138-
const transformers = await importOptional('@xenova/transformers');
139-
const { pipeline } = transformers;
185+
const { module: transformers } = await resolveLocalTransformersModule();
186+
const { pipeline } = transformers;
140187

141-
const model = this.config.localModel || 'Xenova/bge-base-en-v1.5';
188+
const model = this.config.localModel || 'Xenova/bge-base-en-v1.5';
142189

143-
// Initialize the feature extraction pipeline
144-
const pipe = await pipeline('feature-extraction', model);
190+
// Initialize the feature extraction pipeline
191+
const pipe = await pipeline('feature-extraction', model);
145192

146-
return pipe as unknown as FeatureExtractionPipeline;
147-
} catch (error) {
148-
if (
149-
error instanceof Error &&
150-
error.message.includes("Cannot find module '@xenova/transformers'")
151-
) {
152-
throw new Error(
153-
'Local embeddings require @xenova/transformers. ' +
154-
'Install it with: pnpm add @xenova/transformers\n' +
155-
'Or use provider: "ai" in your embedding configuration.',
156-
);
157-
}
158-
throw error;
159-
}
193+
return pipe as unknown as FeatureExtractionPipeline;
160194
}
161195

162196
/**
@@ -201,7 +235,7 @@ export class EmbeddingProvider {
201235
*/
202236
async isLocalAvailable(): Promise<boolean> {
203237
try {
204-
await importOptional('@xenova/transformers');
238+
await resolveLocalTransformersModule();
205239
return true;
206240
} catch {
207241
return false;

packages/products/vitest.config.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ export default defineConfig({
4545

4646
// Timeouts for async operations
4747
testTimeout: 30000,
48-
hookTimeout: 10000,
48+
hookTimeout: 30000,
4949

5050
// Setup files removed - file doesn't exist
5151
// setupFiles: ['../../vitest.setup.ts'],

pnpm-lock.yaml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)