Skip to content

Commit 061bf7f

Browse files
sweetmantech and claude
committed
fix: replace fal Florence-2 face detection with Gemini vision
Florence-2 object detection had false negatives on AI-generated face images. This commit replaces it with a Gemini 2.5 Flash vision call via the Recoup Chat API, which can reliably determine if an image is a portrait/headshot.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c92f6e1 commit 061bf7f

2 files changed

Lines changed: 69 additions & 83 deletions

File tree

src/content/__tests__/detectFace.test.ts

Lines changed: 36 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -4,108 +4,71 @@ vi.mock("../../sandboxes/logStep", () => ({
44
logStep: vi.fn(),
55
}));
66

7-
const mockFalSubscribe = vi.fn();
8-
vi.mock("../falSubscribe", () => ({
9-
falSubscribe: (...args: unknown[]) => mockFalSubscribe(...args),
10-
}));
7+
const mockFetch = vi.fn();
8+
vi.stubGlobal("fetch", mockFetch);
119

1210
import { detectFace } from "../detectFace";
1311

12+
function mockChatResponse(text: string) {
13+
return {
14+
ok: true,
15+
json: () => Promise.resolve({ text }),
16+
};
17+
}
18+
1419
describe("detectFace", () => {
1520
beforeEach(() => {
1621
vi.clearAllMocks();
22+
process.env.RECOUP_API_KEY = "test-key";
1723
});
1824

19-
it("returns true when a person label is detected", async () => {
20-
mockFalSubscribe.mockResolvedValue({
21-
data: {
22-
results: {
23-
bboxes: [[10, 20, 100, 200]],
24-
labels: ["person"],
25-
},
26-
},
27-
});
25+
it("returns true when the model says the image contains a face", async () => {
26+
mockFetch.mockResolvedValue(mockChatResponse("true"));
2827

2928
const result = await detectFace("https://example.com/headshot.png");
3029

31-
expect(result).toBe(true);
32-
expect(mockFalSubscribe).toHaveBeenCalledWith(
33-
"fal-ai/florence-2-large/object-detection",
34-
{ image_url: "https://example.com/headshot.png" },
35-
);
36-
});
37-
38-
it("returns true when a face label is detected among other objects", async () => {
39-
mockFalSubscribe.mockResolvedValue({
40-
data: {
41-
results: {
42-
bboxes: [[0, 0, 50, 50], [10, 20, 100, 200]],
43-
labels: ["chair", "human face"],
44-
},
45-
},
46-
});
47-
48-
const result = await detectFace("https://example.com/photo.png");
49-
5030
expect(result).toBe(true);
5131
});
5232

53-
it("returns false when no person or face labels are detected", async () => {
54-
mockFalSubscribe.mockResolvedValue({
55-
data: {
56-
results: {
57-
bboxes: [[0, 0, 300, 300]],
58-
labels: ["album cover"],
59-
},
60-
},
61-
});
33+
it("returns false when the model says no face is present", async () => {
34+
mockFetch.mockResolvedValue(mockChatResponse("false"));
6235

6336
const result = await detectFace("https://example.com/album-cover.png");
6437

6538
expect(result).toBe(false);
6639
});
6740

68-
it("returns false when results are empty", async () => {
69-
mockFalSubscribe.mockResolvedValue({
70-
data: {
71-
results: {
72-
bboxes: [],
73-
labels: [],
74-
},
75-
},
76-
});
41+
it("sends the image URL in the prompt to the chat API", async () => {
42+
mockFetch.mockResolvedValue(mockChatResponse("true"));
7743

78-
const result = await detectFace("https://example.com/blank.png");
44+
await detectFace("https://example.com/photo.png");
7945

80-
expect(result).toBe(false);
46+
expect(mockFetch).toHaveBeenCalledOnce();
47+
const [url, options] = mockFetch.mock.calls[0];
48+
expect(url).toContain("/api/chat/generate");
49+
const body = JSON.parse(options.body);
50+
expect(body.prompt).toContain("https://example.com/photo.png");
8151
});
8252

83-
it("returns false when detection fails", async () => {
84-
mockFalSubscribe.mockRejectedValue(new Error("Detection failed"));
53+
it("returns false when API call fails", async () => {
54+
mockFetch.mockRejectedValue(new Error("Network error"));
8555

8656
const result = await detectFace("https://example.com/broken.png");
8757

8858
expect(result).toBe(false);
8959
});
9060

91-
it("does not false-positive on labels containing face words as substrings", async () => {
92-
mockFalSubscribe.mockResolvedValue({
93-
data: {
94-
results: {
95-
bboxes: [[0, 0, 200, 200]],
96-
labels: ["ottoman", "mannequin", "womanizer"],
97-
},
98-
},
99-
});
61+
it("returns false when API returns non-ok response", async () => {
62+
mockFetch.mockResolvedValue({ ok: false, status: 500 });
10063

101-
const result = await detectFace("https://example.com/furniture.png");
64+
const result = await detectFace("https://example.com/broken.png");
10265

10366
expect(result).toBe(false);
10467
});
10568

10669
it("logs the error when detection fails", async () => {
10770
const { logStep } = await import("../../sandboxes/logStep");
108-
mockFalSubscribe.mockRejectedValue(new Error("Rate limit exceeded"));
71+
mockFetch.mockRejectedValue(new Error("Rate limit exceeded"));
10972

11073
await detectFace("https://example.com/broken.png");
11174

@@ -115,4 +78,12 @@ describe("detectFace", () => {
11578
expect.objectContaining({ error: "Rate limit exceeded" }),
11679
);
11780
});
81+
82+
it("handles whitespace and casing in model response", async () => {
83+
mockFetch.mockResolvedValue(mockChatResponse(" True "));
84+
85+
const result = await detectFace("https://example.com/headshot.png");
86+
87+
expect(result).toBe(true);
88+
});
11889
});

src/content/detectFace.ts

Lines changed: 33 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,49 @@
11
import { logStep } from "../sandboxes/logStep";
2-
import { falSubscribe } from "./falSubscribe";
32

4-
const DETECTION_MODEL = "fal-ai/florence-2-large/object-detection";
3+
const PROMPT_PREFIX = `Look at the image at this URL and determine if it contains a human face or person portrait (headshot, selfie, press photo, etc).
54
6-
/** Labels that indicate a human face or person is present in the image. */
7-
const FACE_LABELS = ["person", "face", "human face", "man", "woman", "boy", "girl"];
5+
Respond with ONLY "true" or "false". Nothing else.
6+
7+
Image URL: `;
88

99
/**
10-
* Detects whether an image contains a human face using Florence-2 object detection.
10+
* Detects whether an image contains a human face using a vision-capable text model.
1111
*
1212
* @param imageUrl - URL of the image to analyze
13-
* @returns true if at least one face/person is detected, false otherwise
13+
* @returns true if the image contains a face/portrait, false otherwise
1414
*/
1515
export async function detectFace(imageUrl: string): Promise<boolean> {
1616
try {
17-
const result = await falSubscribe(DETECTION_MODEL, {
18-
image_url: imageUrl,
17+
const recoupApiKey = process.env.RECOUP_API_KEY;
18+
if (!recoupApiKey) {
19+
logStep("Face detection skipped — RECOUP_API_KEY not set", false);
20+
return false;
21+
}
22+
23+
const recoupApiUrl = process.env.RECOUP_API_URL ?? "https://recoup-api.vercel.app";
24+
const response = await fetch(`${recoupApiUrl}/api/chat/generate`, {
25+
method: "POST",
26+
headers: {
27+
"Content-Type": "application/json",
28+
"x-api-key": recoupApiKey,
29+
},
30+
body: JSON.stringify({
31+
prompt: `${PROMPT_PREFIX}${imageUrl}`,
32+
model: "google/gemini-2.5-flash",
33+
excludeTools: ["create_task"],
34+
}),
1935
});
2036

21-
const data = result.data as Record<string, unknown>;
22-
const results = data.results as { labels?: string[] } | undefined;
23-
const labels = results?.labels ?? [];
37+
if (!response.ok) {
38+
logStep("Face detection API returned error", false, { status: response.status });
39+
return false;
40+
}
2441

25-
const hasFace = labels.some((label) => {
26-
const lower = label.toLowerCase();
27-
return FACE_LABELS.some(
28-
(faceLabel) => lower === faceLabel || lower.split(" ").includes(faceLabel),
29-
);
30-
});
31-
logStep("Face detection result", false, { imageUrl: imageUrl.slice(0, 80), hasFace, labels });
42+
const json = (await response.json()) as { text?: string };
43+
const answer = (json.text ?? "").trim().toLowerCase();
44+
const hasFace = answer === "true";
45+
46+
logStep("Face detection result", false, { imageUrl: imageUrl.slice(0, 80), hasFace, answer });
3247
return hasFace;
3348
} catch (err) {
3449
logStep("Face detection failed, assuming no face", false, {

0 commit comments

Comments
 (0)