Skip to content

Commit a84ec73

Browse files
sweetmantech and claude authored
feat: extract conversation ID from email HTML for Superhuman replies (#107)
The Superhuman email client inserts <wbr /> tags in link text, which breaks plain-text extraction. Added an extractRoomIdFromHtml function as a secondary fallback in getEmailRoomId to handle this case. Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 1ca467f commit a84ec73

File tree

4 files changed

+269
-3
lines changed

4 files changed

+269
-3
lines changed
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import { describe, it, expect } from "vitest";
2+
import { extractRoomIdFromHtml } from "../extractRoomIdFromHtml";
3+
4+
describe("extractRoomIdFromHtml", () => {
5+
// Regression case for a reply sent from Superhuman, where the conversation link
// appears only inside the quoted original message.
describe("Superhuman reply with conversation link in quoted content", () => {
6+
it("extracts roomId from Superhuman reply with wbr tags in link text", () => {
7+
// Fixture: HTML of a Superhuman reply. The quoted message carries the room id twice:
8+
// - the href is a resend-links.com tracking redirect with the chat URL percent-encoded (%2F)
// - the visible link text is the chat URL split up by <wbr /> tags for word breaking
// NOTE(review): the fixture contains a malformed `<p cor-black sh-color">` opening tag;
// it looks like a truncation artefact from capturing the email. Confirm it is intentional.
9+
const html = `<html>
10+
11+
<head></head>
12+
13+
<body>
14+
<div>
15+
<div>
16+
<div>
17+
<div class="">Send a picture of him <br /></div>
18+
<div class=""><br /></div>
19+
</div>
20+
<div>
21+
<div style="display: none; border: 0px; width: 0px; height: 0px; overflow: hidden; visibility: hidden;"><img src="https://r.superhuman.com/4640qXWivTiaNi_anz1bstqoUbWlYj8nnSM0Y-NWmoL_OZdXZ1Zq-_DSPSu7r6M_NMQJAgHCnrKL5OisY6deh83uz8MfXoijSTOwhFcnM5Ya0RU8q8kZDoD0MVTLFtwDxERoN1wu0T-LgI8TDjcWI8K1HEns5_8ETb2EF1fetEenZgrj73FE6Q.gif" alt=" " width="1" height="0" style="display: none; border: 0px; width: 0px; height: 0px; overflow: hidden; visibility: hidden;" /><!-- --></div><br />
22+
<div class="gmail_signature">
23+
<div style="clear:both">Sent via <a href="https://sprh.mn/?vip=sidney@recoupable.com" target="_blank">Superhuman</a></div><br />
24+
</div>
25+
</div><br />
26+
<div>
27+
<div class="gmail_quote">On Fri, Jan 09, 2026 at 11:59 AM, Agent by Recoup <span dir="ltr">&lt;<a href="mailto:agent@recoupable.com" target="_blank">agent@recoupable.com</a>&gt;</span> wrote:<br />
28+
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
29+
<div class="gmail_extra">
30+
<div class="gmail_quote sh-color-black sh-color">
31+
<p class="sh-color-black sh-color">Short answer: Brian Kernighan.</p>
32+
<p class="sh-color-black sh-color">Details: the earliest known use in computing appears in Kernighan's 1972 tutorial for the B language (the "hello, world!" example). It was then popularized by Kernighan &amp; Ritchie's 1978 book The C Programming Language. (There are older claims—BCPL examples from the late 1960s and the exact phrase appeared as a radio catchphrase in the 1950s—but Kernighan is usually credited for putting it into programming tradition.)</p>
33+
<p cor-black sh-color">Want the sources/links?</p>
34+
35+
36+
<hr style="margin-top:24px;margin-bottom:16px;border:none;border-top:1px solid #e5e7eb;" class="sh-color-grey sh-color" />
37+
<p style="font-size:12px;color:#6b7280;margin:0 0 4px;" class="sh-color-grey sh-color">
38+
Note: you can reply directly to this email to continue the conversation.
39+
</p>
40+
<p style="font-size:12px;color:#6b7280;margin:0;" class="sh-color-grey sh-color">
41+
Or continue the conversation on Recoup:
42+
<a href="https://14158f8b1cbe93481ac078c1f43f3792.us-east-1.resend-links.com/CL0/https:%2F%2Fchat.recoupable.com%2Fchat%2Fd5c473ec-04cf-4a23-a577-e0dc71542392/1/0100019ba3b2dbec-832401f0-a3c6-4478-b6bf-3b0b06b7251a-000000/OomH25B53Pym0ykT2YYxbKx0c_NEhvJ3oFfBzpKKdVk=439" rel="noopener noreferrer" target="_blank" class="sh-color-blue sh-color">
43+
https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />d5c473ec-04cf-4a23-a577-e0dc71542392
44+
</a>
45+
</p>
46+
</div>
47+
</div>
48+
</blockquote>
49+
</div>
50+
</div><br />
51+
</div>
52+
</div>
53+
</body>
54+
55+
</html>`;
56+
57+
const result = extractRoomIdFromHtml(html);
58+
59+
// Both the encoded href and the <wbr />-split link text carry this same id.
expect(result).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392");
60+
});
61+
});
62+
63+
// Baseline case: a Gmail-style quoted reply where the chat URL is plain (not encoded,
// no <wbr /> tags) in both the href and the link text.
describe("Gmail reply with proper threading", () => {
64+
it("extracts roomId from Gmail reply with quoted content", () => {
65+
// Minimal hand-written fixture: reply text followed by a gmail_quote blockquote
// that contains the conversation link.
const html = `
66+
<html>
67+
<body>
68+
<p>Thanks for the info!</p>
69+
<div class="gmail_quote">
70+
<blockquote>
71+
<p>Original message here</p>
72+
<p>Continue the conversation: <a href="https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890">https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890</a></p>
73+
</blockquote>
74+
</div>
75+
</body>
76+
</html>
77+
`;
78+
79+
const result = extractRoomIdFromHtml(html);
80+
81+
expect(result).toBe("a1b2c3d4-e5f6-7890-abcd-ef1234567890");
82+
});
83+
});
84+
85+
// Negative cases: every input here must yield undefined rather than a room id.
describe("no conversation ID", () => {
86+
// The parameter type is `string | undefined`, so a missing HTML body is valid input.
it("returns undefined for undefined input", () => {
87+
const result = extractRoomIdFromHtml(undefined);
88+
89+
expect(result).toBeUndefined();
90+
});
91+
92+
it("returns undefined for empty string", () => {
93+
const result = extractRoomIdFromHtml("");
94+
95+
expect(result).toBeUndefined();
96+
});
97+
98+
it("returns undefined when no chat link present", () => {
99+
const html = "<html><body><p>This email has no Recoup chat link.</p></body></html>";
100+
101+
const result = extractRoomIdFromHtml(html);
102+
103+
expect(result).toBeUndefined();
104+
});
105+
106+
// Right host and path, but the last segment is not an 8-4-4-4-12 hex UUID.
it("returns undefined for invalid UUID format in link", () => {
107+
const html =
108+
'<a href="https://chat.recoupable.com/chat/not-a-valid-uuid">link</a>';
109+
110+
const result = extractRoomIdFromHtml(html);
111+
112+
expect(result).toBeUndefined();
113+
});
114+
115+
// Well-formed UUID under /chat/, but on a host other than chat.recoupable.com.
it("returns undefined for wrong domain", () => {
116+
const html =
117+
'<a href="https://chat.otherdomain.com/chat/550e8400-e29b-41d4-a716-446655440000">link</a>';
118+
119+
const result = extractRoomIdFromHtml(html);
120+
121+
expect(result).toBeUndefined();
122+
});
123+
});
124+
125+
// Each case isolates one of the link formats the extractor is expected to handle.
describe("edge cases", () => {
126+
it("handles URL-encoded link in href attribute", () => {
127+
// Resend tracking redirects URL-encode the destination, so the chat URL
// appears with %2F in place of "/" and only inside the href.
128+
const html =
129+
'<a href="https://tracking.example.com/redirect/https:%2F%2Fchat.recoupable.com%2Fchat%2F12345678-1234-1234-1234-123456789abc">Click here</a>';
130+
131+
const result = extractRoomIdFromHtml(html);
132+
133+
expect(result).toBe("12345678-1234-1234-1234-123456789abc");
134+
});
135+
136+
// Two direct links with different ids: the one that appears first in the markup wins.
it("extracts first roomId when multiple links present", () => {
137+
const html = `
138+
<a href="https://chat.recoupable.com/chat/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee">First</a>
139+
<a href="https://chat.recoupable.com/chat/11111111-2222-3333-4444-555555555555">Second</a>
140+
`;
141+
142+
const result = extractRoomIdFromHtml(html);
143+
144+
expect(result).toBe("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee");
145+
});
146+
147+
// href is "#", so the id can only come from the link text, which is split by <wbr /> tags.
it("handles link text with wbr tags breaking up the URL", () => {
148+
const html = `
149+
<a href="#">
150+
https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />abcdef12-3456-7890-abcd-ef1234567890
151+
</a>
152+
`;
153+
154+
const result = extractRoomIdFromHtml(html);
155+
156+
expect(result).toBe("abcdef12-3456-7890-abcd-ef1234567890");
157+
});
158+
159+
// Scheme, host and path are upper-case; the UUID itself is lower-case in the input,
// so the expected value is the id exactly as written.
it("handles mixed case in URL", () => {
160+
const html =
161+
'<a href="HTTPS://CHAT.RECOUPABLE.COM/CHAT/12345678-1234-1234-1234-123456789abc">link</a>';
162+
163+
const result = extractRoomIdFromHtml(html);
164+
165+
expect(result).toBe("12345678-1234-1234-1234-123456789abc");
166+
});
167+
});
168+
});

lib/emails/inbound/__tests__/getEmailRoomId.test.ts

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,54 @@ describe("getEmailRoomId", () => {
4545
});
4646
});
4747

48+
// Covers the HTML step of getEmailRoomId: tried after the text body and before the
// references-header lookup.
describe("secondary: extracting from email HTML", () => {
49+
it("returns roomId from HTML when text has no chat link", async () => {
50+
// A references header is present, but the HTML match should be returned first.
const emailContent = {
51+
text: "No chat link in text",
52+
html: '<a href="https://chat.recoupable.com/chat/abcdef12-3456-7890-abcd-ef1234567890">link</a>',
53+
headers: { references: "<old-message-id@example.com>" },
54+
} as GetReceivingEmailResponseSuccess;
55+
56+
const result = await getEmailRoomId(emailContent);
57+
58+
expect(result).toBe("abcdef12-3456-7890-abcd-ef1234567890");
59+
// The references-header fallback must not run once the HTML yields an id.
// NOTE(review): presumably mockSelectMemoryEmails stands in for selectMemoryEmails; it is set up outside this hunk.
expect(mockSelectMemoryEmails).not.toHaveBeenCalled();
60+
});
61+
62+
// No text body at all and href is "#": the id is only in the <wbr />-split link text.
it("handles Superhuman wbr tags in HTML link text", async () => {
63+
const emailContent = {
64+
text: undefined,
65+
html: '<a href="#">https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />d5c473ec-04cf-4a23-a577-e0dc71542392</a>',
66+
headers: {},
67+
} as GetReceivingEmailResponseSuccess;
68+
69+
const result = await getEmailRoomId(emailContent);
70+
71+
expect(result).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392");
72+
});
73+
74+
// Text and HTML carry different ids; the id from the text body must be returned.
it("prioritizes text over HTML", async () => {
75+
const emailContent = {
76+
text: "https://chat.recoupable.com/chat/11111111-1111-1111-1111-111111111111",
77+
html: '<a href="https://chat.recoupable.com/chat/22222222-2222-2222-2222-222222222222">link</a>',
78+
headers: {},
79+
} as GetReceivingEmailResponseSuccess;
80+
81+
const result = await getEmailRoomId(emailContent);
82+
83+
expect(result).toBe("11111111-1111-1111-1111-111111111111");
84+
});
85+
});
86+
4887
describe("fallback: checking references header", () => {
49-
it("falls back to references header when no chat link in text", async () => {
88+
it("falls back to references header when no chat link in text or html", async () => {
5089
mockSelectMemoryEmails.mockResolvedValue([
5190
{ memories: { room_id: "22222222-3333-4444-5555-666666666666" } },
5291
] as Awaited<ReturnType<typeof selectMemoryEmails>>);
5392

5493
const emailContent = {
5594
text: "No chat link here",
95+
html: "<p>No chat link in HTML either</p>",
5696
headers: { references: "<message-id@example.com>" },
5797
} as GetReceivingEmailResponseSuccess;
5898

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/** Regex source for a UUID in its 8-4-4-4-12 hexadecimal text form. */
const UUID_PATTERN = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";

// Rejects a UUID that is immediately followed by another hex digit. Without this,
// an over-long id would be silently truncated to its first 36 characters and
// returned as if it were a valid room id.
const UUID_END = "(?![0-9a-f])";

// Matches chat.recoupable.com/chat/{uuid} as it appears in raw HTML:
// - Direct URL: https://chat.recoupable.com/chat/uuid
// - URL-encoded (in tracking redirects): chat.recoupable.com%2Fchat%2Fuuid
// All patterns are case-insensitive, so %2f and upper-case hosts match too.
const CHAT_LINK_PATTERNS = [
  new RegExp(`https?://chat\\.recoupable\\.com/chat/(${UUID_PATTERN})${UUID_END}`, "i"),
  new RegExp(`chat\\.recoupable\\.com%2Fchat%2F(${UUID_PATTERN})${UUID_END}`, "i"),
];

// Applied only after word-break markup has been removed from the HTML.
// Raw link text looks like: "https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />uuid"
// The scheme is deliberately not required here.
const WBR_STRIPPED_PATTERN = new RegExp(
  `chat\\.recoupable\\.com/chat/(${UUID_PATTERN})${UUID_END}`,
  "i",
);

// Invisible word-break opportunities that can split a URL in link text:
// - <wbr>, <wbr/>, <wbr />, <wbr class="...">, and a stray </wbr>
// - zero-width space (U+200B) and soft hyphen (U+00AD), raw or as an HTML entity
const WORD_BREAK_PATTERN =
  /<\/?wbr\b[^>]*>|[\u200B\u00AD]|&(?:#8203|#x200b|ZeroWidthSpace|#173|#xad|shy);/gi;

/**
 * Extracts the roomId from email HTML by looking for a Recoup chat link.
 *
 * Sources are tried in this order, and the first hit wins:
 * 1. A direct `https://chat.recoupable.com/chat/{uuid}` URL anywhere in the HTML
 * 2. A URL-encoded chat URL (`%2Fchat%2F{uuid}`), as found in tracking redirect links
 * 3. A chat URL in link text once word-break markup (e.g. Superhuman's `<wbr />`
 *    tags) has been stripped
 *
 * The id is returned exactly as written in the HTML (its letter case is not changed).
 * A candidate followed directly by another hex digit is not a UUID and is ignored.
 *
 * @param html - The email HTML body
 * @returns The roomId if found, undefined otherwise
 */
export function extractRoomIdFromHtml(html: string | undefined): string | undefined {
  if (!html) return undefined;

  // Try the raw HTML first (most common case: the URL sits intact in an href).
  for (const pattern of CHAT_LINK_PATTERNS) {
    const roomId = pattern.exec(html)?.[1];
    if (roomId) return roomId;
  }

  // Fallback: remove word-break markup so a URL split across it becomes contiguous.
  const withoutWordBreaks = html.replace(WORD_BREAK_PATTERN, "");
  return WBR_STRIPPED_PATTERN.exec(withoutWordBreaks)?.[1];
}

lib/emails/inbound/getEmailRoomId.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import type { GetReceivingEmailResponseSuccess } from "resend";
22
import selectMemoryEmails from "@/lib/supabase/memory_emails/selectMemoryEmails";
33
import { extractRoomIdFromText } from "./extractRoomIdFromText";
4+
import { extractRoomIdFromHtml } from "./extractRoomIdFromHtml";
45

56
/**
6-
* Extracts the roomId from an email. First checks the email text for a Recoup chat link,
7-
* then falls back to looking up existing memory_emails via the references header.
7+
* Extracts the roomId from an email. Checks multiple sources in order:
8+
* 1. Email text body for a Recoup chat link
9+
* 2. Email HTML body for a Recoup chat link (handles Superhuman's wbr tags)
10+
* 3. References header to look up existing memory_emails
811
*
912
* @param emailContent - The email content from Resend's Receiving API
1013
* @returns The roomId if found, undefined otherwise
@@ -18,6 +21,13 @@ export async function getEmailRoomId(
1821
return roomIdFromText;
1922
}
2023

24+
// Secondary: check email HTML for Recoup chat link
25+
// This handles clients like Superhuman that insert <wbr /> tags in link text
26+
const roomIdFromHtml = extractRoomIdFromHtml(emailContent.html);
27+
if (roomIdFromHtml) {
28+
return roomIdFromHtml;
29+
}
30+
2131
// Fallback: check references header for existing memory_emails
2232
const references = emailContent.headers?.references;
2333
if (!references) {

0 commit comments

Comments
 (0)