From 4f984f5c58c7096bd1fd08f25e0e3ce80d35bdc4 Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Tue, 13 Jan 2026 07:48:50 -0500
Subject: [PATCH] feat: extract conversation ID from email HTML for Superhuman
 replies

Superhuman email client inserts <wbr /> tags in link text which breaks
plain text extraction. Added extractRoomIdFromHtml function as secondary
fallback in getEmailRoomId to handle this case.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .../__tests__/extractRoomIdFromHtml.test.ts   | 168 ++++++++++++++++++
 .../inbound/__tests__/getEmailRoomId.test.ts  |  42 ++++-
 lib/emails/inbound/extractRoomIdFromHtml.ts   |  48 +++++
 lib/emails/inbound/getEmailRoomId.ts          |  14 +-
 4 files changed, 269 insertions(+), 3 deletions(-)
 create mode 100644 lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts
 create mode 100644 lib/emails/inbound/extractRoomIdFromHtml.ts
diff --git a/lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts b/lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts
new file mode 100644
index 00000000..5fcf968b
--- /dev/null
+++ b/lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts
@@ -0,0 +1,168 @@
+import { describe, it, expect } from "vitest";
+import { extractRoomIdFromHtml } from "../extractRoomIdFromHtml";
+
+describe("extractRoomIdFromHtml", () => {
+  describe("Superhuman reply with conversation link in quoted content", () => {
+    it("extracts roomId from Superhuman reply with wbr tags in link text", () => {
+      // This is the actual HTML from a Superhuman reply where the link text
+      // contains <wbr /> tags for word breaking
+      const html = `<html>
+
+<head></head>
+
+<body>
+  <div>
+    <div>
+      <div>
+        <div class="">Send a picture of him <br /></div>
+        <div class=""><br /></div>
+      </div>
+      <div>
+        <div style="display: none; border: 0px; width: 0px; height: 0px; overflow: hidden; visibility: hidden;"><img src="https://r.superhuman.com/4640qXWivTiaNi_anz1bstqoUbWlYj8nnSM0Y-NWmoL_OZdXZ1Zq-_DSPSu7r6M_NMQJAgHCnrKL5OisY6deh83uz8MfXoijSTOwhFcnM5Ya0RU8q8kZDoD0MVTLFtwDxERoN1wu0T-LgI8TDjcWI8K1HEns5_8ETb2EF1fetEenZgrj73FE6Q.gif" alt=" " width="1" height="0" style="display: none; border: 0px; width: 0px; height: 0px; overflow: hidden; visibility: hidden;" /><!--                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      --></div><br />
+        <div class="gmail_signature">
+          <div style="clear:both">Sent via <a href="https://sprh.mn/?vip=sidney@recoupable.com" target="_blank">Superhuman</a></div><br />
+        </div>
+      </div><br />
+      <div>
+        <div class="gmail_quote">On Fri, Jan 09, 2026 at 11:59 AM, Agent by Recoup <span dir="ltr">&lt;<a href="mailto:agent@recoupable.com" target="_blank">agent@recoupable.com</a>&gt;</span> wrote:<br />
+          <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+            <div class="gmail_extra">
+              <div class="gmail_quote sh-color-black sh-color">
+                <p class="sh-color-black sh-color">Short answer: Brian Kernighan.</p>
+                <p class="sh-color-black sh-color">Details: the earliest known use in computing appears in Kernighan's 1972 tutorial for the B language (the "hello, world!" example). It was then popularized by Kernighan &amp; Ritchie's 1978 book The C Programming Language. (There are older claims—BCPL examples from the late 1960s and the exact phrase appeared as a radio catchphrase in the 1950s—but Kernighan is usually credited for putting it into programming tradition.)</p>
+                <p class="sh-color-black sh-color">Want the sources/links?</p>
+
+
+                <hr style="margin-top:24px;margin-bottom:16px;border:none;border-top:1px solid #e5e7eb;" class="sh-color-grey sh-color" />
+                <p style="font-size:12px;color:#6b7280;margin:0 0 4px;" class="sh-color-grey sh-color">
+                  Note: you can reply directly to this email to continue the conversation.
+                </p>
+                <p style="font-size:12px;color:#6b7280;margin:0;" class="sh-color-grey sh-color">
+                  Or continue the conversation on Recoup:
+                  <a href="https://14158f8b1cbe93481ac078c1f43f3792.us-east-1.resend-links.com/CL0/https:%2F%2Fchat.recoupable.com%2Fchat%2Fd5c473ec-04cf-4a23-a577-e0dc71542392/1/0100019ba3b2dbec-832401f0-a3c6-4478-b6bf-3b0b06b7251a-000000/OomH25B53Pym0ykT2YYxbKx0c_NEhvJ3oFfBzpKKdVk=439" rel="noopener noreferrer" target="_blank" class="sh-color-blue sh-color">
+                    https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />d5c473ec-04cf-4a23-a577-e0dc71542392
+                  </a>
+                </p>
+              </div>
+            </div>
+          </blockquote>
+        </div>
+      </div><br />
+    </div>
+  </div>
+</body>
+
+</html>`;
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392");
+    });
+  });
+
+  describe("Gmail reply with proper threading", () => {
+    it("extracts roomId from Gmail reply with quoted content", () => {
+      const html = `
+        <html>
+          <body>
+            <p>Thanks for the info!</p>
+            <div class="gmail_quote">
+              <blockquote>
+                <p>Original message here</p>
+                <p>Continue the conversation: <a href="https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890">https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890</a></p>
+              </blockquote>
+            </div>
+          </body>
+        </html>
+      `;
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBe("a1b2c3d4-e5f6-7890-abcd-ef1234567890");
+    });
+  });
+
+  describe("no conversation ID", () => {
+    it("returns undefined for undefined input", () => {
+      const result = extractRoomIdFromHtml(undefined);
+
+      expect(result).toBeUndefined();
+    });
+
+    it("returns undefined for empty string", () => {
+      const result = extractRoomIdFromHtml("");
+
+      expect(result).toBeUndefined();
+    });
+
+    it("returns undefined when no chat link present", () => {
+      const html = "<html><body><p>This email has no Recoup chat link.</p></body></html>";
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBeUndefined();
+    });
+
+    it("returns undefined for invalid UUID format in link", () => {
+      const html =
+        '<a href="https://chat.recoupable.com/chat/not-a-valid-uuid">link</a>';
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBeUndefined();
+    });
+
+    it("returns undefined for wrong domain", () => {
+      const html =
+        '<a href="https://chat.otherdomain.com/chat/550e8400-e29b-41d4-a716-446655440000">link</a>';
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBeUndefined();
+    });
+  });
+
+  describe("edge cases", () => {
+    it("handles URL-encoded link in href attribute", () => {
+      // Resend tracking redirects URL-encode the destination
+      const html =
+        '<a href="https://tracking.example.com/redirect/https:%2F%2Fchat.recoupable.com%2Fchat%2F12345678-1234-1234-1234-123456789abc">Click here</a>';
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBe("12345678-1234-1234-1234-123456789abc");
+    });
+
+    it("extracts first roomId when multiple links present", () => {
+      const html = `
+        <a href="https://chat.recoupable.com/chat/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee">First</a>
+        <a href="https://chat.recoupable.com/chat/11111111-2222-3333-4444-555555555555">Second</a>
+      `;
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBe("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee");
+    });
+
+    it("handles link text with wbr tags breaking up the URL", () => {
+      const html = `
+        <a href="#">
+          https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />abcdef12-3456-7890-abcd-ef1234567890
+        </a>
+      `;
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBe("abcdef12-3456-7890-abcd-ef1234567890");
+    });
+
+    it("handles mixed case in URL", () => {
+      const html =
+        '<a href="HTTPS://CHAT.RECOUPABLE.COM/CHAT/12345678-1234-1234-1234-123456789abc">link</a>';
+
+      const result = extractRoomIdFromHtml(html);
+
+      expect(result).toBe("12345678-1234-1234-1234-123456789abc");
+    });
+  });
+});
diff --git a/lib/emails/inbound/__tests__/getEmailRoomId.test.ts b/lib/emails/inbound/__tests__/getEmailRoomId.test.ts
index 690beb59..2850f7c3 100644
--- a/lib/emails/inbound/__tests__/getEmailRoomId.test.ts
+++ b/lib/emails/inbound/__tests__/getEmailRoomId.test.ts
@@ -45,14 +45,54 @@ describe("getEmailRoomId", () => {
     });
   });
 
+  describe("secondary: extracting from email HTML", () => {
+    it("returns roomId from HTML when text has no chat link", async () => {
+      const emailContent = {
+        text: "No chat link in text",
+        html: '<a href="https://chat.recoupable.com/chat/abcdef12-3456-7890-abcd-ef1234567890">link</a>',
+        headers: { references: "<old-message-id@example.com>" },
+      } as GetReceivingEmailResponseSuccess;
+
+      const result = await getEmailRoomId(emailContent);
+
+      expect(result).toBe("abcdef12-3456-7890-abcd-ef1234567890");
+      expect(mockSelectMemoryEmails).not.toHaveBeenCalled();
+    });
+
+    it("handles Superhuman wbr tags in HTML link text", async () => {
+      const emailContent = {
+        text: undefined,
+        html: '<a href="#">https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />d5c473ec-04cf-4a23-a577-e0dc71542392</a>',
+        headers: {},
+      } as GetReceivingEmailResponseSuccess;
+
+      const result = await getEmailRoomId(emailContent);
+
+      expect(result).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392");
+    });
+
+    it("prioritizes text over HTML", async () => {
+      const emailContent = {
+        text: "https://chat.recoupable.com/chat/11111111-1111-1111-1111-111111111111",
+        html: '<a href="https://chat.recoupable.com/chat/22222222-2222-2222-2222-222222222222">link</a>',
+        headers: {},
+      } as GetReceivingEmailResponseSuccess;
+
+      const result = await getEmailRoomId(emailContent);
+
+      expect(result).toBe("11111111-1111-1111-1111-111111111111");
+    });
+  });
+
   describe("fallback: checking references header", () => {
-    it("falls back to references header when no chat link in text", async () => {
+    it("falls back to references header when no chat link in text or html", async () => {
       mockSelectMemoryEmails.mockResolvedValue([
         { memories: { room_id: "22222222-3333-4444-5555-666666666666" } },
       ] as Awaited<ReturnType<typeof selectMemoryEmails>>);
 
       const emailContent = {
         text: "No chat link here",
+        html: "<p>No chat link in HTML either</p>",
         headers: { references: "<message-id@example.com>" },
       } as GetReceivingEmailResponseSuccess;
 
diff --git a/lib/emails/inbound/extractRoomIdFromHtml.ts b/lib/emails/inbound/extractRoomIdFromHtml.ts
new file mode 100644
index 00000000..f637b17e
--- /dev/null
+++ b/lib/emails/inbound/extractRoomIdFromHtml.ts
@@ -0,0 +1,48 @@
+const UUID_PATTERN = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";
+
+// Case-insensitive patterns for a Recoup chat link; capture group 1 is the room UUID:
+// - Direct URL (http or https): https://chat.recoupable.com/chat/uuid
+// - URL-encoded (in tracking redirects): chat.recoupable.com%2Fchat%2Fuuid
+const CHAT_LINK_PATTERNS = [
+  new RegExp(`https?://chat\\.recoupable\\.com/chat/(${UUID_PATTERN})`, "i"),
+  new RegExp(`chat\\.recoupable\\.com%2Fchat%2F(${UUID_PATTERN})`, "i"),
+];
+
+// Scheme-less pattern for link text whose <wbr /> tags have been removed; matches only the plain
+// "/chat/" form, e.g. "https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />uuid"
+const WBR_STRIPPED_PATTERN = new RegExp(
+  `chat\\.recoupable\\.com/chat/(${UUID_PATTERN})`,
+  "i",
+);
+
+/**
+ * Extracts the roomId from email HTML by looking for a Recoup chat link.
+ * Sources are checked in this order, and the first hit wins:
+ * - Direct URLs anywhere in the raw HTML (e.g. href attributes)
+ * - URL-encoded URLs (%2F separators) as found in tracking redirect links
+ * - Link text with <wbr /> tags inserted for word breaking (common in Superhuman)
+ *
+ * @param html - The email HTML body; undefined or empty yields undefined
+ * @returns The roomId exactly as it appears in the HTML (case preserved), undefined otherwise
+ */
+export function extractRoomIdFromHtml(html: string | undefined): string | undefined {
+  if (!html) return undefined;
+
+  // Try the patterns in list order on the raw HTML; each one scans the whole document
+  for (const pattern of CHAT_LINK_PATTERNS) {
+    const match = html.match(pattern);
+    if (match?.[1]) {
+      return match[1];
+    }
+  }
+
+  // Fallback: remove <wbr>, <wbr/> and <wbr /> tags (any case) and match again
+  // This handles Superhuman's link text formatting: "https:/<wbr />/<wbr />chat.<wbr />..."
+  const strippedHtml = html.replace(/<wbr\s*\/?>/gi, "");
+  const strippedMatch = strippedHtml.match(WBR_STRIPPED_PATTERN);
+  if (strippedMatch?.[1]) {
+    return strippedMatch[1];
+  }
+
+  return undefined;
+}
diff --git a/lib/emails/inbound/getEmailRoomId.ts b/lib/emails/inbound/getEmailRoomId.ts
index ef889381..f12db939 100644
--- a/lib/emails/inbound/getEmailRoomId.ts
+++ b/lib/emails/inbound/getEmailRoomId.ts
@@ -1,10 +1,13 @@
 import type { GetReceivingEmailResponseSuccess } from "resend";
 import selectMemoryEmails from "@/lib/supabase/memory_emails/selectMemoryEmails";
 import { extractRoomIdFromText } from "./extractRoomIdFromText";
+import { extractRoomIdFromHtml } from "./extractRoomIdFromHtml";
 
 /**
- * Extracts the roomId from an email. First checks the email text for a Recoup chat link,
- * then falls back to looking up existing memory_emails via the references header.
+ * Extracts the roomId from an email. Checks multiple sources in order:
+ * 1. Email text body for a Recoup chat link
+ * 2. Email HTML body for a Recoup chat link (handles Superhuman's wbr tags)
+ * 3. References header to look up existing memory_emails
  *
  * @param emailContent - The email content from Resend's Receiving API
  * @returns The roomId if found, undefined otherwise
@@ -18,6 +21,13 @@ export async function getEmailRoomId(
     return roomIdFromText;
   }
 
+  // Secondary: check email HTML for Recoup chat link
+  // This handles clients like Superhuman that insert <wbr /> tags in link text
+  const roomIdFromHtml = extractRoomIdFromHtml(emailContent.html);
+  if (roomIdFromHtml) {
+    return roomIdFromHtml;
+  }
+
   // Fallback: check references header for existing memory_emails
   const references = emailContent.headers?.references;
   if (!references) {