From 7267ae4ee9d5f068b94b27120770570271268824 Mon Sep 17 00:00:00 2001
From: Guido van Rossum <gvanrossum@microsoft.com>
Date: Wed, 3 Dec 2025 14:01:28 -0800
Subject: [PATCH 1/4] Recognize inline replies, even in the presence of
 unquoted trailers

---
 test/test_email_import.py        | 182 +++++++++++++++++++++++++++++++
 typeagent/emails/email_import.py | 176 +++++++++++++++++++++++++++++-
 2 files changed, 353 insertions(+), 5 deletions(-)
 create mode 100644 test/test_email_import.py

diff --git a/test/test_email_import.py b/test/test_email_import.py
new file mode 100644
index 00000000..efd2bd11
--- /dev/null
+++ b/test/test_email_import.py
@@ -0,0 +1,182 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from typeagent.emails.email_import import (
+    extract_inline_reply,
+    get_last_response_in_thread,
+    is_inline_reply,
+)
+
+
+class TestIsInlineReply:
+    def test_empty_text(self) -> None:
+        assert is_inline_reply("") is False
+
+    def test_no_header(self) -> None:
+        text = "Just a regular email with no quoted content."
+        assert is_inline_reply(text) is False
+
+    def test_bottom_posted_reply(self) -> None:
+        # This has "On ... wrote:" but all quotes are at the bottom, no interleaving
+        text = """Thanks for the info!
+
+On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+
+> Here is some quoted text.
+> More quoted text.
+> Even more."""
+        assert is_inline_reply(text) is False
+
+    def test_inline_reply(self) -> None:
+        text = """I've given my replies in line with the quoted text.
+
+On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+> Quoted blah.
+
+That is clearly BS.
+
+> Quoted blah blah.
+
+Here I must agree.
+
+> More quoted text.
+
+-- 
+Guido van Rossum"""
+        assert is_inline_reply(text) is True
+
+    def test_inline_reply_no_preamble(self) -> None:
+        text = """On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+> First quote.
+
+My first response.
+
+> Second quote.
+
+My second response."""
+        assert is_inline_reply(text) is True
+
+
+class TestExtractInlineReply:
+    def test_empty_text(self) -> None:
+        assert extract_inline_reply("") == ""
+
+    def test_no_inline_pattern(self) -> None:
+        text = "Just a regular email."
+        assert extract_inline_reply(text) == "Just a regular email."
+
+    def test_basic_inline_reply(self) -> None:
+        text = """I've given my replies in line with the quoted text.
+
+On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+> Quoted blah.
+
+That is clearly BS.
+
+> Quoted blah blah.
+
+Here I must agree.
+
+> More quoted text.
+
+-- 
+Guido van Rossum"""
+        result = extract_inline_reply(text)
+        assert "I've given my replies in line" in result
+        assert "That is clearly BS." in result
+        assert "Here I must agree." in result
+        # Quoted content should be removed
+        assert "Quoted blah" not in result
+        # Signature should be removed
+        assert "Guido van Rossum" not in result
+
+    def test_inline_reply_with_context(self) -> None:
+        text = """On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+> Is Python good?
+
+Yes, absolutely!
+
+> What about JavaScript?
+
+It has its uses."""
+        result = extract_inline_reply(text, include_context=True)
+        assert "Yes, absolutely!" in result
+        assert "It has its uses." in result
+        assert "[In reply to:" in result
+        assert "Python" in result
+
+    def test_preserves_preamble(self) -> None:
+        text = """Here's my preamble before the inline replies.
+
+On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+> Question?
+
+Answer!"""
+        result = extract_inline_reply(text)
+        assert "Here's my preamble" in result
+        assert "Answer!" in result
+
+    def test_strips_trailing_delimiters(self) -> None:
+        text = """On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+> Question?
+
+Answer!
+_______________"""
+        result = extract_inline_reply(text)
+        assert result.endswith("Answer!")
+
+
+class TestGetLastResponseInThread:
+    def test_empty_text(self) -> None:
+        assert get_last_response_in_thread("") == ""
+
+    def test_simple_text(self) -> None:
+        assert get_last_response_in_thread("Hello world") == "Hello world"
+
+    def test_bottom_posted_reply(self) -> None:
+        text = """This is my response.
+
+On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+
+> Original message here.
+> More original."""
+        result = get_last_response_in_thread(text)
+        assert result == "This is my response."
+
+    def test_inline_reply(self) -> None:
+        text = """Preamble.
+
+On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+> Quote 1.
+
+Reply 1.
+
+> Quote 2.
+
+Reply 2.
+
+-- 
+Signature"""
+        result = get_last_response_in_thread(text)
+        assert "Preamble" in result
+        assert "Reply 1" in result
+        assert "Reply 2" in result
+        assert "Quote" not in result
+        assert "Signature" not in result
+
+    def test_original_message_delimiter(self) -> None:
+        text = """My response.
+
+-----Original Message-----
+From: Someone
+The original content."""
+        result = get_last_response_in_thread(text)
+        assert result == "My response."
+
+    def test_forwarded_delimiter(self) -> None:
+        text = """My thoughts on this.
+
+----- Forwarded by Someone -----
+Original forwarded content."""
+        result = get_last_response_in_thread(text)
+        assert result == "My thoughts on this."
diff --git a/typeagent/emails/email_import.py b/typeagent/emails/email_import.py
index 5be5baaa..2cf51182 100644
--- a/typeagent/emails/email_import.py
+++ b/typeagent/emails/email_import.py
@@ -125,21 +125,187 @@ def get_forwarded_email_parts(email_text: str) -> list[str]:
 # Precompiled regex for trailing line delimiters (underscores, dashes, equals, spaces)
 _TRAILING_LINE_DELIMITERS = re.compile(r"[\r\n][_\-= ]+\s*$")
 
+# Pattern to detect "On <date> <user> wrote:" header for inline replies
+_INLINE_REPLY_HEADER = re.compile(
+    r"^on\s+.+\s+wrote:\s*$",
+    re.IGNORECASE | re.MULTILINE,
+)
+
+# Pattern to match quoted lines (starting with > possibly with leading whitespace)
+_QUOTED_LINE = re.compile(r"^\s*>")
+
+# Pattern to detect email signature markers
+_SIGNATURE_MARKER = re.compile(r"^--\s*$", re.MULTILINE)
+
+
+def is_inline_reply(email_text: str) -> bool:
+    """
+    Detect if an email contains inline replies (responses interspersed with quotes).
+
+    An inline reply has:
+    1. An "On ... wrote:" header
+    2. Quoted lines (starting with >) interspersed with non-quoted response lines
+    """
+    if not email_text:
+        return False
+
+    # Must have the "On ... wrote:" header
+    header_match = _INLINE_REPLY_HEADER.search(email_text)
+    if not header_match:
+        return False
+
+    # Check content after the header for mixed quoted/non-quoted lines
+    content_after_header = email_text[header_match.end() :]
+    lines = content_after_header.split("\n")
+
+    has_quoted = False
+    has_non_quoted_after_quoted = False
+
+    for line in lines:
+        # Check for signature marker
+        if _SIGNATURE_MARKER.match(line):
+            break
+
+        stripped = line.strip()
+        if not stripped:
+            continue
+
+        if _QUOTED_LINE.match(line):
+            has_quoted = True
+        elif has_quoted:
+            # Non-quoted line after we've seen quoted lines = inline reply
+            has_non_quoted_after_quoted = True
+            break
+
+    return has_quoted and has_non_quoted_after_quoted
+
+
+def extract_inline_reply(email_text: str, include_context: bool = False) -> str:
+    """
+    Extract reply content from an email with inline responses.
+
+    For emails where the author responds inline to quoted text, this extracts
+    the non-quoted portions (the actual replies).
+
+    Args:
+        email_text: The full email body text
+        include_context: If True, include abbreviated quoted context before each reply
+
+    Returns:
+        The extracted reply text. If include_context is True, quoted lines are
+        prefixed with "[quoted]" to show what's being replied to.
+    """
+    if not email_text:
+        return ""
+
+    # Find the "On ... wrote:" header
+    header_match = _INLINE_REPLY_HEADER.search(email_text)
+    if not header_match:
+        # No inline reply pattern, return as-is
+        return email_text
+
+    # Get preamble (content before the "On ... wrote:" header)
+    preamble = email_text[: header_match.start()].strip()
+
+    # Process content after header
+    content_after_header = email_text[header_match.end() :]
+    lines = content_after_header.split("\n")
+
+    result_parts: list[str] = []
+    if preamble:
+        result_parts.append(preamble)
+
+    current_reply_lines: list[str] = []
+    current_quoted_lines: list[str] = []
+    in_signature = False
+
+    for line in lines:
+        # Check for signature marker
+        if _SIGNATURE_MARKER.match(line):
+            in_signature = True
+            # Flush any pending reply
+            if current_reply_lines:
+                if include_context and current_quoted_lines:
+                    result_parts.append(_summarize_quoted(current_quoted_lines))
+                result_parts.append("\n".join(current_reply_lines))
+                current_reply_lines = []
+                current_quoted_lines = []
+            continue
+
+        if in_signature:
+            # Skip signature content
+            continue
+
+        if _QUOTED_LINE.match(line):
+            # This is a quoted line
+            if current_reply_lines:
+                # Flush the current reply block
+                if include_context and current_quoted_lines:
+                    result_parts.append(_summarize_quoted(current_quoted_lines))
+                result_parts.append("\n".join(current_reply_lines))
+                current_reply_lines = []
+                current_quoted_lines = []
+            # Accumulate quoted lines for context (only if needed)
+            if include_context:
+                # Strip the leading > and any space after it
+                unquoted = re.sub(r"^\s*>\s?", "", line)
+                current_quoted_lines.append(unquoted)
+        else:
+            # Non-quoted line - part of the reply
+            stripped = line.strip()
+            if stripped or current_reply_lines:
+                # Include non-empty lines, or preserve blank lines within a reply block
+                current_reply_lines.append(line.rstrip())
+
+    # Flush any remaining reply
+    if current_reply_lines:
+        if include_context and current_quoted_lines:
+            result_parts.append(_summarize_quoted(current_quoted_lines))
+        result_parts.append("\n".join(current_reply_lines))
+
+    result = "\n\n".join(part for part in result_parts if part.strip())
+    return _strip_trailing_delimiters(result)
+
+
+def _summarize_quoted(quoted_lines: list[str]) -> str:
+    """Create a brief summary of quoted content for context."""
+    # Join and truncate to provide context
+    text = " ".join(line.strip() for line in quoted_lines if line.strip())
+    if len(text) > 100:
+        text = text[:97] + "..."
+    return f"[In reply to: {text}]"
+
+
+def _strip_trailing_delimiters(text: str) -> str:
+    """Remove trailing line delimiters (underscores, dashes, equals, spaces)."""
+    text = text.strip()
+    return _TRAILING_LINE_DELIMITERS.sub("", text)
+
 
 # Simple way to get the last response on an email thread in MIME format
 def get_last_response_in_thread(email_text: str) -> str:
+    """
+    Extract the latest response from an email thread.
+
+    Handles two patterns:
+    1. Bottom-posted replies: New content at top, quoted thread at bottom
+    2. Inline replies: Responses interspersed with quoted text
+
+    For inline replies, only the reply portions (non-quoted text) are extracted.
+    """
     if not email_text:
         return ""
 
+    # Check for inline reply pattern first
+    if is_inline_reply(email_text):
+        return extract_inline_reply(email_text, include_context=False)
+
+    # Fall back to original behavior for bottom-posted replies
     match = _THREAD_DELIMITERS.search(email_text)
     if match:
         email_text = email_text[: match.start()]
 
-    email_text = email_text.strip()
-    # Remove trailing line delimiters (e.g. underscores, dashes, equals)
-    _TRAILING_LINE_DELIMITER_REGEX = _TRAILING_LINE_DELIMITERS
-    email_text = _TRAILING_LINE_DELIMITER_REGEX.sub("", email_text)
-    return email_text
+    return _strip_trailing_delimiters(email_text)
 
 
 # Extracts the plain text body from an email.message.Message object.

From 37006c12195fae67371c9949ef58155ad1a7d4f3 Mon Sep 17 00:00:00 2001
From: Guido van Rossum <gvanrossum@microsoft.com>
Date: Wed, 3 Dec 2025 14:07:36 -0800
Subject: [PATCH 2/4] Rename bottom-posting to top-posting

---
 test/test_email_import.py        | 4 ++--
 typeagent/emails/email_import.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_email_import.py b/test/test_email_import.py
index efd2bd11..7e8d3308 100644
--- a/test/test_email_import.py
+++ b/test/test_email_import.py
@@ -16,7 +16,7 @@ def test_no_header(self) -> None:
         text = "Just a regular email with no quoted content."
         assert is_inline_reply(text) is False
 
-    def test_bottom_posted_reply(self) -> None:
+    def test_top_posted_reply(self) -> None:
         # This has "On ... wrote:" but all quotes are at the bottom, no interleaving
         text = """Thanks for the info!
 
@@ -133,7 +133,7 @@ def test_empty_text(self) -> None:
     def test_simple_text(self) -> None:
         assert get_last_response_in_thread("Hello world") == "Hello world"
 
-    def test_bottom_posted_reply(self) -> None:
+    def test_top_posted_reply(self) -> None:
         text = """This is my response.
 
 On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
diff --git a/typeagent/emails/email_import.py b/typeagent/emails/email_import.py
index 2cf51182..d5db5380 100644
--- a/typeagent/emails/email_import.py
+++ b/typeagent/emails/email_import.py
@@ -288,7 +288,7 @@ def get_last_response_in_thread(email_text: str) -> str:
     Extract the latest response from an email thread.
 
     Handles two patterns:
-    1. Bottom-posted replies: New content at top, quoted thread at bottom
+    1. Top-posted replies: New content at top, quoted thread at bottom
     2. Inline replies: Responses interspersed with quoted text
 
     For inline replies, only the reply portions (non-quoted text) are extracted.

From 3c1bc26d78d17c5c2b56d0985401da8cd653f6dd Mon Sep 17 00:00:00 2001
From: Guido van Rossum <gvanrossum@microsoft.com>
Date: Wed, 3 Dec 2025 14:48:02 -0800
Subject: [PATCH 3/4] Add chunk_sources to track quoted vs original email
 content

---
 test/test_email_import.py         | 188 +++++++++++++++---------------
 typeagent/emails/email_import.py  | 179 ++++++++++++++--------------
 typeagent/emails/email_message.py |   6 +
 3 files changed, 190 insertions(+), 183 deletions(-)

diff --git a/test/test_email_import.py b/test/test_email_import.py
index 7e8d3308..18208617 100644
--- a/test/test_email_import.py
+++ b/test/test_email_import.py
@@ -2,9 +2,8 @@
 # Licensed under the MIT License.
 
 from typeagent.emails.email_import import (
-    extract_inline_reply,
-    get_last_response_in_thread,
     is_inline_reply,
+    parse_email_chunks,
 )
 
 
@@ -18,17 +17,20 @@ def test_no_header(self) -> None:
 
     def test_top_posted_reply(self) -> None:
         # This has "On ... wrote:" but all quotes are at the bottom, no interleaving
-        text = """Thanks for the info!
+        text = """\
+Thanks for the info!
 
 On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
 
 > Here is some quoted text.
 > More quoted text.
-> Even more."""
+> Even more.
+"""
         assert is_inline_reply(text) is False
 
     def test_inline_reply(self) -> None:
-        text = """I've given my replies in line with the quoted text.
+        text = """\
+I've given my replies in line with the quoted text.
 
 On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
 > Quoted blah.
@@ -42,31 +44,37 @@ def test_inline_reply(self) -> None:
 > More quoted text.
 
 -- 
-Guido van Rossum"""
+Guido van Rossum
+"""
         assert is_inline_reply(text) is True
 
     def test_inline_reply_no_preamble(self) -> None:
-        text = """On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+        text = """\
+On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
 > First quote.
 
 My first response.
 
 > Second quote.
 
-My second response."""
+My second response.
+"""
         assert is_inline_reply(text) is True
 
 
-class TestExtractInlineReply:
+class TestParseEmailChunks:
     def test_empty_text(self) -> None:
-        assert extract_inline_reply("") == ""
+        assert parse_email_chunks("") == []
 
     def test_no_inline_pattern(self) -> None:
         text = "Just a regular email."
-        assert extract_inline_reply(text) == "Just a regular email."
+        result = parse_email_chunks(text)
+        assert len(result) == 1
+        assert result[0] == ("Just a regular email.", None)
 
     def test_basic_inline_reply(self) -> None:
-        text = """I've given my replies in line with the quoted text.
+        text = """\
+I've given my replies in line with the quoted text.
 
 On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
 > Quoted blah.
@@ -80,103 +88,95 @@ def test_basic_inline_reply(self) -> None:
 > More quoted text.
 
 -- 
-Guido van Rossum"""
-        result = extract_inline_reply(text)
-        assert "I've given my replies in line" in result
-        assert "That is clearly BS." in result
-        assert "Here I must agree." in result
-        # Quoted content should be removed
-        assert "Quoted blah" not in result
-        # Signature should be removed
-        assert "Guido van Rossum" not in result
-
-    def test_inline_reply_with_context(self) -> None:
-        text = """On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+Guido van Rossum
+"""
+        result = parse_email_chunks(text)
+        # Should have: preamble (original), quoted, reply, quoted, reply, quoted
+        texts = [chunk[0] for chunk in result]
+        sources = [chunk[1] for chunk in result]
+
+        # Check we have all the content
+        assert any("I've given my replies" in t for t in texts)
+        assert any("That is clearly BS" in t for t in texts)
+        assert any("Here I must agree" in t for t in texts)
+        assert any("Quoted blah" in t for t in texts)
+
+        # Original content should have None source
+        for text, source in result:
+            if "I've given my replies" in text or "That is clearly BS" in text:
+                assert source is None
+
+        # Quoted content should have the person's name
+        for text, source in result:
+            if "Quoted blah" in text:
+                assert source == "Someone"
+
+        # Signature should NOT be included
+        assert not any("Guido van Rossum" in t for t in texts)
+
+    def test_extracts_quoted_person_name(self) -> None:
+        text = """\
+On Mon, Dec 10, 2020 at 10:30 AM John Doe wrote:
 > Is Python good?
 
 Yes, absolutely!
 
 > What about JavaScript?
 
-It has its uses."""
-        result = extract_inline_reply(text, include_context=True)
-        assert "Yes, absolutely!" in result
-        assert "It has its uses." in result
-        assert "[In reply to:" in result
-        assert "Python" in result
+It has its uses.
+"""
+        result = parse_email_chunks(text)
+
+        # Find quoted chunks - they should have "John Doe" as source
+        quoted_chunks = [(t, s) for t, s in result if s is not None]
+        assert len(quoted_chunks) == 2
+        for text, source in quoted_chunks:
+            assert source == "John Doe"
 
     def test_preserves_preamble(self) -> None:
-        text = """Here's my preamble before the inline replies.
+        text = """\
+Here's my preamble before the inline replies.
 
 On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
 > Question?
 
-Answer!"""
-        result = extract_inline_reply(text)
-        assert "Here's my preamble" in result
-        assert "Answer!" in result
-
-    def test_strips_trailing_delimiters(self) -> None:
-        text = """On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
-> Question?
-
 Answer!
-_______________"""
-        result = extract_inline_reply(text)
-        assert result.endswith("Answer!")
+"""
+        result = parse_email_chunks(text)
+        texts = [chunk[0] for chunk in result]
 
+        assert any("preamble" in t for t in texts)
+        assert any("Answer" in t for t in texts)
 
-class TestGetLastResponseInThread:
-    def test_empty_text(self) -> None:
-        assert get_last_response_in_thread("") == ""
-
-    def test_simple_text(self) -> None:
-        assert get_last_response_in_thread("Hello world") == "Hello world"
-
-    def test_top_posted_reply(self) -> None:
-        text = """This is my response.
-
+    def test_strips_trailing_delimiters(self) -> None:
+        text = """\
 On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
+> Question?
 
-> Original message here.
-> More original."""
-        result = get_last_response_in_thread(text)
-        assert result == "This is my response."
-
-    def test_inline_reply(self) -> None:
-        text = """Preamble.
-
+Answer!
+_______________
+"""
+        result = parse_email_chunks(text)
+        # Last non-quoted chunk should not end with underscores
+        original_chunks = [t for t, s in result if s is None]
+        assert len(original_chunks) > 0
+        assert not original_chunks[-1].endswith("_")
+
+    def test_quoted_content_is_unabbreviated(self) -> None:
+        text = """\
 On Mon, Dec 10, 2020 at 10:30 AM Someone wrote:
-> Quote 1.
-
-Reply 1.
-
-> Quote 2.
-
-Reply 2.
-
--- 
-Signature"""
-        result = get_last_response_in_thread(text)
-        assert "Preamble" in result
-        assert "Reply 1" in result
-        assert "Reply 2" in result
-        assert "Quote" not in result
-        assert "Signature" not in result
-
-    def test_original_message_delimiter(self) -> None:
-        text = """My response.
-
------Original Message-----
-From: Someone
-The original content."""
-        result = get_last_response_in_thread(text)
-        assert result == "My response."
-
-    def test_forwarded_delimiter(self) -> None:
-        text = """My thoughts on this.
-
------ Forwarded by Someone -----
-Original forwarded content."""
-        result = get_last_response_in_thread(text)
-        assert result == "My thoughts on this."
+> This is a very long quoted line that should be preserved in full.
+> And this is another line that continues the quote.
+> Even more content here.
+
+My response.
+"""
+        result = parse_email_chunks(text)
+
+        # Find the quoted chunk
+        quoted = [t for t, s in result if s is not None]
+        assert len(quoted) == 1
+        # Full content should be preserved
+        assert "very long quoted line" in quoted[0]
+        assert "another line" in quoted[0]
+        assert "Even more content" in quoted[0]
diff --git a/typeagent/emails/email_import.py b/typeagent/emails/email_import.py
index d5db5380..89bfc92f 100644
--- a/typeagent/emails/email_import.py
+++ b/typeagent/emails/email_import.py
@@ -54,9 +54,8 @@ def import_forwarded_email_string(
 
 
 # Imports an email.message.Message object and returns an EmailMessage object
-# If the message is a reply, returns only the latest response.
 def import_email_message(msg: Message, max_chunk_length: int) -> EmailMessage:
-    # Extract metadata from
+    # Extract metadata
     email_meta = EmailMessageMeta(
         sender=msg.get("From", ""),
         recipients=_import_address_headers(msg.get_all("To", [])),
@@ -70,20 +69,32 @@ def import_email_message(msg: Message, max_chunk_length: int) -> EmailMessage:
     if timestamp_date is not None:
         timestamp = parsedate_to_datetime(timestamp_date).isoformat()
 
-    # Get email body.
-    # If the email was a reply, then ensure we only pick up the latest response
+    # Get email body and parse into chunks with source attribution
     body = _extract_email_body(msg)
     if body is None:
         body = ""
-    elif is_reply(msg):
-        body = get_last_response_in_thread(body)
 
+    # Prepend subject to body if available
     if email_meta.subject is not None:
         body = email_meta.subject + "\n\n" + body
 
-    body_chunks = _text_to_chunks(body, max_chunk_length)
+    # Parse into chunks with source attribution (handles inline replies)
+    parsed_chunks = parse_email_chunks(body)
+
+    # Apply max_chunk_length splitting while preserving source attribution
+    text_chunks: list[str] = []
+    chunk_sources: list[str | None] = []
+    for text, source in parsed_chunks:
+        sub_chunks = _text_to_chunks(text, max_chunk_length)
+        for sub_chunk in sub_chunks:
+            text_chunks.append(sub_chunk)
+            chunk_sources.append(source)
+
     email: EmailMessage = EmailMessage(
-        metadata=email_meta, text_chunks=body_chunks, timestamp=timestamp
+        metadata=email_meta,
+        text_chunks=text_chunks,
+        chunk_sources=chunk_sources,
+        timestamp=timestamp,
     )
     return email
 
@@ -126,8 +137,13 @@ def get_forwarded_email_parts(email_text: str) -> list[str]:
 _TRAILING_LINE_DELIMITERS = re.compile(r"[\r\n][_\-= ]+\s*$")
 
 # Pattern to detect "On <date> <user> wrote:" header for inline replies
+# Uses alternation to handle different date formats:
+# 1. "On Mon, Dec 10, 2020 at 10:30 AM John Doe wrote:" (AM/PM must follow a
+#    digit, so names such as "Adam" or "Sam" are not mistaken for a time)
+# 2. "On Mon, Dec 10, 2020 Someone wrote:" (year followed by name)
+# 3. Fallback: last word before "wrote:"; groups 1-3 capture the person's name
 _INLINE_REPLY_HEADER = re.compile(
-    r"^on\s+.+\s+wrote:\s*$",
+    r"^on\s+(?:.+\d\s*[AP]M\s+(.+?)|.+,\s*\d{4}\s+(.+?)|.+\s+(.+?))\s+wrote:\s*$",
     re.IGNORECASE | re.MULTILINE,
 )
 
@@ -138,6 +154,10 @@ def get_forwarded_email_parts(email_text: str) -> list[str]:
 _SIGNATURE_MARKER = re.compile(r"^--\s*$", re.MULTILINE)
 
 
+# Type alias for chunk with source info
+ChunkWithSource = tuple[str, str | None]  # (text, source: None=original, str=quoted)
+
+
 def is_inline_reply(email_text: str) -> bool:
     """
     Detect if an email contains inline replies (responses interspersed with quotes).
@@ -180,29 +200,36 @@ def is_inline_reply(email_text: str) -> bool:
     return has_quoted and has_non_quoted_after_quoted
 
 
-def extract_inline_reply(email_text: str, include_context: bool = False) -> str:
+def parse_email_chunks(email_text: str) -> list[ChunkWithSource]:
     """
-    Extract reply content from an email with inline responses.
-
-    For emails where the author responds inline to quoted text, this extracts
-    the non-quoted portions (the actual replies).
+    Parse email text into chunks with source attribution.
 
-    Args:
-        email_text: The full email body text
-        include_context: If True, include abbreviated quoted context before each reply
+    Returns a list of (text, source) tuples where:
+    - source is None for original (unquoted) content
+    - source is the quoted person's name for quoted content, or " " if unknown
 
-    Returns:
-        The extracted reply text. If include_context is True, quoted lines are
-        prefixed with "[quoted]" to show what's being replied to.
+    This handles inline replies where the sender responds inline to quoted text,
+    preserving both the quoted and unquoted portions as separate chunks.
     """
     if not email_text:
-        return ""
+        return []
 
     # Find the "On ... wrote:" header
     header_match = _INLINE_REPLY_HEADER.search(email_text)
     if not header_match:
-        # No inline reply pattern, return as-is
-        return email_text
+        # No inline reply pattern, return as a single original chunk
+        text = _strip_trailing_delimiters(email_text)
+        if text:
+            return [(text, None)]
+        return []
+
+    # Extract quoted person from header (first non-None group from groups 1, 2, or 3)
+    quoted_person = (
+        header_match.group(1) or header_match.group(2) or header_match.group(3) or " "
+    )
+    quoted_person = quoted_person.strip() if quoted_person else " "
+    if not quoted_person:
+        quoted_person = " "
 
     # Get preamble (content before the "On ... wrote:" header)
     preamble = email_text[: header_match.start()].strip()
@@ -211,25 +238,37 @@ def extract_inline_reply(email_text: str, include_context: bool = False) -> str:
     content_after_header = email_text[header_match.end() :]
     lines = content_after_header.split("\n")
 
-    result_parts: list[str] = []
+    result: list[ChunkWithSource] = []
     if preamble:
-        result_parts.append(preamble)
+        result.append((preamble, None))
 
     current_reply_lines: list[str] = []
     current_quoted_lines: list[str] = []
     in_signature = False
 
+    def flush_reply() -> None:
+        nonlocal current_reply_lines
+        if current_reply_lines:
+            text = "\n".join(current_reply_lines).strip()
+            if text:
+                result.append((text, None))
+            current_reply_lines = []
+
+    def flush_quoted() -> None:
+        nonlocal current_quoted_lines
+        if current_quoted_lines:
+            text = "\n".join(current_quoted_lines).strip()
+            if text:
+                result.append((text, quoted_person))
+            current_quoted_lines = []
+
     for line in lines:
         # Check for signature marker
         if _SIGNATURE_MARKER.match(line):
             in_signature = True
-            # Flush any pending reply
-            if current_reply_lines:
-                if include_context and current_quoted_lines:
-                    result_parts.append(_summarize_quoted(current_quoted_lines))
-                result_parts.append("\n".join(current_reply_lines))
-                current_reply_lines = []
-                current_quoted_lines = []
+            # Flush any pending content
+            flush_quoted()
+            flush_reply()
             continue
 
         if in_signature:
@@ -237,43 +276,31 @@ def extract_inline_reply(email_text: str, include_context: bool = False) -> str:
             continue
 
         if _QUOTED_LINE.match(line):
-            # This is a quoted line
+            # This is a quoted line - flush any pending reply first
             if current_reply_lines:
-                # Flush the current reply block
-                if include_context and current_quoted_lines:
-                    result_parts.append(_summarize_quoted(current_quoted_lines))
-                result_parts.append("\n".join(current_reply_lines))
-                current_reply_lines = []
-                current_quoted_lines = []
-            # Accumulate quoted lines for context (only if needed)
-            if include_context:
-                # Strip the leading > and any space after it
-                unquoted = re.sub(r"^\s*>\s?", "", line)
-                current_quoted_lines.append(unquoted)
+                flush_reply()
+            # Strip the leading > and any space after it
+            unquoted = re.sub(r"^\s*>\s?", "", line)
+            current_quoted_lines.append(unquoted)
         else:
-            # Non-quoted line - part of the reply
+            # Non-quoted line - flush any pending quoted first
+            if current_quoted_lines:
+                flush_quoted()
+            # Only accumulate non-empty lines or preserve blank lines within a block
             stripped = line.strip()
             if stripped or current_reply_lines:
-                # Include non-empty lines, or preserve blank lines within a reply block
                 current_reply_lines.append(line.rstrip())
 
-    # Flush any remaining reply
-    if current_reply_lines:
-        if include_context and current_quoted_lines:
-            result_parts.append(_summarize_quoted(current_quoted_lines))
-        result_parts.append("\n".join(current_reply_lines))
+    # Flush any remaining content
+    flush_quoted()
+    flush_reply()
 
-    result = "\n\n".join(part for part in result_parts if part.strip())
-    return _strip_trailing_delimiters(result)
+    # Strip trailing delimiters from the last chunk
+    if result:
+        last_text, last_source = result[-1]
+        result[-1] = (_strip_trailing_delimiters(last_text), last_source)
 
-
-def _summarize_quoted(quoted_lines: list[str]) -> str:
-    """Create a brief summary of quoted content for context."""
-    # Join and truncate to provide context
-    text = " ".join(line.strip() for line in quoted_lines if line.strip())
-    if len(text) > 100:
-        text = text[:97] + "..."
-    return f"[In reply to: {text}]"
+    return result
 
 
 def _strip_trailing_delimiters(text: str) -> str:
@@ -282,32 +309,6 @@ def _strip_trailing_delimiters(text: str) -> str:
     return _TRAILING_LINE_DELIMITERS.sub("", text)
 
 
-# Simple way to get the last response on an email thread in MIME format
-def get_last_response_in_thread(email_text: str) -> str:
-    """
-    Extract the latest response from an email thread.
-
-    Handles two patterns:
-    1. Top-posted replies: New content at top, quoted thread at bottom
-    2. Inline replies: Responses interspersed with quoted text
-
-    For inline replies, only the reply portions (non-quoted text) are extracted.
-    """
-    if not email_text:
-        return ""
-
-    # Check for inline reply pattern first
-    if is_inline_reply(email_text):
-        return extract_inline_reply(email_text, include_context=False)
-
-    # Fall back to original behavior for bottom-posted replies
-    match = _THREAD_DELIMITERS.search(email_text)
-    if match:
-        email_text = email_text[: match.start()]
-
-    return _strip_trailing_delimiters(email_text)
-
-
 # Extracts the plain text body from an email.message.Message object.
 def _extract_email_body(msg: Message) -> str:
     """Extracts the plain text body from an email.message.Message object."""
@@ -365,11 +366,11 @@ def _text_to_chunks(text: str, max_chunk_length: int) -> list[str]:
     if len(text) < max_chunk_length:
         return [text]
 
-    paragraphs = _splitIntoParagraphs(text)
+    paragraphs = _split_into_paragraphs(text)
     return list(_merge_chunks(paragraphs, "\n\n", max_chunk_length))
 
 
-def _splitIntoParagraphs(text: str) -> list[str]:
+def _split_into_paragraphs(text: str) -> list[str]:
     return _remove_empty_strings(re.split(r"\n{2,}", text))
 
 
diff --git a/typeagent/emails/email_message.py b/typeagent/emails/email_message.py
index 4b1ec287..69a06eda 100644
--- a/typeagent/emails/email_message.py
+++ b/typeagent/emails/email_message.py
@@ -160,6 +160,12 @@ def __init__(self, **data: Any) -> None:
         super().__init__(**data)
 
     text_chunks: list[str] = CamelCaseField("The text chunks of the email message")
+    # For each chunk: None means original content, str means quoted.
+    # If quoted, the string is the name of the person being quoted, or " " if unknown.
+    chunk_sources: list[str | None] = CamelCaseField(
+        "Source attribution for each chunk: None=original, str=quoted person or ' '",
+        default_factory=list,
+    )
     metadata: EmailMessageMeta = CamelCaseField(
         "Metadata associated with the email message"
     )

From 21282ba6e959ff16cc3dd67530f6b24c4a7b1bbf Mon Sep 17 00:00:00 2001
From: Guido van Rossum <gvanrossum@microsoft.com>
Date: Wed, 3 Dec 2025 15:00:29 -0800
Subject: [PATCH 4/4] Add CODEOWNERS

---
 CODEOWNERS | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 CODEOWNERS

diff --git a/CODEOWNERS b/CODEOWNERS
new file mode 100644
index 00000000..bcff3975
--- /dev/null
+++ b/CODEOWNERS
@@ -0,0 +1 @@
+* @gvanrossum @gvanrossum-ms @umeshma @robgruen