mlomb · brzvsk · Sep 12, 2025 · Sep 12, 2025
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -1,5 +1,7 @@
 # v1.1.3 (YYYY/MM/DD) [IN DEVELOPMENT]
 
+- Added support for Telegram data exports (all chats) in addition to single-chat exports. Both formats are automatically detected and supported with full backwards compatibility.
+
 # v1.1.2 (2024/08/14)
 
 - Fixed WhatsApp channel type. [Comment](https://github.com/mlomb/chat-analytics/commit/9e25f8bfe3323fc62ce6d7474c3f38d45390358c#r145396724).

diff --git a/pipeline/parse/parsers/Telegram.d.ts b/pipeline/parse/parsers/Telegram.d.ts
@@ -50,3 +50,26 @@ interface TextArray {
         | unknown;
     text: string;
 }
+
+// A chat: the root object of a single-chat export and each item of `chats.list` in a multi-chat export: { name, type, id, messages: [...] }
+interface TelegramChat {
+    name?: string; // optional: TelegramParser.parseChatObject falls back to "Telegram chat" when it is missing
+    type: TelegramChannelType;
+    id: number;
+    messages: TelegramMessage[];
+}
+
+// Value of the `chats` key of a multi-chat export: { about, list: [TelegramChat, ...] }
+interface TelegramChatsContainer {
+    about: string;
+    list: TelegramChat[];
+}
+
+interface TelegramDataExport { // root object of a multi-chat (full data) export
+    about?: string;
+    personal_information?: unknown; // not read by the parser, so left opaque instead of `any`
+    profile_pictures?: unknown[];
+    stories?: unknown[];
+    contacts?: unknown;
+    chats: TelegramChatsContainer; // the only key TelegramParser registers a handler for
+}
diff --git a/pipeline/parse/parsers/TelegramParser.ts b/pipeline/parse/parsers/TelegramParser.ts
@@ -17,23 +17,129 @@ export class TelegramParser extends Parser {
     /**
      * Regex to find the timestamp of the last message in a Telegram export file.
      * We use the timestamp of the last message as the `at` value (see @Parser)
+     * The match is purely textual ("date" / "date_unixtime" keys), so it also finds messages nested inside `chats.list`.
      */
     static readonly TS_MSG_REGEX = /"date(?:_unixtime)?": ?"(.+?)"/gi;
 
     async *parse(file: FileInput, progress?: Progress) {
         this.lastMessageTimestampInFile = await tryToFindTimestampAtEnd(TelegramParser.TS_MSG_REGEX, file);
 
+        // Choose the parsing strategy (multi-chat vs single-chat export) from the beginning of the file
+        const isMultiChatExport = await this.detectMultiChatFormat(file);
+
+        if (isMultiChatExport) {
+            yield* this.parseMultiChatExport(file, progress);
+        } else {
+            yield* this.parseSingleChatExport(file, progress);
+        }
+
+        // Reset state once the file has been fully consumed. NOTE(review): lastChannelType is not reset here.
+        this.lastChannelName = undefined;
+        this.lastChannelID = undefined;
+        this.lastEmittedMessageTimestamp = undefined;
+    }
+
+    /**
+     * Detect if this is a multi-chat export by examining the start of the file.
+     * Multi-chat exports have: { chats: { list: [...] } }
+     * Single-chat exports have: { name, type, id, messages: [...] }
+     * Only the first 2MB are inspected; anything else (including read errors) is treated as single-chat.
+     * If the `chats` key starts beyond that window the export is not recognized as multi-chat.
+     *
+     * @returns true when a `chats` object followed by a `list` array is found in the sample
+     */
+    private async detectMultiChatFormat(file: FileInput): Promise<boolean> {
+        try {
+            // Read first 2MB to detect format without loading the entire file
+            const sampleSize = Math.min(2097152, file.size || 2097152);
+            const buffer = await file.slice(0, sampleSize);
+            const text = new TextDecoder('utf-8').decode(buffer);
+
+            // Match JSON keys (a quoted name followed by ":"), not bare substrings: a plain
+            // includes('"chats"') is also true for a single-chat export that merely contains the
+            // string values "chats" and "list" (e.g. a chat name), which would pick the wrong parser
+            const chatsKey = /"chats"\s*:\s*\{/.exec(text);
+            if (chatsKey === null) {
+                // No `chats` object in the sample: single-chat export (or an unknown format)
+                return false;
+            }
+
+            // The array of chats is the `list` key inside the `chats` object, so only search after it
+            const afterChatsKey = text.slice(chatsKey.index + chatsKey[0].length);
+            return /"list"\s*:\s*\[/.test(afterChatsKey);
+        } catch (err) {
+            // If detection fails, assume single-chat format for backward compatibility
+            return false;
+        }
+    }
+
+    /**
+     * Parse a single-chat export ({ name, type, id, messages: [...] }): the "name", "type" and "id" keys go to the onChannel* handlers and each item of "messages" goes to parseMessage.
+     */
+    private async *parseSingleChatExport(file: FileInput, progress?: Progress) {
         const stream = new JSONStream()
             .onObject<string>("name", this.onChannelName.bind(this))
             .onObject<TelegramChannelType>("type", this.onChannelType.bind(this))
             .onObject<RawID>("id", this.onChannelId.bind(this))
             .onArrayItem<TelegramMessage>("messages", this.parseMessage.bind(this));
 
         yield* streamJSONFromFile(stream, file, progress);
+    }
+
+    /**
+     * Parse a multi-chat export format: { chats: { list: [TelegramChat, ...] } }. NOTE(review): the handler is registered on the whole `chats` object, so presumably every chat and message is held in memory before parseChatsContainer runs (unlike the per-item "messages" handler used for single-chat exports) — confirm against JSONStream.
+     */
+    private async *parseMultiChatExport(file: FileInput, progress?: Progress) {
+        const stream = new JSONStream()
+            .onObject<TelegramChatsContainer>("chats", this.parseChatsContainer.bind(this));
+
+        yield* streamJSONFromFile(stream, file, progress);
+    }
 
+    /**
+     * Handle the `chats` object of a multi-chat export: every chat in its `list` is passed to parseChatObject, in order.
+     */
+    private parseChatsContainer(chatsContainer: TelegramChatsContainer) {
+        for (const chat of chatsContainer.list) {
+            this.parseChatObject(chat);
+        }
+    }
+
+    /**
+     * Parse one chat object from the `list` of a multi-chat export (called by parseChatsContainer; single-chat exports use the handlers registered in parseSingleChatExport instead).
+     * Resets the per-chat state, emits the guild and the channel, then passes every message to parseMessage.
+     */
+    private parseChatObject(chat: TelegramChat) {
+        // Reset state for this chat so nothing carries over from the previous one
         this.lastChannelName = undefined;
+        this.lastChannelType = undefined;
         this.lastChannelID = undefined;
         this.lastEmittedMessageTimestamp = undefined;
+
+        // Set chat metadata (presumably read by parseMessage to attribute messages to this chat — confirm)
+        this.lastChannelName = chat.name;
+        this.lastChannelType = chat.type;
+        this.lastChannelID = chat.id;
+
+        // Emit guild and channel; the same guild (id 0) is emitted again for every chat
+        const pguild: PGuild = {
+            id: 0,
+            name: "Telegram Chats",
+        };
+        const pchannel: PChannel = {
+            id: chat.id,
+            guildId: 0,
+            name: chat.name || "Telegram chat",
+            type: ["personal_chat", "bot_chat"].includes(chat.type || "") ? "dm" : "group",
+        };
+
+        this.emit("guild", pguild, this.lastMessageTimestampInFile);
+        this.emit("channel", pchannel, this.lastMessageTimestampInFile);
+
+        // Process all messages in this chat, in file order
+        for (const message of chat.messages) {
+            this.parseMessage(message);
+        }
     }
 
     private onChannelName(channelName: string) {

diff --git a/tests/parse/Parsers.test.ts b/tests/parse/Parsers.test.ts
@@ -24,6 +24,7 @@ describe("should parse correctly", () => {
         { parser: WhatsAppParser, inputs: ["whatsapp/4A_11M.zip"] },
 
         { parser: TelegramParser, inputs: ["telegram/DM_2A_7M.json"] },
+        { parser: TelegramParser, inputs: ["telegram/MultiChat_2C_6M.json"] },
 
         { parser: MessengerParser, inputs: ["messenger/2A_7M.json"] },
 
@@ -53,6 +54,7 @@ describe("timestamp of the last message at the end of the file", () => {
         { file: "discord/SV_5A_5M.json", regex: DiscordParser.TS_MSG_REGEX, lastMessageTimestamp: new Date("2018-05-20T16:09:51.118+00:00").getTime() },
 
         { file: "telegram/DM_2A_7M.json", regex: TelegramParser.TS_MSG_REGEX, lastMessageTimestamp: 1691719862 },
+        { file: "telegram/MultiChat_2C_6M.json", regex: TelegramParser.TS_MSG_REGEX, lastMessageTimestamp: 1672747200 },
     ];
 
     test.each(cases)("$file", async ({ file, regex, lastMessageTimestamp }) => {

diff --git a/tests/samples/telegram/MultiChat_2C_6M.json b/tests/samples/telegram/MultiChat_2C_6M.json
@@ -0,0 +1,112 @@
+{
+    "about": "Test multi-chat export",
+    "personal_information": {
+        "user_id": 100000001,
+        "first_name": "Test",
+        "last_name": "User"
+    },
+    "chats": {
+        "about": "This page lists all chats from this export.",
+        "list": [
+            {
+                "name": "Chat One",
+                "type": "personal_chat",
+                "id": 700000001,
+                "messages": [
+                    {
+                        "id": 1001,
+                        "type": "message",
+                        "date": "2023-01-01T10:00:00",
+                        "date_unixtime": "1672574400",
+                        "from": "Alice",
+                        "from_id": "user300000000",
+                        "text": "Hello from chat one!",
+                        "text_entities": [
+                            {
+                                "type": "plain",
+                                "text": "Hello from chat one!"
+                            }
+                        ]
+                    },
+                    {
+                        "id": 1002,
+                        "type": "message",
+                        "date": "2023-01-01T10:01:00",
+                        "date_unixtime": "1672574460",
+                        "from": "Bob",
+                        "from_id": "user700000000",
+                        "text": "Hi there!",
+                        "text_entities": [
+                            {
+                                "type": "plain",
+                                "text": "Hi there!"
+                            }
+                        ]
+                    }
+                ]
+            },
+            {
+                "name": "Chat Two",
+                "type": "private_group",
+                "id": 700000002,
+                "messages": [
+                    {
+                        "id": 2001,
+                        "type": "message",
+                        "date": "2023-01-02T10:00:00",
+                        "date_unixtime": "1672660800",
+                        "from": "Charlie",
+                        "from_id": "user800000000",
+                        "text": "Welcome to chat two!",
+                        "text_entities": [
+                            {
+                                "type": "plain",
+                                "text": "Welcome to chat two!"
+                            }
+                        ]
+                    },
+                    {
+                        "id": 2002,
+                        "type": "message",
+                        "date": "2023-01-02T10:02:00",
+                        "date_unixtime": "1672660920",
+                        "from": "David",
+                        "from_id": "user900000000",
+                        "text": "Group chat is fun!",
+                        "text_entities": [
+                            {
+                                "type": "plain",
+                                "text": "Group chat is fun!"
+                            }
+                        ]
+                    },
+                    {
+                        "id": 2003,
+                        "type": "service",
+                        "date": "2023-01-02T10:03:00",
+                        "date_unixtime": "1672660980",
+                        "actor": "Charlie",
+                        "actor_id": "user800000000",
+                        "action": "phone_call",
+                        "duration_seconds": 120,
+                        "text": "",
+                        "text_entities": []
+                    },
+                    {
+                        "id": 2004,
+                        "type": "message",
+                        "date": "2023-01-03T10:00:00",
+                        "date_unixtime": "1672747200",
+                        "from": "Eve",
+                        "from_id": "user101000000",
+                        "photo": "(File not included. Change data exporting settings to download.)",
+                        "width": 800,
+                        "height": 600,
+                        "text": "",
+                        "text_entities": []
+                    }
+                ]
+            }
+        ]
+    }
+}
diff --git a/tests/samples/telegram/MultiChat_2C_6M.json.ts b/tests/samples/telegram/MultiChat_2C_6M.json.ts
@@ -0,0 +1,65 @@
+import { AttachmentType } from "@pipeline/Attachments";
+
+import type { ExpectedPartialParseResult } from "@tests/parse/Parse";
+import { PGUILD_DEFAULT } from "@tests/samples/telegram/Common";
+
+export const expectedParse: ExpectedPartialParseResult = { // expectations for the MultiChat_2C_6M.json sample
+    guilds: [PGUILD_DEFAULT], // a single guild is expected even though the sample holds two chats
+    channels: [
+        { id: 700000001, guildId: 0, type: "dm", name: "Chat One" }, // "personal_chat" in the sample
+        { id: 700000002, guildId: 0, type: "group", name: "Chat Two" }, // "private_group" in the sample
+    ],
+    authors: [
+        { id: "user300000000", name: "Alice", bot: false },
+        { id: "user700000000", name: "Bob", bot: false },
+        { id: "user800000000", name: "Charlie", bot: false },
+        { id: "user900000000", name: "David", bot: false },
+        { id: "user101000000", name: "Eve", bot: false },
+    ],
+    messages: [
+        {
+            id: "1001",
+            authorId: "user300000000",
+            channelId: 700000001,
+            textContent: "Hello from chat one!",
+            timestamp: 1672574400 * 1000,
+        },
+        {
+            id: "1002",
+            authorId: "user700000000",
+            channelId: 700000001,
+            textContent: "Hi there!",
+            timestamp: 1672574460 * 1000,
+        },
+        {
+            id: "2001",
+            authorId: "user800000000",
+            channelId: 700000002,
+            textContent: "Welcome to chat two!",
+            timestamp: 1672660800 * 1000,
+        },
+        {
+            id: "2002",
+            authorId: "user900000000",
+            channelId: 700000002,
+            textContent: "Group chat is fun!",
+            timestamp: 1672660920 * 1000,
+        },
+        {
+            id: "2004",
+            attachments: [AttachmentType.Image], // the sample message has a "photo" key and an empty text
+            authorId: "user101000000",
+            channelId: 700000002,
+            timestamp: 1672747200 * 1000,
+        },
+    ],
+    calls: [ // sample message 2003 is a "service" message with action "phone_call"
+        {
+            id: "2003",
+            authorId: "user800000000",
+            channelId: 700000002,
+            timestampStart: 1672660980 * 1000,
+            timestampEnd: (1672660980 + 120) * 1000, // start + duration_seconds (120) from the sample
+        },
+    ],
+};