diff --git a/README.md b/README.md
index 932741c..8d082c9 100644
--- a/README.md
+++ b/README.md
@@ -175,7 +175,14 @@ Each line in `conversations.jsonl` is one session:
   "start_time": "2025-06-15T10:00:00+00:00",
   "end_time": "2025-06-15T10:30:00+00:00",
   "messages": [
-    {"role": "user", "content": "Fix the login bug", "timestamp": "..."},
+    {
+      "role": "user",
+      "content": "Fix the login bug",
+      "content_parts": [
+        {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "..."}}
+      ],
+      "timestamp": "..."
+    },
     {
       "role": "assistant",
       "content": "I'll investigate the login flow.",
@@ -201,6 +208,8 @@ Each line in `conversations.jsonl` is one session:
 }
 ```
 
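+`messages[].content_parts` is optional and preserves structured user content (such as image/document attachments and embedded tool calls or results) when the source provides it. The canonical human-readable user text remains in `messages[].content`, which may be omitted when a message has no plain-text part.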
+
 `tool_uses[].output.raw` is optional and preserves extra structured tool-result fields when the source provides them. The canonical human-readable result text remains in `tool_uses[].output.text`.
 
 Each HF repo also includes a `metadata.json` with aggregate stats.
diff --git a/dataclaw/_cli/exporting.py b/dataclaw/_cli/exporting.py
index 16d49ff..d7c6bf8 100644
--- a/dataclaw/_cli/exporting.py
+++ b/dataclaw/_cli/exporting.py
@@ -1,5 +1,7 @@
 """Export and publish helpers for the DataClaw CLI."""
 
+import hashlib
+import json as std_json
 import sys
 import urllib.error
 import urllib.request
@@ -12,6 +14,17 @@
 from .common import HF_TAG, REPO_URL, SKILL_URL, _format_token_count, _provider_dataset_tags
 
 
+def _gemini_dedupe_fingerprint(session: dict, source: str) -> str | None:
+    if source != "gemini":  # only Gemini sessions are fingerprinted; None means "do not dedupe"
+        return None
+    # Hash the session with its project label removed, so copies differing only in `project` collide.
+    canonical = dict(session)  # shallow copy: the caller's session dict is left untouched
+    canonical["source"] = source
+    canonical.pop("project", None)
+    payload = std_json.dumps(canonical, sort_keys=True, separators=(",", ":"))  # sorted keys => stable text
+    return hashlib.sha256(payload.encode()).hexdigest()
+
+
 def export_to_jsonl(
     selected_projects: list[dict],
     output_path: Path,
@@ -28,6 +41,7 @@ def export_to_jsonl(
     total_input_tokens = 0
     total_output_tokens = 0
     project_names = []
+    seen_fingerprints: set[str] = set()
 
     try:
         fh = open(output_path, "wb")
@@ -46,14 +60,22 @@ def export_to_jsonl(
             )
             proj_count = 0
             for session in sessions:
+                source = session.get("source") or project.get("source", default_source)
                 model = session.get("model")
                 if not model or model == "<synthetic>":
                     skipped += 1
                     continue
 
+                fingerprint = _gemini_dedupe_fingerprint(session, source)
+                if fingerprint is not None and fingerprint in seen_fingerprints:
+                    continue
+
                 session, n_redacted = redact_session(session, custom_strings=custom_strings)
                 total_redactions += n_redacted
 
+                if fingerprint is not None:
+                    seen_fingerprints.add(fingerprint)
+
                 f.write(json.dumps_bytes(session))
                 f.write(b"\n")
                 total += 1
diff --git a/dataclaw/parsers/gemini.py b/dataclaw/parsers/gemini.py
index 436cea1..302472c 100644
--- a/dataclaw/parsers/gemini.py
+++ b/dataclaw/parsers/gemini.py
@@ -1,12 +1,21 @@
 import hashlib
 import logging
 import os
+from collections import defaultdict, deque
 from pathlib import Path
 from typing import Any, Callable
 
 from .. import _json as json
 from ..anonymizer import Anonymizer
-from .common import collect_project_sessions, make_session_result, make_stats, update_time_bounds
+from ..secrets import should_skip_large_binary_string
+from .common import (
+    anonymize_value,
+    collect_project_sessions,
+    make_session_result,
+    make_stats,
+    parse_tool_input,
+    update_time_bounds,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -282,6 +291,145 @@ def parse_tool_call(tool_call: dict, anonymizer: Anonymizer) -> dict:
     return {"tool": name, "input": inp, "output": out, "status": status}
 
 
+def anonymize_text_preserving_blobs(
+    text: Any,  # candidate text; any non-str value yields None
+    anonymizer: Anonymizer,
+    *,
+    strip: bool = False,  # trim surrounding whitespace before anonymizing
+    drop_empty: bool = True,  # return None for empty / whitespace-only text
+) -> str | None:
+    if not isinstance(text, str):
+        return None
+    if should_skip_large_binary_string(text):
+        return text  # flagged strings are returned verbatim: no strip, no anonymizer pass
+    normalized = text.strip() if strip else text
+    if drop_empty and not normalized.strip():
+        return None
+    return anonymizer.text(normalized)
+
+
+def build_gemini_call_id(name: str, args: Any, counters: dict[str, int]) -> str:  # NOTE(review): `args` is unused
+    counters[name] += 1  # bumps the caller's counter in place; needs a defaultdict-style mapping for unseen names
+    return f"fc_{name}_{counters[name]}"  # id is "fc_<name>_<n>", n being the updated per-name count
+
+
+def anonymize_file_uri(file_uri: Any, anonymizer: Anonymizer) -> str | None:
+    if not isinstance(file_uri, str):
+        return None  # missing or non-string URIs are dropped
+    if file_uri.startswith("file://"):
+        return f"file://{anonymizer.path(file_uri[7:])}"  # 7 == len("file://"); only the path part is rewritten
+    return anonymizer.text(file_uri)  # any other URI goes through the anonymizer's text pass
+
+
+def parse_gemini_user_part(
+    part: Any,
+    anonymizer: Anonymizer,
+    pending_call_ids: dict[str, deque[str]],
+    call_counters: dict[str, int],
+) -> tuple[str | None, dict[str, Any] | None]:
+    if isinstance(part, str):
+        text = anonymize_text_preserving_blobs(part, anonymizer, drop_empty=False)
+        if text is None:
+            return None, None
+        if should_skip_large_binary_string(part):
+            return None, {"type": "text", "text": text}
+        return text, None
+
+    if not isinstance(part, dict):
+        return None, None
+
+    if "text" in part:
+        text = anonymize_text_preserving_blobs(part.get("text"), anonymizer, drop_empty=False)
+        if text is None:
+            return None, None
+        if should_skip_large_binary_string(part.get("text", "")):
+            return None, {"type": "text", "text": text}
+        return text, None
+
+    inline = part.get("inlineData")
+    if isinstance(inline, dict):
+        mime_type = inline.get("mimeType", "")
+        return None, {
+            "type": "image" if isinstance(mime_type, str) and mime_type.startswith("image/") else "document",
+            "source": {
+                "type": "base64",
+                "media_type": mime_type,
+                "data": inline.get("data", ""),
+            },
+        }
+
+    file_data = part.get("fileData")
+    if isinstance(file_data, dict):
+        source: dict[str, Any] = {"type": "url"}
+        url = anonymize_file_uri(file_data.get("fileUri"), anonymizer)
+        if url:
+            source["url"] = url
+        mime_type = file_data.get("mimeType")
+        if mime_type:
+            source["media_type"] = mime_type
+        return None, {"type": "document", "source": source}
+
+    function_call = part.get("functionCall")
+    if isinstance(function_call, dict):
+        name = function_call.get("name", "unknown")
+        args = function_call.get("args", {})
+        call_id = function_call.get("id") or build_gemini_call_id(name, args, call_counters)
+        pending_call_ids[name].append(call_id)
+        return None, {
+            "type": "tool_use",
+            "id": call_id,
+            "name": name,
+            "input": parse_tool_input(name, args, anonymizer),
+        }
+
+    function_response = part.get("functionResponse")
+    if isinstance(function_response, dict):
+        name = function_response.get("name", "unknown")
+        tool_use_id = function_response.get("id") or (
+            pending_call_ids[name].popleft() if pending_call_ids.get(name) else f"fc_{name}"
+        )
+        response = function_response.get("response")
+        content: Any = None
+        if isinstance(response, dict) and "output" in response:
+            content = anonymize_text_preserving_blobs(response.get("output"), anonymizer)
+        elif response is not None:
+            content = anonymize_value("response", response, anonymizer)
+        part_result: dict[str, Any] = {"type": "tool_result", "tool_use_id": tool_use_id}
+        if content not in (None, "", [], {}):
+            part_result["content"] = content
+        return None, part_result
+
+    return None, None
+
+
+def parse_gemini_user_content(content: Any, anonymizer: Anonymizer) -> tuple[str | None, list[dict[str, Any]]]:
+    if isinstance(content, str):  # returns (plain text or None, structured parts); a bare string is one part
+        text = anonymize_text_preserving_blobs(content, anonymizer, drop_empty=False)
+        if text is None:
+            return None, []
+        if should_skip_large_binary_string(content):
+            return None, [{"type": "text", "text": text}]  # flagged strings are kept as a part, not as text
+        return text, []
+
+    if not isinstance(content, list):
+        return None, []  # neither str nor list: nothing to extract
+
+    text_parts: list[str] = []
+    content_parts: list[dict[str, Any]] = []
+    pending_call_ids: dict[str, deque[str]] = defaultdict(deque)  # call ids awaiting a response, per tool name
+    call_counters: dict[str, int] = defaultdict(int)  # per-name sequence for synthesized call ids
+
+    for part in content:
+        text, content_part = parse_gemini_user_part(part, anonymizer, pending_call_ids, call_counters)
+        if text is not None:
+            text_parts.append(text)  # empty strings are kept, so they still add a separator when joined
+        if content_part:
+            content_parts.append(content_part)
+
+    text_content = "\n".join(text_parts) if text_parts else None  # None (not "") when no part produced text
+    return text_content, content_parts
+
+
 def parse_session_file(
     filepath: Path,
     anonymizer: Anonymizer,
@@ -313,23 +461,15 @@ def parse_session_file(
         timestamp = msg_data.get("timestamp")
 
         if msg_type == "user":
-            content = msg_data.get("content")
-            if isinstance(content, list):
-                text_parts = [part.get("text", "") for part in content if isinstance(part, dict) and "text" in part]
-                text = "\n".join(text_parts)
-            elif isinstance(content, str):
-                text = content
-            else:
-                continue
-            if not text.strip():
+            text, content_parts = parse_gemini_user_content(msg_data.get("content"), anonymizer)
+            if text is None and not content_parts:
                 continue
-            messages.append(
-                {
-                    "role": "user",
-                    "content": anonymizer.text(text.strip()),
-                    "timestamp": timestamp,
-                }
-            )
+            message: dict[str, Any] = {"role": "user", "timestamp": timestamp}
+            if text is not None:
+                message["content"] = text
+            if content_parts:
+                message["content_parts"] = content_parts
+            messages.append(message)
             stats["user_messages"] += 1
             update_time_bounds(metadata, timestamp)
 
diff --git a/dataclaw/secrets.py b/dataclaw/secrets.py
index 81d71cd..139ea2b 100644
--- a/dataclaw/secrets.py
+++ b/dataclaw/secrets.py
@@ -310,6 +310,9 @@ def redact_session(session: dict, custom_strings: list[str] | None = None) -> tu
                 if custom_strings:
                     msg[field], count = redact_custom_strings(msg[field], custom_strings)
                     total += count
+        if msg.get("content_parts"):
+            msg["content_parts"], count = _redact_value(msg["content_parts"], custom_strings)
+            total += count
         for tool_use in msg.get("tool_uses", []):
             for field in ("input", "output"):
                 if tool_use.get(field):
diff --git a/docs/gemini-vs-cchv-gaps.md b/docs/gemini-vs-cchv-gaps.md
new file mode 100644
index 0000000..20c997e
--- /dev/null
+++ b/docs/gemini-vs-cchv-gaps.md
@@ -0,0 +1,280 @@
+# Gemini CLI Gaps vs CCHV
+
+## Scope
+
+This note compares Gemini CLI handling in:
+
+- DataClaw: `~/dataclaw`
+- Claude Code History Viewer (CCHV): `~/claude-code-history-viewer`
+
+The goal is to identify Gemini CLI data that CCHV captures more faithfully than DataClaw today, while also noting important cases where DataClaw retains data that CCHV skips.
+
+## Summary
+
+CCHV preserves more Gemini message/event structure than DataClaw in several places.
+
+The biggest current DataClaw gaps are:
+
+1. It drops Gemini `info` / `warning` / `error` messages.
+2. It drops `resultDisplay` tool UI output, including file-diff previews and tool-status strings.
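+3. It drops non-text user content parts such as `inlineData` images/documents (addressed for `user` messages by `messages[].content_parts`).
+4. It drops part-level `functionResponse` blocks embedded in message content (addressed for `user` messages by `tool_result` entries in `content_parts`).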
+5. It only keeps session-level token totals, while CCHV keeps per-message Gemini token usage.
+6. It does not preserve top-level Gemini session metadata such as `summary` and `kind`.
+
+Important counterpoint:
+
+- CCHV explicitly skips Gemini sessions whose top-level `kind` is `subagent`, while DataClaw currently exports them as normal sessions.
+
+## Detailed Findings
+
+### 1. CCHV keeps `info` / `warning` / `error` messages; DataClaw drops them
+
+CCHV converts Gemini message records with types:
+
+- `user`
+- `gemini`
+- `info`
+- `warning`
+- `error`
+
+References:
+
+- CCHV Gemini message dispatch: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:464-485`
+- CCHV system-message conversion: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:630-653`
+
+DataClaw only exports:
+
+- `user`
+- `gemini`
+
+References:
+
+- DataClaw Gemini parser message handling: `~/dataclaw/dataclaw/parsers/gemini.py:311-375`
+
+Practical consequence:
+
+- DataClaw drops Gemini informational and error messages that CCHV exposes as system messages.
+
+Observed in real Gemini data on this machine:
+
+- `835` `info` messages
+- `29` `error` messages
+
+Example real file:
+
+- `~/.gemini/tmp/comfyui-featherops/chats/session-2026-03-24T08-56-51cb7147.json:10,16,1111`
+
+### 2. CCHV keeps `resultDisplay`; DataClaw ignores it
+
+CCHV converts Gemini `toolCalls[].resultDisplay` into extra content blocks.
+
+References:
+
+- CCHV includes `resultDisplay` during Gemini tool-call conversion: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:592-597`
+- CCHV `extract_result_display(...)`: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:893-928`
+
+DataClaw's Gemini parser never reads `resultDisplay`.
+
+References:
+
+- DataClaw Gemini tool-call parsing: `~/dataclaw/dataclaw/parsers/gemini.py:162-282`
+
+Practical consequence:
+
+- DataClaw drops user-visible tool UI output, including:
+  - short status strings such as `Found 4 matching file(s)`
+  - read-file previews such as `Read lines 50-150 ...`
+  - file-diff previews stored in `resultDisplay.fileDiff`
+  - potential subagent-progress markers (`isSubagentProgress`) if they appear
+
+Observed in real Gemini data on this machine:
+
+- `4386` string `resultDisplay` values
+- `854` object `resultDisplay` values containing:
+  - `fileDiff`
+  - `fileName`
+  - `filePath`
+  - `originalContent`
+  - `newContent`
+  - `diffStat`
+  - `isNewFile`
+
+Example real file with file-diff previews:
+
+- `~/.gemini/tmp/comfyui-featherops/chats/session-2026-03-28T01-43-f9f3aa2a.json:305-306,350-351,395-396,440-441`
+
+### 3. CCHV keeps non-text Gemini content parts; DataClaw drops them in user messages
+
+CCHV converts Gemini content parts such as:
+
+- `inlineData` image/document blocks
+- `fileData` URL-backed document blocks
+- plain text parts
+- `functionCall`
+- `functionResponse`
+- `executableCode`
+- `codeExecutionResult`
+
+References:
+
+- CCHV content conversion helpers: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:660-867`
+
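+Before `parse_gemini_user_content` was introduced, DataClaw's Gemini parser, for `user` messages, only extracted parts containing `text` and dropped the rest; it now keeps `inlineData`, `fileData`, `functionCall`, and `functionResponse` parts in `messages[].content_parts`, while `executableCode` and `codeExecutionResult` parts are still dropped.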
+
+References:
+
+- DataClaw user-message extraction: `~/dataclaw/dataclaw/parsers/gemini.py:315-323`
+
+Practical consequence:
+
+- DataClaw drops user attachments and other structured Gemini content parts that CCHV preserves.
+
+Observed in real Gemini data on this machine:
+
+- real `inlineData` image attachments exist in user messages, for example:
+  `~/.gemini/tmp/rocm-systems/chats/session-2026-03-06T03-33-68bc726c.json:4764,4794,5006,5036`
+
+These include large base64 image payloads that CCHV maps to image/document blocks.
+
+### 4. CCHV keeps part-level `functionResponse` blocks; DataClaw drops them when embedded in content
+
+CCHV converts `functionResponse` parts in Gemini content into `tool_result` blocks.
+
+References:
+
+- CCHV `functionResponse` conversion in `convert_gemini_content_to_claude(...)`: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:691-709`
+- CCHV direct `functionResponse` part conversion: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:813-830`
+
+DataClaw did not preserve these when they appear inside a message `content` array, because its user-message parser kept only text parts; `user` messages now carry them as `tool_result` entries in `messages[].content_parts`.
+
+References:
+
+- DataClaw user-message extraction: `~/dataclaw/dataclaw/parsers/gemini.py:315-323`
+
+Practical consequence:
+
+- DataClaw loses some Gemini tool-result structure that is encoded directly in content parts rather than only in `toolCalls[].result`.
+
+Observed in real Gemini data on this machine:
+
+- real `functionResponse` content parts exist, for example in:
+  `~/.gemini/tmp/rocm-systems/chats/session-2026-03-06T03-33-68bc726c.json:122,147,172,231`
+
+### 5. CCHV keeps per-message token usage; DataClaw only keeps session totals
+
+CCHV stores Gemini per-message token usage derived directly from each Gemini response record's `tokens` field.
+
+References:
+
+- CCHV Gemini usage extraction: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:601-608,616-627`
+
+DataClaw only aggregates token counts into session-level `stats`.
+
+References:
+
+- DataClaw Gemini token aggregation: `~/dataclaw/dataclaw/parsers/gemini.py:340-343`
+- DataClaw normalized session shape: `~/dataclaw/dataclaw/parsers/common.py:56-71`
+
+Practical consequence:
+
+- DataClaw loses per-message Gemini usage, including cached-input attribution on individual assistant responses.
+
+### 6. CCHV keeps more Gemini session metadata than DataClaw exports
+
+CCHV extracts Gemini session metadata including:
+
+- `session_id`
+- `kind`
+- `start_time`
+- `last_updated`
+- `message_count`
+- `has_tool_use`
+- `summary`
+
+References:
+
+- CCHV Gemini lightweight metadata extraction: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:388-457`
+
+DataClaw exports a smaller normalized session shape and does not preserve top-level Gemini `summary` or `kind`.
+
+References:
+
+- DataClaw Gemini metadata initialization: `~/dataclaw/dataclaw/parsers/gemini.py:300-308`
+- DataClaw normalized session shape: `~/dataclaw/dataclaw/parsers/common.py:56-71`
+
+Practical consequence:
+
+- DataClaw loses Gemini session metadata that CCHV surfaces in its session index/browser.
+
+Observed in real Gemini data on this machine:
+
+- `122` Gemini session files have top-level `summary`
+- top-level `kind` values present include:
+  - `main`
+  - `subagent`
+
+## Important Counterpoint: CCHV skips Gemini `kind == "subagent"` sessions, but DataClaw exports them
+
+This is an important Gemini difference in the opposite direction.
+
+CCHV explicitly skips Gemini sessions whose top-level `kind` is `subagent` in both:
+
+- project/session listing
+- search
+
+References:
+
+- CCHV session listing skip: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:167-169`
+- CCHV project scan skip: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:85-88`
+- CCHV search skip: `~/claude-code-history-viewer/src-tauri/src/providers/gemini.rs:283-286`
+
+DataClaw does not filter by `kind`, so it includes Gemini subagent sessions if they exist as chat files.
+
+References:
+
+- DataClaw Gemini discovery and parse paths read all `session-*.json` files: `~/dataclaw/dataclaw/parsers/gemini.py:123-159`
+
+Observed in real Gemini data on this machine:
+
+- there is exactly one real Gemini chat file with `kind: "subagent"`:
+  `~/.gemini/tmp/tmp/chats/session-2026-03-05T03-59-51c63ffc.json:82`
+- DataClaw successfully parses and exports that session.
+
+So, unlike the Claude comparison, Gemini is not a simple one-way story where CCHV always preserves more.
+
+## Observed In Real Gemini Data On This Machine
+
+The following real Gemini structures exist on this machine and matter for the comparison:
+
+- `info` / `error` message types:
+  `~/.gemini/tmp/comfyui-featherops/chats/session-2026-03-24T08-56-51cb7147.json:10,16,1111`
+
+- `resultDisplay.fileDiff` edit previews:
+  `~/.gemini/tmp/comfyui-featherops/chats/session-2026-03-28T01-43-f9f3aa2a.json:305-306,350-351,395-396,440-441`
+
+- user `inlineData` image attachments:
+  `~/.gemini/tmp/rocm-systems/chats/session-2026-03-06T03-33-68bc726c.json:4764,4794,5006,5036`
+
+- content-part `functionResponse` blocks:
+  `~/.gemini/tmp/rocm-systems/chats/session-2026-03-06T03-33-68bc726c.json:122,147,172,231`
+
+- a real Gemini subagent session:
+  `~/.gemini/tmp/tmp/chats/session-2026-03-05T03-59-51c63ffc.json:82`
+
+These are all real, present data shapes, not just theoretical parser code paths.
+
+## Bottom Line
+
+Compared with CCHV, DataClaw currently loses more Gemini fidelity around:
+
+- `info` / `warning` / `error` messages
+- `resultDisplay` tool UI output
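+- non-text content parts such as `inlineData` (now preserved for `user` messages via `messages[].content_parts`)
+- part-level `functionResponse` blocks (now preserved for `user` messages as `tool_result` content parts)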
+- per-message token usage
+- top-level session metadata like `summary` and `kind`
+
+But CCHV also has one notable Gemini omission that DataClaw does not:
+
+- CCHV skips Gemini `kind == "subagent"` sessions from its normal session views, while DataClaw exports them.
diff --git a/tests/test_cli_exporting.py b/tests/test_cli_exporting.py
index 1519364..7319982 100644
--- a/tests/test_cli_exporting.py
+++ b/tests/test_cli_exporting.py
@@ -148,6 +148,73 @@ def test_skips_none_model(self, tmp_path, mock_anonymizer):
         assert meta["sessions"] == 0
         assert meta["skipped"] == 1
 
+    def test_dedupes_identical_gemini_sessions_ignoring_project_label(self, tmp_path, mock_anonymizer):
+        output = tmp_path / "out.jsonl"
+        session_upper = {
+            "session_id": "g1",
+            "model": "gemini-2.5-pro",
+            "git_branch": None,
+            "start_time": "2026-01-01T00:00:00Z",
+            "end_time": "2026-01-01T00:01:00Z",
+            "messages": [{"role": "user", "content": "hi"}],
+            "stats": {"input_tokens": 1, "output_tokens": 2},
+            "project": "gemini:ComfyUI",
+            "source": "gemini",
+        }
+        session_lower = {**session_upper, "project": "gemini:comfyui"}  # differs only in project label case
+        projects = [
+            {"dir_name": "upper", "display_name": "gemini:ComfyUI", "source": "gemini"},
+            {"dir_name": "lower", "display_name": "gemini:comfyui", "source": "gemini"},
+        ]
+
+        def parse_project_sessions(*args, **kwargs):
+            return [session_upper] if args[0] == "upper" else [session_lower]  # assumes dir_name is args[0]
+
+        meta = export_to_jsonl(
+            projects,
+            output,
+            mock_anonymizer,
+            parse_project_sessions_fn=parse_project_sessions,
+            default_source="gemini",
+        )
+
+        lines = output.read_text().strip().split("\n")
+        assert len(lines) == 1  # the second project's copy must be deduplicated away
+        assert meta["sessions"] == 1
+
+    def test_keeps_distinct_gemini_snapshots(self, tmp_path, mock_anonymizer):
+        output = tmp_path / "out.jsonl"
+        session_old = {
+            "session_id": "g1",
+            "model": "gemini-2.5-pro",
+            "git_branch": None,
+            "start_time": "2026-01-01T00:00:00Z",
+            "end_time": "2026-01-01T00:01:00Z",
+            "messages": [{"role": "user", "content": "short"}],
+            "stats": {"input_tokens": 1, "output_tokens": 2},
+            "project": "gemini:comfyui",
+            "source": "gemini",
+        }
+        session_new = {  # same session_id and project, but different end_time, messages and stats
+            **session_old,
+            "end_time": "2026-01-01T00:02:00Z",
+            "messages": [{"role": "user", "content": "longer"}],
+            "stats": {"input_tokens": 3, "output_tokens": 4},
+        }
+        projects = [{"dir_name": "proj", "display_name": "gemini:comfyui", "source": "gemini"}]
+
+        meta = export_to_jsonl(
+            projects,
+            output,
+            mock_anonymizer,
+            parse_project_sessions_fn=lambda *args, **kwargs: [session_old, session_new],
+            default_source="gemini",
+        )
+
+        lines = output.read_text().strip().split("\n")
+        assert len(lines) == 2  # differing content must not be collapsed, even with a shared session_id
+        assert meta["sessions"] == 2
+
 
 class TestPushToHuggingface:
     def test_missing_huggingface_hub(self, tmp_path, monkeypatch):
diff --git a/tests/test_parser_gemini.py b/tests/test_parser_gemini.py
new file mode 100644
index 0000000..77ff790
--- /dev/null
+++ b/tests/test_parser_gemini.py
@@ -0,0 +1,210 @@
+"""Tests for Gemini parser behavior."""
+
+from dataclaw import _json as json
+from dataclaw.parsers.gemini import parse_session_file
+
+
+class TestParseGeminiUserContentParts:
+    def test_user_text_parts_preserve_whitespace_and_empty_parts(self, tmp_path, mock_anonymizer):
+        session_file = tmp_path / "session-gemini.json"
+        session_file.write_text(
+            json.dumps(
+                {
+                    "sessionId": "gemini-session-0",
+                    "startTime": "2026-03-24T12:00:00Z",
+                    "lastUpdated": "2026-03-24T12:00:01Z",
+                    "messages": [
+                        {
+                            "type": "user",
+                            "timestamp": "2026-03-24T12:00:00Z",
+                            "content": [
+                                {"text": "Alpha"},
+                                {"text": ""},  # empty part: expected to survive as an empty line
+                                {"text": "  "},  # whitespace-only part: expected to be kept verbatim
+                                {"text": "Beta  "},
+                            ],
+                        }
+                    ],
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        result = parse_session_file(session_file, mock_anonymizer)
+
+        assert result is not None
+        message = result["messages"][0]
+        assert message["content"] == "Alpha\n\n  \nBeta  "  # parts joined with "\n", nothing stripped
+        assert "content_parts" not in message  # text-only content produces no structured parts
+
+    def test_user_string_content_preserves_outer_whitespace(self, tmp_path, mock_anonymizer):
+        session_file = tmp_path / "session-gemini.json"
+        session_file.write_text(
+            json.dumps(
+                {
+                    "sessionId": "gemini-session-whitespace",
+                    "startTime": "2026-03-24T12:00:00Z",
+                    "lastUpdated": "2026-03-24T12:00:01Z",
+                    "messages": [
+                        {
+                            "type": "user",
+                            "timestamp": "2026-03-24T12:00:00Z",
+                            "content": "  padded request  ",  # bare-string content rather than a list of parts
+                        }
+                    ],
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        result = parse_session_file(session_file, mock_anonymizer)
+
+        assert result is not None
+        assert result["messages"][0]["content"] == "  padded request  "  # leading/trailing spaces are kept
+
+    def test_all_whitespace_user_text_parts_are_not_dropped(self, tmp_path, mock_anonymizer):
+        session_file = tmp_path / "session-gemini.json"
+        session_file.write_text(
+            json.dumps(
+                {
+                    "sessionId": "gemini-session-blank",
+                    "startTime": "2026-03-24T12:00:00Z",
+                    "lastUpdated": "2026-03-24T12:00:01Z",
+                    "messages": [
+                        {
+                            "type": "user",
+                            "timestamp": "2026-03-24T12:00:00Z",
+                            "content": [
+                                {"text": "   "},
+                                {"text": ""},
+                            ],
+                        }
+                    ],
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        result = parse_session_file(session_file, mock_anonymizer)
+
+        assert result is not None
+        assert result["messages"][0]["content"] == "   \n"  # "   " and "" joined with "\n"
+        assert result["stats"]["user_messages"] == 1  # a blank-only message still counts as a user message
+
+    def test_user_inline_data_preserved_without_duplicate_text(self, tmp_path, mock_anonymizer):
+        session_file = tmp_path / "session-gemini.json"
+        session_file.write_text(
+            json.dumps(
+                {
+                    "sessionId": "gemini-session-1",
+                    "startTime": "2026-03-24T12:00:00Z",
+                    "lastUpdated": "2026-03-24T12:00:01Z",
+                    "messages": [
+                        {
+                            "type": "user",
+                            "timestamp": "2026-03-24T12:00:00Z",
+                            "content": [
+                                {"text": "Please inspect this screenshot."},
+                                {"inlineData": {"mimeType": "image/png", "data": "QUJDRA=="}},
+                            ],
+                        }
+                    ],
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        result = parse_session_file(session_file, mock_anonymizer)
+
+        assert result is not None
+        message = result["messages"][0]
+        assert message["content"] == "Please inspect this screenshot."  # text goes to content only
+        assert message["content_parts"] == [  # the text part is not repeated here; only the attachment
+            {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": "image/png",
+                    "data": "QUJDRA==",
+                },
+            }
+        ]
+
+    def test_user_function_parts_preserved_and_linked(self, tmp_path, mock_anonymizer):
+        session_file = tmp_path / "session-gemini.json"
+        session_file.write_text(
+            json.dumps(
+                {
+                    "sessionId": "gemini-session-2",
+                    "startTime": "2026-03-24T12:00:00Z",
+                    "lastUpdated": "2026-03-24T12:00:01Z",
+                    "messages": [
+                        {
+                            "type": "user",
+                            "timestamp": "2026-03-24T12:00:00Z",
+                            "content": [
+                                {"text": "Use the read result below."},
+                                {
+                                    "functionCall": {  # no "id": the parser has to synthesize one
+                                        "name": "read_file",
+                                        "args": {"file_path": "/Users/testuser/Documents/myproject/src/app.py"},
+                                    }
+                                },
+                                {
+                                    "functionResponse": {  # no "id" either: must be linked to the call above
+                                        "name": "read_file",
+                                        "response": {"output": "print('hello')"},
+                                    }
+                                },
+                            ],
+                        }
+                    ],
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        result = parse_session_file(session_file, mock_anonymizer)
+
+        assert result is not None
+        message = result["messages"][0]
+        assert message["content"] == "Use the read result below."
+        assert len(message["content_parts"]) == 2
+        tool_use, tool_result = message["content_parts"]
+        assert tool_use["type"] == "tool_use"
+        assert tool_use["name"] == "read_file"
+        assert "testuser" not in tool_use["input"]["file_path"]  # the username must not survive in the path
+        assert tool_result == {
+            "type": "tool_result",
+            "tool_use_id": tool_use["id"],  # result points back at the synthesized call id
+            "content": "print('hello')",
+        }
+
+    def test_large_blob_string_content_preserved_in_content_parts(self, tmp_path, mock_anonymizer):
+        blob = "data:image/png;base64," + ("A" * 5000)  # data-URI prefix plus 5000 filler characters
+        session_file = tmp_path / "session-gemini.json"
+        session_file.write_text(
+            json.dumps(
+                {
+                    "sessionId": "gemini-session-3",
+                    "startTime": "2026-03-24T12:00:00Z",
+                    "lastUpdated": "2026-03-24T12:00:01Z",
+                    "messages": [
+                        {
+                            "type": "user",
+                            "timestamp": "2026-03-24T12:00:00Z",
+                            "content": blob,
+                        }
+                    ],
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        result = parse_session_file(session_file, mock_anonymizer)
+
+        assert result is not None
+        message = result["messages"][0]
+        assert "content" not in message  # the blob must not appear as readable text
+        assert message["content_parts"] == [{"type": "text", "text": blob}]  # kept byte-for-byte as a part
+        assert result["stats"]["user_messages"] == 1
diff --git a/tests/test_secrets.py b/tests/test_secrets.py
index 44139ee..ddec5ed 100644
--- a/tests/test_secrets.py
+++ b/tests/test_secrets.py
@@ -662,6 +662,23 @@ def test_none_content_skipped(self):
         result, count = redact_session(session)
         assert count == 0
 
+    def test_redacts_content_parts_and_preserves_blob_payloads(self):
+        blob = "data:image/png;base64," + ("A" * 5000)  # data-URI prefix plus 5000 filler characters
+        session = {
+            "messages": [
+                {
+                    "content_parts": [
+                        {"type": "tool_result", "content": "Key: sk-ant-api03-abcdefghijklmnopqrstuvwxyz"},
+                        {"type": "image", "source": {"type": "base64", "data": blob}},
+                    ]
+                }
+            ]
+        }
+        result, count = redact_session(session)
+        assert REDACTED in result["messages"][0]["content_parts"][0]["content"]  # key-like string is redacted
+        assert result["messages"][0]["content_parts"][1]["source"]["data"] == blob  # blob passes through intact
+        assert count >= 1
+
 
 class TestLargeBinarySkipping:
     def test_detects_large_base64_blob(self):