From 062e8dc0c7a7bd36259328977d7dd217100eb8f9 Mon Sep 17 00:00:00 2001 From: Erik Svilich Date: Wed, 2 Jul 2025 10:25:23 -0700 Subject: [PATCH] docs: document tamper report --- .../package/api-reference/unicode_metadata.md | 35 +++++++++++ docs/package/user-guide/tamper-detection.md | 6 +- encypher/core/unicode_metadata.py | 44 +++++++++++++ encypher/utils/diff_utils.py | 63 +++++++++++++++++++ tests/integration/test_c2pa_v2_2.py | 5 +- 5 files changed, 149 insertions(+), 4 deletions(-) create mode 100644 encypher/utils/diff_utils.py diff --git a/docs/package/api-reference/unicode_metadata.md b/docs/package/api-reference/unicode_metadata.md index fc97bf7..8b20ac1 100644 --- a/docs/package/api-reference/unicode_metadata.md +++ b/docs/package/api-reference/unicode_metadata.md @@ -128,11 +128,46 @@ For C2PA v2.2 manifests, it performs full cryptographic verification, including: - `allow_fallback_extraction`: If True, attempts to extract data from the end of the string if standard extraction fails. **Returns:** + A tuple of `(is_verified, signer_id, payload)`: - `is_verified` (`bool`): `True` if the signature and all binding checks (content hash, manifest hash) are valid. - `signer_id` (`Optional[str]`): The identifier of the key used for signing. - `payload` (`Union[BasicPayload, ManifestPayload, C2PAPayload, None]`): The extracted and verified inner payload, or `None` on failure. +### `UnicodeMetadata.verify_metadata_with_report` + +```python +@classmethod +def verify_metadata_with_report( + cls, + text: str, + public_key_resolver: Callable[[str], Optional[PublicKeyTypes]], + reference_text: Optional[str] = None, + return_payload_on_failure: bool = False, + require_hard_binding: bool = True, +) -> Tuple[ + bool, + Optional[str], + Union[BasicPayload, ManifestPayload, C2PAPayload, None], + Optional[str], +]: +``` + +Extends :meth:`verify_metadata` by optionally returning a short tamper report. 
If +verification fails and a `reference_text` is provided, the method compares the +current text against that reference and returns a human-friendly diff summary. + +**Parameters:** +- `text`: The potentially tampered text. +- `public_key_resolver`: Function to look up the public key for a given signer ID. +- `reference_text`: The original text to compare against for diff generation. +- `return_payload_on_failure`: If `True`, include the payload even when verification fails. +- `require_hard_binding`: Require the hard-binding assertion for C2PA manifests. + +**Returns:** +A tuple of `(is_verified, signer_id, payload, report)` where `report` is a string +summarizing the differences, or `None` when verification succeeds, no `reference_text` was supplied, or the diff could not be generated. + ### `UnicodeMetadata.extract_metadata` ```python diff --git a/docs/package/user-guide/tamper-detection.md b/docs/package/user-guide/tamper-detection.md index 763be81..b5bebfb 100644 --- a/docs/package/user-guide/tamper-detection.md +++ b/docs/package/user-guide/tamper-detection.md @@ -69,12 +69,14 @@ print(f"\nOriginal text: '{original_text}'") print(f"Tampered text: '{tampered_text.strip()}'") # Verification on the tampered text will fail because the content hash no longer matches. 
-is_valid_tampered, _, _ = UnicodeMetadata.verify_metadata( +is_valid_tampered, _, _, diff_report = UnicodeMetadata.verify_metadata_with_report( text=tampered_text, - public_key_provider=public_key_provider + public_key_resolver=public_key_provider, + reference_text=encoded_text, ) print(f"Verification of tampered text: {'✅ Passed' if is_valid_tampered else '🚨 Failed'}") +print(f"Tamper report: {diff_report}") ``` ### C2PA and Tamper Detection diff --git a/encypher/core/unicode_metadata.py b/encypher/core/unicode_metadata.py index 03e7e1e..b39b0a3 100644 --- a/encypher/core/unicode_metadata.py +++ b/encypher/core/unicode_metadata.py @@ -40,6 +40,7 @@ serialize_payload, ) from .signing import extract_payload_from_cose_sign1, sign_c2pa_cose, sign_payload, verify_c2pa_cose, verify_signature +from encypher.utils.diff_utils import generate_diff_report class UnicodeMetadata: @@ -1012,6 +1013,49 @@ def verify_metadata( cast(Union[BasicPayload, ManifestPayload, C2PAPayload, None], actual_inner_payload) if return_payload_on_failure else None, ) + @classmethod + def verify_metadata_with_report( + cls, + text: str, + public_key_resolver: Callable[[str], Optional[Ed25519PublicKey]], + reference_text: Optional[str] = None, + return_payload_on_failure: bool = False, + require_hard_binding: bool = True, + ) -> Tuple[bool, Optional[str], Union[BasicPayload, ManifestPayload, C2PAPayload, None], Optional[str]]: + """Verify metadata and optionally generate a tamper report. + + This method wraps :meth:`verify_metadata` to provide an additional diff + summary when verification fails and a reference text is supplied. + + Args: + text: The text with embedded metadata to verify. + public_key_resolver: Function that returns a public key for a signer ID. + reference_text: Optional original text to compare against for diff generation. + return_payload_on_failure: Return the payload even when verification fails. 
+ require_hard_binding: Require the hard binding assertion when verifying C2PA manifests. + + Returns: + A tuple of ``(is_valid, signer_id, payload, report)`` where ``report`` + contains a human-friendly description of the differences if verification fails + and a reference text was provided. + """ + is_valid, signer_id, payload = cls.verify_metadata( + text=text, + public_key_resolver=public_key_resolver, + return_payload_on_failure=return_payload_on_failure, + require_hard_binding=require_hard_binding, + ) + + report: Optional[str] = None + if not is_valid and reference_text is not None: + try: + report = generate_diff_report(reference_text, text) + except Exception as e: # pragma: no cover - diff failures shouldn't crash verification + logger.error(f"Failed to generate tamper report: {e}") + report = None + + return is_valid, signer_id, payload, report + @classmethod def _verify_c2pa_v2_2( cls, diff --git a/encypher/utils/diff_utils.py b/encypher/utils/diff_utils.py new file mode 100644 index 0000000..0c21451 --- /dev/null +++ b/encypher/utils/diff_utils.py @@ -0,0 +1,63 @@ +"""Utility functions for generating tamper reports.""" + +from difflib import SequenceMatcher + + +def generate_diff_report(original: str, modified: str) -> str: + """Generate a short diff summary between two strings. + + This compares both the visible text and embedded metadata bytes. + + Args: + original: The reference string believed to be untampered. + modified: The string under verification. + + Returns: + A human-friendly summary describing the differences. 
+ """ + text_matcher = SequenceMatcher(None, original, modified) + added_chars = 0 + removed_chars = 0 + for tag, i1, i2, j1, j2 in text_matcher.get_opcodes(): + if tag == "insert": + added_chars += j2 - j1 + elif tag == "delete": + removed_chars += i2 - i1 + elif tag == "replace": + removed_chars += i2 - i1 + added_chars += j2 - j1 + from encypher.core.unicode_metadata import UnicodeMetadata + + orig_bytes = UnicodeMetadata.extract_bytes(original) + mod_bytes = UnicodeMetadata.extract_bytes(modified) + + byte_matcher = SequenceMatcher(None, orig_bytes, mod_bytes) + added_bytes = 0 + removed_bytes = 0 + for tag, i1, i2, j1, j2 in byte_matcher.get_opcodes(): + if tag == "insert": + added_bytes += j2 - j1 + elif tag == "delete": + removed_bytes += i2 - i1 + elif tag == "replace": + removed_bytes += i2 - i1 + added_bytes += j2 - j1 + + parts = [] + if added_chars or removed_chars: + char_parts = [] + if removed_chars: + char_parts.append(f"{removed_chars} characters removed") + if added_chars: + char_parts.append(f"{added_chars} characters added") + parts.append("Text changes: " + ", ".join(char_parts)) + if added_bytes or removed_bytes: + byte_parts = [] + if removed_bytes: + byte_parts.append(f"{removed_bytes} metadata bytes removed") + if added_bytes: + byte_parts.append(f"{added_bytes} metadata bytes added") + parts.append("Metadata changes: " + ", ".join(byte_parts)) + if not parts: + return "No changes detected" + return "; ".join(parts) diff --git a/tests/integration/test_c2pa_v2_2.py b/tests/integration/test_c2pa_v2_2.py index cfd9a65..f2fc01b 100644 --- a/tests/integration/test_c2pa_v2_2.py +++ b/tests/integration/test_c2pa_v2_2.py @@ -155,11 +155,12 @@ def public_key_resolver(signer_id: str) -> Optional[ed25519.Ed25519PublicKey]: return self.public_key return None - # Verify the tampered text - is_verified, _, _ = UnicodeMetadata.verify_metadata(tampered_text, public_key_resolver) + # Verify the tampered text with diff report + is_verified, _, _, report = 
UnicodeMetadata.verify_metadata_with_report(tampered_text, public_key_resolver, reference_text=embedded_text) # Verification should fail due to content hash mismatch self.assertFalse(is_verified) + self.assertIsNotNone(report) def test_c2pa_soft_binding(self): """Test soft binding functionality for C2PA v2.2."""