Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions docs/package/api-reference/unicode_metadata.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,46 @@ For C2PA v2.2 manifests, it performs full cryptographic verification, including:
- `allow_fallback_extraction`: If True, attempts to extract data from the end of the string if standard extraction fails.

**Returns:**

A tuple of `(is_verified, signer_id, payload)`:
- `is_verified` (`bool`): `True` if the signature and all binding checks (content hash, manifest hash) are valid.
- `signer_id` (`Optional[str]`): The identifier of the key used for signing.
- `payload` (`Union[BasicPayload, ManifestPayload, C2PAPayload, None]`): The extracted and verified inner payload, or `None` on failure.

### `UnicodeMetadata.verify_metadata_with_report`

```python
@classmethod
def verify_metadata_with_report(
cls,
text: str,
public_key_resolver: Callable[[str], Optional[PublicKeyTypes]],
reference_text: Optional[str] = None,
return_payload_on_failure: bool = False,
require_hard_binding: bool = True,
) -> Tuple[
bool,
Optional[str],
Union[BasicPayload, ManifestPayload, C2PAPayload, None],
Optional[str],
]:
```

Extends `UnicodeMetadata.verify_metadata` by optionally returning a short tamper report. If
verification fails and a `reference_text` is provided, the method compares the
current text against that reference and returns a human-friendly diff summary.

**Parameters:**
- `text`: The potentially tampered text.
- `public_key_resolver`: Function to look up the public key for a given signer ID.
- `reference_text`: The original text to compare against for diff generation.
- `return_payload_on_failure`: If `True`, include the payload even when verification fails.
- `require_hard_binding`: Require the hard-binding assertion for C2PA manifests.

**Returns:**
A tuple of `(is_verified, signer_id, payload, report)`. `report` is a string
summarizing the differences when verification fails and a `reference_text` was
provided; otherwise (including when the diff itself could not be generated) it is `None`.

### `UnicodeMetadata.extract_metadata`

```python
Expand Down
6 changes: 4 additions & 2 deletions docs/package/user-guide/tamper-detection.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,14 @@ print(f"\nOriginal text: '{original_text}'")
print(f"Tampered text: '{tampered_text.strip()}'")

# Verification on the tampered text will fail because the content hash no longer matches.
is_valid_tampered, _, _ = UnicodeMetadata.verify_metadata(
is_valid_tampered, _, _, diff_report = UnicodeMetadata.verify_metadata_with_report(
text=tampered_text,
public_key_provider=public_key_provider
public_key_resolver=public_key_provider,
reference_text=encoded_text,
)

print(f"Verification of tampered text: {'✅ Passed' if is_valid_tampered else '🚨 Failed'}")
print(f"Tamper report: {diff_report}")
```

### C2PA and Tamper Detection
Expand Down
44 changes: 44 additions & 0 deletions encypher/core/unicode_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
serialize_payload,
)
from .signing import extract_payload_from_cose_sign1, sign_c2pa_cose, sign_payload, verify_c2pa_cose, verify_signature
from encypher.utils.diff_utils import generate_diff_report


class UnicodeMetadata:
Expand Down Expand Up @@ -1012,6 +1013,49 @@ def verify_metadata(
cast(Union[BasicPayload, ManifestPayload, C2PAPayload, None], actual_inner_payload) if return_payload_on_failure else None,
)

@classmethod
def verify_metadata_with_report(
    cls,
    text: str,
    public_key_resolver: Callable[[str], Optional[Ed25519PublicKey]],
    reference_text: Optional[str] = None,
    return_payload_on_failure: bool = False,
    require_hard_binding: bool = True,
) -> Tuple[bool, Optional[str], Union[BasicPayload, ManifestPayload, C2PAPayload, None], Optional[str]]:
    """Run :meth:`verify_metadata` and attach a tamper report on failure.

    The verification itself is delegated unchanged to :meth:`verify_metadata`.
    A diff summary is only produced when that verification fails *and* the
    caller supplied ``reference_text``; in every other case the report slot
    of the returned tuple is ``None``.

    Args:
        text: The text with embedded metadata to verify.
        public_key_resolver: Callable mapping a signer ID to its public key
            (or ``None``); forwarded to :meth:`verify_metadata`.
        reference_text: Optional original text that ``text`` is diffed
            against when verification fails.
        return_payload_on_failure: Forwarded to :meth:`verify_metadata`.
        require_hard_binding: Forwarded to :meth:`verify_metadata`.

    Returns:
        ``(is_valid, signer_id, payload, report)``. The first three items
        are exactly what :meth:`verify_metadata` returned; ``report`` is the
        string from ``generate_diff_report(reference_text, text)`` or
        ``None``.
    """
    verified, signer, inner_payload = cls.verify_metadata(
        text=text,
        public_key_resolver=public_key_resolver,
        return_payload_on_failure=return_payload_on_failure,
        require_hard_binding=require_hard_binding,
    )

    # Nothing to report when verification passed or there is no baseline to diff against.
    if verified or reference_text is None:
        return verified, signer, inner_payload, None

    tamper_report: Optional[str] = None
    try:
        tamper_report = generate_diff_report(reference_text, text)
    except Exception as e:  # pragma: no cover - diff failures shouldn't crash verification
        # Best effort: the verification verdict is still returned, just without a report.
        logger.error(f"Failed to generate tamper report: {e}")

    return verified, signer, inner_payload, tamper_report

@classmethod
def _verify_c2pa_v2_2(
cls,
Expand Down
63 changes: 63 additions & 0 deletions encypher/utils/diff_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Utility functions for generating tamper reports."""

from difflib import SequenceMatcher


def _count_changes(before, after):
    """Count how many items were added to and removed from *before* to get *after*.

    Both arguments are sequences of hashable items (a ``str`` is compared
    character by character). A ``replace`` opcode contributes to both counts.

    Returns:
        A ``(added, removed)`` tuple of item counts.
    """
    # autojunk=False: the default heuristic treats frequently occurring items as
    # junk once a sequence reaches 200 items, which can turn matching regions
    # into large "replace" spans and inflate the reported counts.
    matcher = SequenceMatcher(None, before, after, autojunk=False)
    added = 0
    removed = 0
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag in ("delete", "replace"):
            removed += i2 - i1
        if tag in ("insert", "replace"):
            added += j2 - j1
    return added, removed


def _summarize(label, unit, added, removed):
    """Format one report section, or return ``None`` when both counts are zero."""
    if not (added or removed):
        return None
    details = []
    if removed:
        details.append(f"{removed} {unit} removed")
    if added:
        details.append(f"{added} {unit} added")
    return f"{label}: " + ", ".join(details)


def generate_diff_report(original: str, modified: str) -> str:
    """Generate a short diff summary between two strings.

    Two comparisons are made: one over the strings exactly as given
    (character by character), and one over the values returned by
    ``UnicodeMetadata.extract_bytes`` for each string.

    Args:
        original: The reference string believed to be untampered.
        modified: The string under verification.

    Returns:
        A human-friendly summary such as
        ``"Text changes: 2 characters removed, 1 characters added; Metadata
        changes: 4 metadata bytes added"``, or ``"No changes detected"`` when
        neither comparison finds a difference.
    """
    text_added, text_removed = _count_changes(original, modified)

    # Imported here rather than at module level: unicode_metadata imports this
    # module at import time, so a top-level import would be circular.
    from encypher.core.unicode_metadata import UnicodeMetadata

    # NOTE(review): extract_bytes is assumed to return a bytes-like sequence - confirm.
    bytes_added, bytes_removed = _count_changes(
        UnicodeMetadata.extract_bytes(original),
        UnicodeMetadata.extract_bytes(modified),
    )

    sections = [
        _summarize("Text changes", "characters", text_added, text_removed),
        _summarize("Metadata changes", "metadata bytes", bytes_added, bytes_removed),
    ]
    report = "; ".join(section for section in sections if section is not None)
    return report or "No changes detected"
5 changes: 3 additions & 2 deletions tests/integration/test_c2pa_v2_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,12 @@ def public_key_resolver(signer_id: str) -> Optional[ed25519.Ed25519PublicKey]:
return self.public_key
return None

# Verify the tampered text
is_verified, _, _ = UnicodeMetadata.verify_metadata(tampered_text, public_key_resolver)
# Verify the tampered text with diff report
is_verified, _, _, report = UnicodeMetadata.verify_metadata_with_report(tampered_text, public_key_resolver, reference_text=embedded_text)

# Verification should fail due to content hash mismatch
self.assertFalse(is_verified)
self.assertIsNotNone(report)

def test_c2pa_soft_binding(self):
"""Test soft binding functionality for C2PA v2.2."""
Expand Down