From 7ee346537b4f5f4be8ac71f83ee5abf81bf0afbd Mon Sep 17 00:00:00 2001
From: Mateusz Sterczewski <mateusz@cycode.com>
Date: Fri, 23 Jan 2026 14:18:47 +0100
Subject: [PATCH 1/2] CM-57848-Fix UTF-8 encoding errors when displaying code snippets

---
 cycode/cli/printers/tables/table_printer.py   |  4 +-
 .../cli/printers/utils/code_snippet_syntax.py |  4 +-
 cycode/cli/printers/utils/rich_helpers.py     |  4 +-
 cycode/cli/utils/string_utils.py              |  9 ++
 .../printers/utils/test_rich_encoding_fix.py  | 85 +++++++++++++++++++
 5 files changed, 103 insertions(+), 3 deletions(-)
 create mode 100644 tests/cli/printers/utils/test_rich_encoding_fix.py

diff --git a/cycode/cli/printers/tables/table_printer.py b/cycode/cli/printers/tables/table_printer.py
index 6a5dd198..4468ef9f 100644
--- a/cycode/cli/printers/tables/table_printer.py
+++ b/cycode/cli/printers/tables/table_printer.py
@@ -8,7 +8,7 @@
 from cycode.cli.printers.tables.table_printer_base import TablePrinterBase
 from cycode.cli.printers.utils import is_git_diff_based_scan
 from cycode.cli.printers.utils.detection_ordering.common_ordering import sort_and_group_detections_from_scan_result
-from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text
+from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text, sanitize_text_for_encoding
 
 if TYPE_CHECKING:
     from cycode.cli.models import LocalScanResult
@@ -96,6 +96,8 @@ def _enrich_table_with_detection_code_segment_values(
             if not self.show_secret:
                 violation = obfuscate_text(violation)
 
+        violation = sanitize_text_for_encoding(violation)
+
         table.add_cell(LINE_NUMBER_COLUMN, str(detection_line))
         table.add_cell(COLUMN_NUMBER_COLUMN, str(detection_column))
         table.add_cell(VIOLATION_LENGTH_COLUMN, f'{violation_length} chars')
diff --git a/cycode/cli/printers/utils/code_snippet_syntax.py b/cycode/cli/printers/utils/code_snippet_syntax.py
index 20f94d4e..57bc084e 100644
--- a/cycode/cli/printers/utils/code_snippet_syntax.py
+++ b/cycode/cli/printers/utils/code_snippet_syntax.py
@@ -5,7 +5,7 @@
 from cycode.cli import consts
 from cycode.cli.console import _SYNTAX_HIGHLIGHT_THEME
 from cycode.cli.printers.utils import is_git_diff_based_scan
-from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text
+from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text, sanitize_text_for_encoding
 
 if TYPE_CHECKING:
     from cycode.cli.models import Document
@@ -72,6 +72,7 @@ def _get_code_snippet_syntax_from_file(
             code_lines_to_render.append(line_content)
 
     code_to_render = '\n'.join(code_lines_to_render)
+    code_to_render = sanitize_text_for_encoding(code_to_render)
     return _get_syntax_highlighted_code(
         code=code_to_render,
         lexer=Syntax.guess_lexer(document.path, code=code_to_render),
@@ -94,6 +95,7 @@ def _get_code_snippet_syntax_from_git_diff(
         violation = line_content[detection_position_in_line : detection_position_in_line + violation_length]
         line_content = line_content.replace(violation, obfuscate_text(violation))
 
+    line_content = sanitize_text_for_encoding(line_content)
     return _get_syntax_highlighted_code(
         code=line_content,
         lexer='diff',
diff --git a/cycode/cli/printers/utils/rich_helpers.py b/cycode/cli/printers/utils/rich_helpers.py
index 52d2a0f2..6049b211 100644
--- a/cycode/cli/printers/utils/rich_helpers.py
+++ b/cycode/cli/printers/utils/rich_helpers.py
@@ -5,6 +5,7 @@
 from rich.panel import Panel
 
 from cycode.cli.console import console
+from cycode.cli.utils.string_utils import sanitize_text_for_encoding
 
 if TYPE_CHECKING:
     from rich.console import RenderableType
@@ -20,8 +21,9 @@ def get_panel(renderable: 'RenderableType', title: str) -> Panel:
 
 
 def get_markdown_panel(markdown_text: str, title: str) -> Panel:
+    sanitized_text = sanitize_text_for_encoding(markdown_text.strip())
     return get_panel(
-        Markdown(markdown_text.strip()),
+        Markdown(sanitized_text),
         title=title,
     )
 
diff --git a/cycode/cli/utils/string_utils.py b/cycode/cli/utils/string_utils.py
index c3c0c6c6..ac3987f4 100644
--- a/cycode/cli/utils/string_utils.py
+++ b/cycode/cli/utils/string_utils.py
@@ -65,3 +65,12 @@ def shortcut_dependency_paths(dependency_paths_list: str) -> str:
         result += '\n'
 
     return result.rstrip().rstrip(',')
+
+
+def sanitize_text_for_encoding(text: str) -> str:
+    """Sanitize text by replacing surrogate characters and invalid UTF-8 sequences.
+    
+    This prevents encoding errors when Rich tries to display the content, especially on Windows.
+    Surrogate characters (U+D800 to U+DFFF) cannot be encoded to UTF-8 and will cause errors.
+    """
+    return text.encode('utf-8', errors='replace').decode('utf-8')
diff --git a/tests/cli/printers/utils/test_rich_encoding_fix.py b/tests/cli/printers/utils/test_rich_encoding_fix.py
new file mode 100644
index 00000000..e735b9c7
--- /dev/null
+++ b/tests/cli/printers/utils/test_rich_encoding_fix.py
@@ -0,0 +1,85 @@
+"""Tests for Rich encoding fix to handle surrogate characters."""
+
+from io import StringIO
+from unittest.mock import MagicMock
+
+from rich.console import Console
+
+from cycode.cli import consts
+from cycode.cli.models import Document
+from cycode.cli.printers.rich_printer import RichPrinter
+from cycode.cyclient.models import Detection
+
+
+def create_strict_encoding_console() -> tuple[Console, StringIO]:
+    """Create a Console that enforces strict UTF-8 encoding, simulating Windows console behavior.
+    
+    When Rich writes to the console, the file object needs to encode strings to bytes.
+    With errors='strict' (default for TextIOWrapper), this raises UnicodeEncodeError on surrogates.
+    This function simulates that behavior to test the encoding fix.
+    """
+    buffer = StringIO()
+    
+    class StrictEncodingWrapper:
+        def __init__(self, file_obj: StringIO) -> None:
+            self._file = file_obj
+        
+        def write(self, text: str) -> int:
+            """Validate encoding before writing to simulate strict encoding behavior."""
+            text.encode('utf-8')
+            return self._file.write(text)
+        
+        def flush(self) -> None:
+            self._file.flush()
+        
+        def isatty(self) -> bool:
+            return False
+        
+        def __getattr__(self, name: str):
+            # Delegate all other attributes to the underlying file
+            return getattr(self._file, name)
+    
+    strict_file = StrictEncodingWrapper(buffer)
+    console = Console(file=strict_file, width=80, force_terminal=False)
+    return console, buffer
+
+
+def test_rich_printer_handles_surrogate_characters_in_violation_card() -> None:
+    """Test that RichPrinter._print_violation_card() handles surrogate characters without errors.
+    
+    The error occurs in Rich's console._write_buffer() -> write() when console.print() is called.
+    On Windows with strict encoding, this raises UnicodeEncodeError on surrogates.
+    """
+    surrogate_char = chr(0xDC96)
+    document_content = 'A' * 1236 + surrogate_char + 'B' * 100
+    document = Document(
+        path='test.py',
+        content=document_content,
+        is_git_diff_format=False,
+    )
+    
+    detection = Detection(
+        detection_type_id='test-id',
+        type='test-type',
+        message='Test message',
+        detection_details={
+            'description': 'Summary with ' + surrogate_char + ' surrogate character',
+            'policy_display_name': 'Test Policy',
+            'start_position': 1236,
+            'length': 1,
+            'line': 0,
+        },
+        detection_rule_id='test-rule-id',
+        severity='Medium',
+    )
+    
+    mock_ctx = MagicMock()
+    mock_ctx.obj = {
+        'scan_type': consts.SAST_SCAN_TYPE,
+        'show_secret': False,
+    }
+    mock_ctx.info_name = consts.SAST_SCAN_TYPE
+    
+    console, _ = create_strict_encoding_console()
+    printer = RichPrinter(mock_ctx, console, console)
+    printer._print_violation_card(document, detection, 1, 1)

From ed93814177a4b07fd6cccc3c238247fd4f22d9a1 Mon Sep 17 00:00:00 2001
From: Mateusz Sterczewski <mateusz@cycode.com>
Date: Fri, 23 Jan 2026 14:23:32 +0100
Subject: [PATCH 2/2] CM-57848-Strip trailing whitespace, annotate __getattr__, add test package __init__.py files

---
 cycode/cli/utils/string_utils.py              |  2 +-
 tests/cli/printers/__init__.py                |  0
 tests/cli/printers/utils/__init__.py          |  0
 .../printers/utils/test_rich_encoding_fix.py  | 25 ++++++++++---------
 4 files changed, 14 insertions(+), 13 deletions(-)
 create mode 100644 tests/cli/printers/__init__.py
 create mode 100644 tests/cli/printers/utils/__init__.py

diff --git a/cycode/cli/utils/string_utils.py b/cycode/cli/utils/string_utils.py
index ac3987f4..06d3a51c 100644
--- a/cycode/cli/utils/string_utils.py
+++ b/cycode/cli/utils/string_utils.py
@@ -69,7 +69,7 @@ def shortcut_dependency_paths(dependency_paths_list: str) -> str:
 
 def sanitize_text_for_encoding(text: str) -> str:
     """Sanitize text by replacing surrogate characters and invalid UTF-8 sequences.
-    
+
     This prevents encoding errors when Rich tries to display the content, especially on Windows.
     Surrogate characters (U+D800 to U+DFFF) cannot be encoded to UTF-8 and will cause errors.
     """
diff --git a/tests/cli/printers/__init__.py b/tests/cli/printers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/cli/printers/utils/__init__.py b/tests/cli/printers/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/cli/printers/utils/test_rich_encoding_fix.py b/tests/cli/printers/utils/test_rich_encoding_fix.py
index e735b9c7..721f1c6a 100644
--- a/tests/cli/printers/utils/test_rich_encoding_fix.py
+++ b/tests/cli/printers/utils/test_rich_encoding_fix.py
@@ -1,6 +1,7 @@
 """Tests for Rich encoding fix to handle surrogate characters."""
 
 from io import StringIO
+from typing import Any
 from unittest.mock import MagicMock
 
 from rich.console import Console
@@ -13,32 +14,32 @@
 
 def create_strict_encoding_console() -> tuple[Console, StringIO]:
     """Create a Console that enforces strict UTF-8 encoding, simulating Windows console behavior.
-    
+
     When Rich writes to the console, the file object needs to encode strings to bytes.
     With errors='strict' (default for TextIOWrapper), this raises UnicodeEncodeError on surrogates.
     This function simulates that behavior to test the encoding fix.
     """
     buffer = StringIO()
-    
+
     class StrictEncodingWrapper:
         def __init__(self, file_obj: StringIO) -> None:
             self._file = file_obj
-        
+
         def write(self, text: str) -> int:
             """Validate encoding before writing to simulate strict encoding behavior."""
             text.encode('utf-8')
             return self._file.write(text)
-        
+
         def flush(self) -> None:
             self._file.flush()
-        
+
         def isatty(self) -> bool:
             return False
-        
-        def __getattr__(self, name: str):
+
+        def __getattr__(self, name: str) -> Any:
             # Delegate all other attributes to the underlying file
             return getattr(self._file, name)
-    
+
     strict_file = StrictEncodingWrapper(buffer)
     console = Console(file=strict_file, width=80, force_terminal=False)
     return console, buffer
@@ -46,7 +47,7 @@ def __getattr__(self, name: str):
 
 def test_rich_printer_handles_surrogate_characters_in_violation_card() -> None:
     """Test that RichPrinter._print_violation_card() handles surrogate characters without errors.
-    
+
     The error occurs in Rich's console._write_buffer() -> write() when console.print() is called.
     On Windows with strict encoding, this raises UnicodeEncodeError on surrogates.
     """
@@ -57,7 +58,7 @@ def test_rich_printer_handles_surrogate_characters_in_violation_card() -> None:
         content=document_content,
         is_git_diff_format=False,
     )
-    
+
     detection = Detection(
         detection_type_id='test-id',
         type='test-type',
@@ -72,14 +73,14 @@ def test_rich_printer_handles_surrogate_characters_in_violation_card() -> None:
         detection_rule_id='test-rule-id',
         severity='Medium',
     )
-    
+
     mock_ctx = MagicMock()
     mock_ctx.obj = {
         'scan_type': consts.SAST_SCAN_TYPE,
         'show_secret': False,
     }
     mock_ctx.info_name = consts.SAST_SCAN_TYPE
-    
+
     console, _ = create_strict_encoding_console()
     printer = RichPrinter(mock_ctx, console, console)
     printer._print_violation_card(document, detection, 1, 1)