Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cycode/cli/printers/tables/table_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from cycode.cli.printers.tables.table_printer_base import TablePrinterBase
from cycode.cli.printers.utils import is_git_diff_based_scan
from cycode.cli.printers.utils.detection_ordering.common_ordering import sort_and_group_detections_from_scan_result
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text, sanitize_text_for_encoding

if TYPE_CHECKING:
from cycode.cli.models import LocalScanResult
Expand Down Expand Up @@ -96,6 +96,8 @@ def _enrich_table_with_detection_code_segment_values(
if not self.show_secret:
violation = obfuscate_text(violation)

violation = sanitize_text_for_encoding(violation)

table.add_cell(LINE_NUMBER_COLUMN, str(detection_line))
table.add_cell(COLUMN_NUMBER_COLUMN, str(detection_column))
table.add_cell(VIOLATION_LENGTH_COLUMN, f'{violation_length} chars')
Expand Down
4 changes: 3 additions & 1 deletion cycode/cli/printers/utils/code_snippet_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from cycode.cli import consts
from cycode.cli.console import _SYNTAX_HIGHLIGHT_THEME
from cycode.cli.printers.utils import is_git_diff_based_scan
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text, sanitize_text_for_encoding

if TYPE_CHECKING:
from cycode.cli.models import Document
Expand Down Expand Up @@ -72,6 +72,7 @@ def _get_code_snippet_syntax_from_file(
code_lines_to_render.append(line_content)

code_to_render = '\n'.join(code_lines_to_render)
code_to_render = sanitize_text_for_encoding(code_to_render)
return _get_syntax_highlighted_code(
code=code_to_render,
lexer=Syntax.guess_lexer(document.path, code=code_to_render),
Expand All @@ -94,6 +95,7 @@ def _get_code_snippet_syntax_from_git_diff(
violation = line_content[detection_position_in_line : detection_position_in_line + violation_length]
line_content = line_content.replace(violation, obfuscate_text(violation))

line_content = sanitize_text_for_encoding(line_content)
return _get_syntax_highlighted_code(
code=line_content,
lexer='diff',
Expand Down
4 changes: 3 additions & 1 deletion cycode/cli/printers/utils/rich_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from rich.panel import Panel

from cycode.cli.console import console
from cycode.cli.utils.string_utils import sanitize_text_for_encoding

if TYPE_CHECKING:
from rich.console import RenderableType
Expand All @@ -20,8 +21,9 @@ def get_panel(renderable: 'RenderableType', title: str) -> Panel:


def get_markdown_panel(markdown_text: str, title: str) -> Panel:
    """Render Markdown text inside a titled panel.

    The text is stripped of surrounding whitespace and passed through
    ``sanitize_text_for_encoding`` before it is handed to ``Markdown``, so only the
    sanitized text is ever rendered.

    Args:
        markdown_text: Raw Markdown source to display.
        title: Title forwarded to ``get_panel``.

    Returns:
        The panel produced by ``get_panel`` around the rendered Markdown.
    """
    sanitized_text = sanitize_text_for_encoding(markdown_text.strip())
    return get_panel(
        # Exactly one renderable: the sanitized Markdown (not the raw input).
        Markdown(sanitized_text),
        title=title,
    )

Expand Down
9 changes: 9 additions & 0 deletions cycode/cli/utils/string_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,12 @@ def shortcut_dependency_paths(dependency_paths_list: str) -> str:
result += '\n'

return result.rstrip().rstrip(',')


def sanitize_text_for_encoding(text: str) -> str:
    """Return ``text`` with every character that UTF-8 cannot encode replaced by ``?``.

    Lone surrogate code points (U+D800 to U+DFFF) cannot be encoded as UTF-8, so writing
    them to a strictly encoded stream (for example when Rich prints on Windows) raises
    ``UnicodeEncodeError``. Round-tripping through UTF-8 with ``errors='replace'``
    substitutes each such character and leaves all other text unchanged.
    """
    utf8_bytes = text.encode('utf-8', errors='replace')
    return utf8_bytes.decode('utf-8')
Empty file.
Empty file.
86 changes: 86 additions & 0 deletions tests/cli/printers/utils/test_rich_encoding_fix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Tests for Rich encoding fix to handle surrogate characters."""

from io import StringIO
from typing import Any
from unittest.mock import MagicMock

from rich.console import Console

from cycode.cli import consts
from cycode.cli.models import Document
from cycode.cli.printers.rich_printer import RichPrinter
from cycode.cyclient.models import Detection


def create_strict_encoding_console() -> tuple[Console, StringIO]:
    """Build a Rich ``Console`` whose output stream rejects text that UTF-8 cannot encode.

    The console writes through a thin proxy around an in-memory ``StringIO``. The proxy
    strictly encodes every chunk to UTF-8 before storing it, so a lone surrogate raises
    ``UnicodeEncodeError``. This simulates the strict-encoding behavior of a Windows console.

    Returns:
        The console and the ``StringIO`` that collects everything written successfully.
    """

    class _StrictUtf8Stream:
        """File-like proxy that checks UTF-8 encodability on every write."""

        def __init__(self, target: StringIO) -> None:
            self._file = target

        def __getattr__(self, name: str) -> Any:
            # Only reached for attributes the proxy does not define; forward to the real buffer.
            return getattr(self._file, name)

        def isatty(self) -> bool:
            return False

        def flush(self) -> None:
            self._file.flush()

        def write(self, text: str) -> int:
            """Raise ``UnicodeEncodeError`` for unencodable text, otherwise write it through."""
            text.encode('utf-8')  # strict by default; the encoded bytes are discarded
            return self._file.write(text)

    captured_output = StringIO()
    strict_console = Console(file=_StrictUtf8Stream(captured_output), width=80, force_terminal=False)
    return strict_console, captured_output


def test_rich_printer_handles_surrogate_characters_in_violation_card() -> None:
    """Check that ``RichPrinter._print_violation_card()`` survives a lone surrogate in its input.

    Both the document content and the detection description contain U+DC96. The card is
    printed to a console that strictly encodes its output as UTF-8, so the test passes only
    if printing completes without raising ``UnicodeEncodeError``.
    """
    lone_surrogate = chr(0xDC96)

    # The surrogate sits at offset 1236, which is exactly where the detection points.
    document = Document(
        path='test.py',
        content='A' * 1236 + lone_surrogate + 'B' * 100,
        is_git_diff_format=False,
    )

    details = {
        'description': 'Summary with ' + lone_surrogate + ' surrogate character',
        'policy_display_name': 'Test Policy',
        'start_position': 1236,
        'length': 1,
        'line': 0,
    }
    detection = Detection(
        detection_type_id='test-id',
        type='test-type',
        message='Test message',
        detection_details=details,
        detection_rule_id='test-rule-id',
        severity='Medium',
    )

    # Minimal stand-in for the CLI context the printer receives.
    mock_ctx = MagicMock()
    mock_ctx.info_name = consts.SAST_SCAN_TYPE
    mock_ctx.obj = {
        'scan_type': consts.SAST_SCAN_TYPE,
        'show_secret': False,
    }

    strict_console = create_strict_encoding_console()[0]
    printer = RichPrinter(mock_ctx, strict_console, strict_console)

    # No assertion needed: an encoding failure would surface as an exception here.
    printer._print_violation_card(document, detection, 1, 1)