From ccb2a6cf6812fc91aa1822d12e0e7f32b2261b61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20M=C3=A9sz=C3=A1ros=20=28Laptop=29?= Date: Fri, 13 Feb 2026 17:31:49 +0100 Subject: [PATCH] Add unicode width strategy option --- README.md | 7 ++ examples/showcase.zig | 32 +++--- src/core/context.zig | 16 +++ src/core/program.zig | 27 +++++ src/terminal/ansi.zig | 5 + src/terminal/terminal.zig | 219 ++++++++++++++++++++++++++++++++++++++ src/unicode.zig | 92 +++++++++++++++- tests/unicode_tests.zig | 2 + 8 files changed, 381 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 8fcd86f..6f9df45 100644 --- a/README.md +++ b/README.md @@ -426,6 +426,7 @@ var program = try zz.Program(Model).initWithOptions(gpa.allocator(), .{ .cursor = false, // Show cursor .bracketed_paste = true, // Enable bracketed paste mode .kitty_keyboard = false, // Enable Kitty keyboard protocol + .unicode_width_strategy = null, // null=auto, .legacy_wcwidth, .unicode .suspend_enabled = true, // Enable Ctrl+Z suspend/resume .title = "My App", // Window title .log_file = "debug.log", // Debug log file path @@ -434,6 +435,12 @@ var program = try zz.Program(Model).initWithOptions(gpa.allocator(), .{ }); ``` +Unicode width strategy can also be overridden per-process with `ZZ_UNICODE_WIDTH=auto|legacy|unicode`; the environment variable is only consulted when the `unicode_width_strategy` option is `null`, so an explicit option always takes precedence. By default (`null`/`auto`), ZigZag: - probes DEC mode `2027` and enables it when available, - probes kitty text-sizing support, - applies terminal/multiplexer heuristics (e.g. tmux/screen/zellij favor legacy width). 
+ ### Custom Event Loop For applications that need to do other work between frames (network polling, background processing, etc.), use `start()` + `tick()` instead of `run()`: diff --git a/examples/showcase.zig b/examples/showcase.zig index 32aba66..5716f12 100644 --- a/examples/showcase.zig +++ b/examples/showcase.zig @@ -692,25 +692,25 @@ const Model = struct { const cjk_box = try cjk_style.render(alloc, cjk_content); - // -- Emoji Box -- - var emoji_header_style = zz.Style{}; - emoji_header_style = emoji_header_style.bold(true); - emoji_header_style = emoji_header_style.fg(zz.Color.hex("#4ECDC4")); - emoji_header_style = emoji_header_style.inline_style(true); - const emoji_header = try emoji_header_style.render(alloc, "Emoji"); + // -- Symbol Box -- + var symbol_header_style = zz.Style{}; + symbol_header_style = symbol_header_style.bold(true); + symbol_header_style = symbol_header_style.fg(zz.Color.hex("#4ECDC4")); + symbol_header_style = symbol_header_style.inline_style(true); + const symbol_header = try symbol_header_style.render(alloc, "Symbols"); - var emoji_style = zz.Style{}; - emoji_style = emoji_style.borderAll(zz.Border.rounded); - emoji_style = emoji_style.borderForeground(zz.Color.hex("#4ECDC4")); - emoji_style = emoji_style.paddingLeft(1).paddingRight(1); - emoji_style = emoji_style.width(30); + var symbol_style = zz.Style{}; + symbol_style = symbol_style.borderAll(zz.Border.rounded); + symbol_style = symbol_style.borderForeground(zz.Color.hex("#4ECDC4")); + symbol_style = symbol_style.paddingLeft(1).paddingRight(1); + symbol_style = symbol_style.width(30); - const emoji_content = try std.fmt.allocPrint(alloc, "{s}\n\n \u{1F680} Rocket \u{2615} Coffee\n \u{1F4A9} Fun \u{2B50} Star\n \u{1F600} Grinning \u{2764} Heart", .{emoji_header}); + const symbol_content = try std.fmt.allocPrint(alloc, "{s}\n\n \u{03B1}\u{03B2}\u{03B3}\u{03B4}\u{03B5} Greek letters\n \u{2211}\u{221A}\u{2260}\u{2264}\u{2265} Math symbols\n \u{2605}\u{2606}\u{00A7}\u{00B6}\u{00B0} 
Misc symbols", .{symbol_header}); - const emoji_box = try emoji_style.render(alloc, emoji_content); + const symbol_box = try symbol_style.render(alloc, symbol_content); // -- Top row -- - const top_row = try zz.joinHorizontal(alloc, &.{ cjk_box, " ", emoji_box }); + const top_row = try zz.joinHorizontal(alloc, &.{ cjk_box, " ", symbol_box }); // -- Fullwidth/Halfwidth comparison box -- var fw_header_style = zz.Style{}; @@ -756,7 +756,7 @@ const Model = struct { align_style = align_style.borderForeground(zz.Color.green()); align_style = align_style.paddingLeft(1).paddingRight(1); - const align_content = try std.fmt.allocPrint(alloc, "{s}\n\n |hello | 5 cols\n |\u{4F60}\u{597D} | 4 cols (2 wide chars)\n |\u{1F680}\u{1F4A9}\u{2615} | 6 cols (3 emojis)\n |caf\u{00E9} | 4 cols (precomposed)\n |cafe\u{0301} | 4 cols (combining)", .{align_header}); + const align_content = try std.fmt.allocPrint(alloc, "{s}\n\n |hello | 5 cols\n |\u{4F60}\u{597D} | 4 cols (2 wide chars)\n |\u{03B1}\u{03B2}\u{03B3}\u{03B4} | 4 cols (Greek)\n |caf\u{00E9} | 4 cols (precomposed)\n |cafe\u{0301} | 4 cols (combining)", .{align_header}); const align_box = try align_style.render(alloc, align_content); @@ -768,7 +768,7 @@ const Model = struct { hint_style = hint_style.fg(zz.Color.gray(10)); hint_style = hint_style.italic(true); hint_style = hint_style.inline_style(true); - const hint = try hint_style.render(alloc, "All text above is laid out using Unicode-aware display width."); + const hint = try hint_style.render(alloc, "Unicode width is terminal-dependent; this tab uses width-stable samples."); return zz.joinVertical(alloc, &.{ top_row, "", mid_row, "", align_box, "", hint }); } diff --git a/src/core/context.zig b/src/core/context.zig index ac70e2c..7c97215 100644 --- a/src/core/context.zig +++ b/src/core/context.zig @@ -4,6 +4,7 @@ const std = @import("std"); const Terminal = @import("../terminal/terminal.zig").Terminal; const color_mod = @import("../style/color.zig"); +const unicode_mod = 
@import("../unicode.zig"); const Logger = @import("log.zig").Logger; /// Runtime context passed to init, update, and view functions @@ -41,6 +42,15 @@ pub const Context = struct { /// Whether the terminal has a dark background is_dark_background: bool, + /// Active Unicode width strategy for text measurement + unicode_width_strategy: unicode_mod.WidthStrategy, + + /// Whether DEC mode 2027 was successfully negotiated + terminal_mode_2027: bool, + + /// Whether kitty text sizing support was detected + kitty_text_sizing: bool, + /// Access to internal state (for advanced use) _terminal: ?*Terminal, @@ -68,6 +78,9 @@ pub const Context = struct { .color_256 = profile.supports256(), .color_profile = profile, .is_dark_background = color_mod.hasDarkBackground(), + .unicode_width_strategy = unicode_mod.getWidthStrategy(), + .terminal_mode_2027 = false, + .kitty_text_sizing = false, ._terminal = null, }; } @@ -153,6 +166,9 @@ pub const Options = struct { /// Enable Kitty keyboard protocol kitty_keyboard: bool = false, + /// Force Unicode width strategy (`null` = auto-detect) + unicode_width_strategy: ?unicode_mod.WidthStrategy = null, + /// Enable suspend/resume with Ctrl+Z suspend_enabled: bool = true, }; diff --git a/src/core/program.zig b/src/core/program.zig index 9d7c0d2..670d9d8 100644 --- a/src/core/program.zig +++ b/src/core/program.zig @@ -11,6 +11,7 @@ const Options = @import("context.zig").Options; const message = @import("message.zig"); const command = @import("command.zig"); const Logger = @import("log.zig").Logger; +const unicode = @import("../unicode.zig"); pub const Cmd = command.Cmd; pub const Msg = message; @@ -164,6 +165,13 @@ pub fn Program(comptime Model: type) type { self.context.height = size.rows; self.context._terminal = &self.terminal.?; + const width_caps = self.terminal.?.getUnicodeWidthCapabilities(); + const effective_width_strategy = self.resolveUnicodeWidthStrategy(width_caps.strategy); + self.context.unicode_width_strategy = 
effective_width_strategy;
+            self.context.terminal_mode_2027 = width_caps.mode_2027;
+            self.context.kitty_text_sizing = width_caps.kitty_text_sizing;
+            unicode.setWidthStrategy(effective_width_strategy);
+
             // Initialize the model
             const init_cmd = self.model.init(&self.context);
             try self.processCommand(init_cmd);
@@ -324,6 +332,27 @@ pub fn Program(comptime Model: type) type {
             return null;
         }
 
+        /// Decide which width strategy becomes active. An explicit
+        /// `Options.unicode_width_strategy` wins; otherwise a `ZZ_UNICODE_WIDTH`
+        /// override is used; otherwise the `detected` strategy is returned.
+        fn resolveUnicodeWidthStrategy(self: *const Self, detected: unicode.WidthStrategy) unicode.WidthStrategy {
+            const from_options = self.options.unicode_width_strategy;
+            // `orelse` is lazy, so the environment is only read when no option is set.
+            return from_options orelse (envUnicodeWidthOverride() orelse detected);
+        }
+
+        /// Map the `ZZ_UNICODE_WIDTH` environment variable to a strategy, comparing
+        /// ASCII case-insensitively. Returns null when the variable cannot be read
+        /// or holds `auto` or any unrecognized value.
+        fn envUnicodeWidthOverride() ?unicode.WidthStrategy {
+            const allocator = std.heap.page_allocator;
+            const value = std.process.getEnvVarOwned(allocator, "ZZ_UNICODE_WIDTH") catch return null;
+            defer allocator.free(value);
+
+            if (std.ascii.eqlIgnoreCase(value, "unicode")) return .unicode;
+            return if (std.ascii.eqlIgnoreCase(value, "legacy")) .legacy_wcwidth else null;
+        }
+
         fn processMouseEvent(self: *Self, mouse_event: keyboard.MouseEvent) ?UserCmd {
             if (@hasField(UserMsg, "mouse")) {
                 const user_msg = UserMsg{ .mouse = mouse_event };
diff --git a/src/terminal/ansi.zig b/src/terminal/ansi.zig
index 21756fb..56cb1f0 100644
--- a/src/terminal/ansi.zig
+++ b/src/terminal/ansi.zig
@@ -39,6 +39,11 @@ pub const bracketed_paste_disable = CSI ++ "?2004l";
 pub const sync_start = CSI ++ "?2026h";
 pub const sync_end = CSI ++ "?2026l";
 
+// Unicode width mode (DECRQM/DECSET private mode 2027)
+pub const unicode_width_mode_query = CSI ++ "?2027$p";
+pub const unicode_width_mode_enable = CSI ++ "?2027h";
+pub const unicode_width_mode_disable = CSI ++ "?2027l";
+
 // Kitty keyboard protocol
 pub const kitty_keyboard_enable = CSI ++ ">1u";
 pub const kitty_keyboard_disable = CSI ++ " 0) {
+                @memcpy(collected[collected_len .. 
collected_len + copy_len], chunk[0..copy_len]);
+                collected_len += copy_len;
+            }
+
+            if (parseMode2027Response(collected[0..collected_len])) |supported| {
+                return supported;
+            }
+        }
+
+        return false;
+    }
+
+    /// Scan `bytes` for a DECRPM reply to the mode 2027 query (`ESC [ ? 2027 ; Ps $ y`).
+    ///
+    /// Returns null while no complete reply is present, so the caller can keep
+    /// reading. Otherwise reports whether the mode is usable: Ps 1 (set), 2 (reset)
+    /// and 3 (permanently set) count as supported; 0 (not recognized), 4
+    /// (permanently reset) and any other value count as unsupported.
+    fn parseMode2027Response(bytes: []const u8) ?bool {
+        const prefix = "\x1b[?2027;";
+        var search_from: usize = 0;
+
+        while (std.mem.indexOfPos(u8, bytes, search_from, prefix)) |start| {
+            // A rejected candidate only advances by one byte, so an overlapping
+            // occurrence of the prefix is never skipped.
+            search_from = start + 1;
+
+            var i = start + prefix.len;
+            var param: usize = 0;
+            var saw_digit = false;
+            while (i < bytes.len and std.ascii.isDigit(bytes[i])) : (i += 1) {
+                saw_digit = true;
+                // Saturate instead of trapping on an absurdly long digit run.
+                param = param *| 10 +| @as(usize, bytes[i] - '0');
+            }
+            if (!saw_digit) continue;
+
+            if (i + 1 < bytes.len and bytes[i] == '$' and bytes[i + 1] == 'y') {
+                return switch (param) {
+                    1, 2, 3 => true,
+                    else => false,
+                };
+            }
+        }
+
+        return null;
+    }
+
+    /// Choose a width strategy from the probed capabilities and the environment.
+    /// Running inside a multiplexer forces `.legacy_wcwidth`; otherwise mode 2027,
+    /// kitty text sizing, or a known terminal each select `.unicode`.
+    fn selectWidthStrategy(self: *const Terminal) unicode.WidthStrategy {
+        if (isInsideMultiplexer()) {
+            return .legacy_wcwidth;
+        }
+
+        if (self.unicode_width_caps.mode_2027) {
+            return .unicode;
+        }
+
+        if (self.unicode_width_caps.kitty_text_sizing) {
+            return .unicode;
+        }
+
+        if (isKnownUnicodeWidthTerminal()) {
+            return .unicode;
+        }
+
+        return .legacy_wcwidth;
+    }
+
+    /// Probe kitty text sizing: request the cursor position, draw a space via
+    /// OSC 66 with `w=2`, request the position again, and report whether the
+    /// column advanced by exactly two. Waits at most 250 ms for both replies.
+    fn queryKittyTextSizingSupport(self: *Terminal) !bool {
+        if (!looksLikeKittyTerminal()) return false;
+
+        const cpr = "\x1b[6n";
+        // CR, CPR, draw 2-cell space via kitty OSC 66 width-only, CPR.
+        const probe = "\r" ++ cpr ++ "\x1b]66;w=2; \x07" ++ cpr;
+        try self.writeBytes(probe);
+        try self.flush();
+
+        var buf: [512]u8 = undefined;
+        var len: usize = 0;
+        const deadline_ms = std.time.milliTimestamp() + 250;
+
+        while (std.time.milliTimestamp() < deadline_ms) {
+            var chunk: [128]u8 = undefined;
+            const n = self.readInput(&chunk, 40) catch 0;
+            if (n == 0) continue;
+
+            const copy_len = @min(n, buf.len - len);
+            if (copy_len > 0) {
+                @memcpy(buf[len .. 
len + copy_len], chunk[0..copy_len]);
+                len += copy_len;
+            }
+
+            if (parseTwoCprColumns(buf[0..len])) |cols| {
+                return cols.second == cols.first + 2;
+            }
+        }
+
+        return false;
+    }
+
+    /// Extract the column fields of the first two cursor position reports
+    /// (`ESC [ row ; col R`) found in `bytes`; null until two are present.
+    fn parseTwoCprColumns(bytes: []const u8) ?struct { first: usize, second: usize } {
+        var columns: [2]usize = .{ 0, 0 };
+        var seen: usize = 0;
+        var cursor: usize = 0;
+
+        while (seen < columns.len and cursor < bytes.len) {
+            const esc = std.mem.indexOfPos(u8, bytes, cursor, "\x1b[") orelse break;
+            // Assume this candidate is rejected; overwritten below on success.
+            cursor = esc + 1;
+
+            var pos = esc + 2;
+            while (pos < bytes.len and bytes[pos] >= '0' and bytes[pos] <= '9') pos += 1;
+            if (pos >= bytes.len or bytes[pos] != ';') continue;
+
+            const col_start = pos + 1;
+            pos = col_start;
+            while (pos < bytes.len and bytes[pos] >= '0' and bytes[pos] <= '9') pos += 1;
+            if (pos == col_start or pos >= bytes.len or bytes[pos] != 'R') continue;
+
+            columns[seen] = std.fmt.parseInt(usize, bytes[col_start..pos], 10) catch continue;
+            seen += 1;
+            cursor = pos + 1;
+        }
+
+        if (seen < columns.len) return null;
+        return .{ .first = columns[0], .second = columns[1] };
+    }
+
+    /// True when `TMUX` or `ZELLIJ` is set to a non-empty value, or `TERM`
+    /// contains "screen".
+    fn isInsideMultiplexer() bool {
+        if (envVarExists("TMUX")) return true;
+        if (envVarExists("ZELLIJ")) return true;
+        return envVarContains("TERM", "screen");
+    }
+
+    fn isKnownUnicodeWidthTerminal() bool {
+        // Terminals known to use grapheme-aware width by default.
+        return envVarEquals("TERM_PROGRAM", "WezTerm") or
+            envVarEquals("TERM_PROGRAM", "iTerm.app") or
+            envVarContains("TERM", "wezterm") or
+            envVarContains("TERM", "ghostty");
+    }
+
+    /// True when `KITTY_WINDOW_ID` is set to a non-empty value or `TERM` contains "kitty".
+    fn looksLikeKittyTerminal() bool {
+        return envVarExists("KITTY_WINDOW_ID") or envVarContains("TERM", "kitty");
+    }
+
+    /// True when environment variable `name` is set to a non-empty value.
+    /// Any lookup failure is treated as "not set".
+    fn envVarExists(name: []const u8) bool {
+        const value = std.process.getEnvVarOwned(std.heap.page_allocator, name) catch return false;
+        defer std.heap.page_allocator.free(value);
+        return value.len > 0;
+    }
+
+    /// True when environment variable `name` equals `expected`, ignoring ASCII case.
+    /// Any lookup failure yields false.
+    fn envVarEquals(name: []const u8, expected: []const u8) bool {
+        const value = std.process.getEnvVarOwned(std.heap.page_allocator, name) catch return false;
+        defer std.heap.page_allocator.free(value);
+        return std.ascii.eqlIgnoreCase(value, expected);
+    }
+
+    /// True when environment variable `name` contains `needle` (case-sensitive).
+    /// Any lookup failure yields false.
+    fn envVarContains(name: []const u8, needle: []const u8) bool {
+        const value = std.process.getEnvVarOwned(std.heap.page_allocator, name) catch return false;
+        defer std.heap.page_allocator.free(value);
+        return std.mem.indexOf(u8, value, needle) != null;
+    }
+
     /// Simple writer struct for compatibility
     pub const Writer = struct {
         terminal: *Terminal,
diff --git a/src/unicode.zig b/src/unicode.zig
index 6631b63..3f9cedf 100644
--- a/src/unicode.zig
+++ b/src/unicode.zig
@@ -1,10 +1,106 @@
 //! Unicode utilities for display width calculation.
 
+const std = @import("std");
+
 pub const display_width = @import("unicode/display_width.zig");
-pub const charWidth = display_width.charWidth;
-pub const codepointWidth = display_width.codepointWidth;
-pub const strWidth = display_width.strWidth;
+
+/// Runtime width strategy.
+/// - legacy_wcwidth: conservative fallback for terminals without negotiated width support.
+/// - unicode: full Unicode table behavior.
+pub const WidthStrategy = enum(u8) {
+    legacy_wcwidth,
+    unicode,
+};
+
+/// Process-wide active strategy; starts as `.legacy_wcwidth`. Held in an atomic
+/// value and accessed with release stores and acquire loads.
+var width_strategy: std.atomic.Value(u8) = std.atomic.Value(u8).init(@intFromEnum(WidthStrategy.legacy_wcwidth));
+
+/// Replace the strategy used by every subsequent width query in this process.
+pub fn setWidthStrategy(strategy: WidthStrategy) void {
+    width_strategy.store(@intFromEnum(strategy), .release);
+}
+
+/// Return the strategy currently in effect.
+pub fn getWidthStrategy() WidthStrategy {
+    return @enumFromInt(width_strategy.load(.acquire));
+}
+
+/// Display width of `codepoint` in terminal columns under the active strategy.
+pub fn charWidth(codepoint: u21) usize {
+    return switch (getWidthStrategy()) {
+        .legacy_wcwidth => charWidthLegacy(codepoint),
+        .unicode => display_width.charWidth(codepoint),
+    };
+}
+
+/// Signed variant of `charWidth`: the value of `display_width.codepointWidth`,
+/// except that the legacy strategy narrows width-2 codepoints in the
+/// `isLegacyAmbiguousWide` ranges to 1.
+pub fn codepointWidth(codepoint: u21) i8 {
+    const table_width: i8 = display_width.codepointWidth(codepoint);
+    return switch (getWidthStrategy()) {
+        .unicode => table_width,
+        .legacy_wcwidth => if (table_width == 2 and isLegacyAmbiguousWide(codepoint)) 1 else table_width,
+    };
+}
+
+/// Display width of the UTF-8 string `str` in terminal columns.
+///
+/// Under `.unicode` this defers to `display_width.strWidth`. Under
+/// `.legacy_wcwidth` the legacy per-codepoint widths are summed, and each byte
+/// that does not begin a valid UTF-8 sequence counts as one column.
+pub fn strWidth(str: []const u8) usize {
+    if (getWidthStrategy() == .unicode) return display_width.strWidth(str);
+
+    var total: usize = 0;
+    var i: usize = 0;
+    while (i < str.len) {
+        const len = std.unicode.utf8ByteSequenceLength(str[i]) catch 0;
+        if (len != 0 and i + len <= str.len) {
+            if (std.unicode.utf8Decode(str[i..][0..len])) |cp| {
+                total += charWidthLegacy(cp);
+                i += len;
+                continue;
+            } else |_| {}
+        }
+        // Invalid lead byte, truncated sequence, or malformed encoding: count the
+        // byte as one column and resynchronize on the next byte.
+        total += 1;
+        i += 1;
+    }
+    return total;
+}
+
+/// `display_width.charWidth`, with width-2 codepoints in the
+/// `isLegacyAmbiguousWide` ranges narrowed to 1.
+fn charWidthLegacy(codepoint: u21) usize {
+    const w = display_width.charWidth(codepoint);
+    if (w == 2 and isLegacyAmbiguousWide(codepoint)) return 1;
+    return w;
+}
+
+/// Legacy terminals often render many BMP symbol/emoji codepoints as narrow.
+fn isLegacyAmbiguousWide(codepoint: u21) bool {
+    return (codepoint >= 0x2300 and codepoint <= 0x23FF) or
+        (codepoint >= 0x2600 and codepoint <= 0x27BF) or
+        (codepoint >= 0x2B00 and codepoint <= 0x2BFF);
+}
 
 test {
     _ = display_width;
 }
+
+test "legacy strategy narrows ambiguous bmp symbols" {
+    const previous = getWidthStrategy();
+    defer setWidthStrategy(previous);
+    setWidthStrategy(.legacy_wcwidth);
+    try std.testing.expectEqual(@as(usize, 1), charWidth(0x2764)); // Heart
+}
+
+test "unicode strategy keeps full table behavior" {
+    const previous = getWidthStrategy();
+    defer setWidthStrategy(previous);
+    setWidthStrategy(.unicode);
+    try std.testing.expectEqual(display_width.charWidth(0x2764), charWidth(0x2764));
+}
diff --git a/tests/unicode_tests.zig b/tests/unicode_tests.zig
index a0283a6..3de6cb4 100644
--- a/tests/unicode_tests.zig
+++ b/tests/unicode_tests.zig
@@ -16,6 +16,9 @@ test "charWidth: CJK ideographs are wide" {
 }
 
 test "charWidth: emoji are wide" {
+    const previous_strategy = zz.unicode.getWidthStrategy();
+    defer zz.unicode.setWidthStrategy(previous_strategy);
+    zz.unicode.setWidthStrategy(.unicode);
     try testing.expectEqual(@as(usize, 2), zz.unicode.charWidth(0x1F600)); // grinning face
     try testing.expectEqual(@as(usize, 2), zz.unicode.charWidth(0x1F680)); // rocket
     try testing.expectEqual(@as(usize, 2), zz.unicode.charWidth(0x2615)); // hot beverage