From 8f7406c08549275beeb4632b2a16f0924948769f Mon Sep 17 00:00:00 2001 From: gaocong Date: Fri, 26 Sep 2025 16:05:07 +0800 Subject: [PATCH] Implement enhanced C# language tokenizer to fix string highlighting issues with URLs --- SharpPad/wwwroot/csharpLanguageDefinition.js | 219 +++++++++++++++++++ SharpPad/wwwroot/csharpLanguageProvider.js | 6 +- SharpPad/wwwroot/semanticColoring.js | 26 ++- test-syntax-highlighting.cs | 24 ++ 4 files changed, 265 insertions(+), 10 deletions(-) create mode 100644 SharpPad/wwwroot/csharpLanguageDefinition.js create mode 100644 test-syntax-highlighting.cs diff --git a/SharpPad/wwwroot/csharpLanguageDefinition.js b/SharpPad/wwwroot/csharpLanguageDefinition.js new file mode 100644 index 0000000..74f7f56 --- /dev/null +++ b/SharpPad/wwwroot/csharpLanguageDefinition.js @@ -0,0 +1,219 @@ +// Custom C# Language Definition - Fixes string highlighting issues with URLs +// Specifically addresses the problem where https:// inside strings affects .Dump() highlighting + +export function overrideCSharpLanguage() { + // Enhanced C# language definition that fixes URL highlighting in strings + const enhancedCSharpLanguage = { + defaultToken: '', + tokenPostfix: '.cs', + + keywords: [ + 'extern', 'alias', 'using', 'bool', 'decimal', 'sbyte', 'byte', 'short', + 'ushort', 'int', 'uint', 'long', 'ulong', 'char', 'float', 'double', + 'object', 'dynamic', 'string', 'assembly', 'is', 'as', 'ref', 'out', + 'this', 'base', 'new', 'typeof', 'void', 'checked', 'unchecked', + 'default', 'delegate', 'var', 'const', 'if', 'else', 'switch', 'case', + 'while', 'do', 'for', 'foreach', 'in', 'break', 'continue', 'goto', + 'return', 'throw', 'try', 'catch', 'finally', 'lock', 'yield', 'from', + 'let', 'where', 'join', 'on', 'equals', 'into', 'orderby', 'ascending', + 'descending', 'select', 'group', 'by', 'namespace', 'partial', 'class', + 'field', 'event', 'method', 'param', 'public', 'protected', 'internal', + 'private', 'abstract', 'sealed', 'static', 'struct', 'readonly', + 'volatile', 'virtual', 'override', 'params', 'get', 'set', 'add', + 'remove', 'operator', 'true', 'false', 'implicit', 'explicit', + 'interface', 'enum', 'null', 'async', 'await', 'fixed', 'sizeof', + 'stackalloc', 'unsafe', 'nameof', 'when' + ], + + namespaceFollows: ['namespace', 'using'], + parenFollows: ['if', 'for', 'while', 'switch', 'foreach', 'using', 'catch', 'when'], + operators: [ + '=', '??', '||', '&&', '|', '^', '&', '==', '!=', '<=', '>=', '<<', + '+', '-', '*', '/', '%', '!', '~', '++', '--', '+=', '-=', '*=', + '/=', '%=', '&=', '|=', '^=', '<<=', '>>=', '>>', '=>' + ], + + symbols: /[=>](?!@symbols)/, '@brackets'], + + // Operators + [/@symbols/, { + cases: { + '@operators': 'delimiter', + '@default': '' + } + }], + + // Numbers + [/[0-9_]*\.[0-9_]+([eE][\-+]?\d+)?[fFdD]?/, 'number.float'], + [/0[xX][0-9a-fA-F_]+/, 'number.hex'], + [/0[bB][01_]+/, 'number.hex'], + [/[0-9_]+/, 'number'], + + // Delimiters + [/[;,.]/, 'delimiter'] + ], + + qualified: [ + [/[a-zA-Z_][\w]*/, { + cases: { + '@keywords': { token: 'keyword.$0' }, + '@default': 'identifier' + } + }], + [/\./, 'delimiter'], + ['', '', '@pop'] + ], + + namespace: [ + { include: '@whitespace' }, + [/[A-Z]\w*/, 'namespace'], + [/[\.=]/, 'delimiter'], + ['', '', '@pop'] + ], + + comment: [ + [/[^\/*]+/, 'comment'], + ['\\*/', 'comment', '@pop'], + [/[\/*]/, 'comment'] + ], + + // String state - NO comment processing allowed here + string: [ + [/[^\\"]+/, 'string'], // Everything inside string is just string content + [/@escapes/, 'string.escape'], + [/\\./, 'string.escape.invalid'], + [/"/, { token: 'string.quote', next: '@pop' }] + ], + + litstring: [ + [/[^"]+/, 'string'], // Everything inside literal string is just string content + [/""/, 'string.escape'], + [/"/, { token: 'string.quote', next: '@pop' }] + ], + + litinterpstring: [ + [/[^"{]+/, 'string'], + [/""/, 'string.escape'], + [/{{/, 'string.escape'], + [/}}/, 'string.escape'], + [/{/, { token: 'string.quote', next: 'root.litinterpstring' }], + [/"/, { token: 'string.quote', next: '@pop' }] + ], + + interpolatedstring: [ + [/[^\\"{]+/, 'string'], + [/@escapes/, 'string.escape'], + [/\\./, 'string.escape.invalid'], + [/{{/, 'string.escape'], + [/}}/, 'string.escape'], + [/{/, { token: 'string.quote', next: 'root.interpolatedstring' }], + [/"/, { token: 'string.quote', next: '@pop' }] + ], + + // Whitespace handling - comments only processed when not in string context + whitespace: [ + [/^[ \t\v\f]*#((r)|(load))(?=\s)/, 'directive.csx'], + [/^[ \t\v\f]*#\w.*$/, 'namespace.cpp'], + [/[ \t\v\f\r\n]+/, ''], + [/\/\*/, 'comment', '@comment'], + [/\/\/.*$/, 'comment'] + ] + } + }; + + // Override the existing C# language tokenizer + const languageId = 'csharp'; + let overrideApplied = false; + + const applyOverride = () => { + if (!globalThis.monaco?.languages?.setMonarchTokensProvider) { + return false; + } + + try { + globalThis.monaco.languages.setMonarchTokensProvider(languageId, enhancedCSharpLanguage); + if (!overrideApplied) { + overrideApplied = true; + console.log('C# language tokenizer successfully overridden - fixed string URL highlighting'); + } + return true; + } catch (error) { + console.error('Failed to override C# language tokenizer:', error); + return false; + } + }; + + const scheduleOverride = (delay = 0, attemptsLeft = 10) => { + if (attemptsLeft <= 0) { + return; + } + + setTimeout(() => { + const applied = applyOverride(); + if (!applied) { + scheduleOverride(Math.min(delay + 50, 250), attemptsLeft - 1); + } + }, delay); + }; + + try { + if (globalThis.monaco?.languages?.getLanguages?.().some(lang => lang.id === languageId)) { + scheduleOverride(); + } else { + scheduleOverride(25); + } + } catch (error) { + console.warn('Unable to verify Monaco languages for override immediately:', error); + scheduleOverride(); + } + + if (globalThis.monaco?.languages?.onLanguage) { + globalThis.monaco.languages.onLanguage(languageId, () => { + scheduleOverride(); + scheduleOverride(50); + scheduleOverride(200); + }); + } else { + // Fallback: keep trying until Monaco exposes the language APIs + scheduleOverride(); + } +} + + diff --git a/SharpPad/wwwroot/csharpLanguageProvider.js b/SharpPad/wwwroot/csharpLanguageProvider.js index 2640991..ca4f3d1 100644 --- a/SharpPad/wwwroot/csharpLanguageProvider.js +++ b/SharpPad/wwwroot/csharpLanguageProvider.js @@ -1,9 +1,11 @@ import { getCurrentFile, shouldUseMultiFileMode, createMultiFileRequest, createSingleFileRequest } from './utils/common.js'; import { sendRequest } from './utils/apiService.js'; +import { overrideCSharpLanguage } from './csharpLanguageDefinition.js'; export function registerCsharpProvider() { - // Monaco Editor 已内置 C# 语言支持 - // 完全使用原生语法高亮,不添加任何增强着色 + // Override Monaco's built-in C# language with enhanced tokenizer + // This specifically fixes the issue where https:// inside strings affects .Dump() highlighting + overrideCSharpLanguage(); monaco.languages.registerCompletionItemProvider('csharp', { triggerCharacters: [".", " "], diff --git a/SharpPad/wwwroot/semanticColoring.js b/SharpPad/wwwroot/semanticColoring.js index e8a6773..56a73d8 100644 --- a/SharpPad/wwwroot/semanticColoring.js +++ b/SharpPad/wwwroot/semanticColoring.js @@ -289,18 +289,23 @@ function setupModelSemanticColoring(model, legend) { */ async function applySemanticDecorationsFast(model, tokenData, legend, decorationIds) { const lines = model.getLinesContent(); + const stringRanges = detectStringRanges(model); const commentLines = new Set(); - - // 极速注释行扫描 + + // 粗略注释扫描(忽略字符串内的 //) for (let i = 0; i < lines.length; i++) { - if (lines[i].includes('//')) { - commentLines.add(i + 1); + const line = lines[i]; + const commentIndex = line.indexOf("//"); + if (commentIndex !== -1) { + const commentPos = { line: i + 1, char: commentIndex }; + if (!isPositionInString(commentPos, stringRanges)) { + commentLines.add(i + 1); + } } } - // 如果有 Worker 且数据量大,使用 Worker 处理 + // 如果有 Worker 支持且数据量较大,使用 Worker 处理 if (semanticWorker && tokenData.length > 1000) { - const stringRanges = detectStringRanges(model); return new Promise((resolve) => { const handleWorkerMessage = (e) => { @@ -330,7 +335,6 @@ async function applySemanticDecorationsFast(model, tokenData, legend, decoration let currentChar = 0; // 检测字符串范围以避免在字符串内部应用语义着色 - const stringRanges = detectStringRanges(model); // 只处理关键令牌类型,添加更多重要的类型 const importantTypes = new Set(['class', 'interface', 'method', 'function', 'comment', 'type', 'struct', 'enum']); @@ -344,7 +348,13 @@ async function applySemanticDecorationsFast(model, tokenData, legend, decoration // 更精确的注释检测 const lineContent = model.getLineContent(currentLine + 1); - const commentStart = lineContent.indexOf('//'); + let commentStart = lineContent.indexOf("//"); + if (commentStart !== -1) { + const commentPos = { line: currentLine + 1, char: commentStart }; + if (isPositionInString(commentPos, stringRanges)) { + commentStart = -1; + } + } const isInComment = commentStart !== -1 && currentChar >= commentStart; // 检查是否在字符串内部 diff --git a/test-syntax-highlighting.cs b/test-syntax-highlighting.cs new file mode 100644 index 0000000..348005b --- /dev/null +++ b/test-syntax-highlighting.cs @@ -0,0 +1,24 @@ +using System; + +class Program +{ + public static void Main() + { + // 这行代码应该展示正确的语法着色 + "Hello, SharpPad! 关注我: https://github.com/gaoconggit/SharpPad".Dump(); + + // 另一个测试案例 + string message = "Visit: https://github.com/microsoft/monaco-editor"; + message.Dump(); + + // 确保普通注释仍然正常工作 + // This is a regular comment and should be highlighted as comment + + /* + * Block comment test with URL: https://example.com + * Should all be highlighted as comment + */ + + Console.WriteLine("Regular method calls should work normally"); + } +} \ No newline at end of file