From 666b2a89448d0711c9f441f3365e9c5b92cf42ec Mon Sep 17 00:00:00 2001 From: imsenyu Date: Tue, 25 Mar 2025 23:57:36 +0800 Subject: [PATCH 01/18] feat: jsesc --- CMakeLists.txt | 4 +- src/babel/babel-arena/memory.hpp | 2 +- .../babel-generator/generators/types.hpp | 9 +- src/babel/babel-generator/index.h | 6 +- src/babel/babel-generator/index.hpp | 3 +- src/babel/babel-generator/printer.hpp | 5 +- src/babel/babel-generator/test/index.hpp | 4 +- src/babel/babel-parser/index.h | 10 +- src/babel/babel-parser/index.hpp | 4 +- src/benchmark/generate.hpp | 4 +- src/npm_modules/jsesc/jsesc.hpp | 427 ++++++++++++++++++ src/testrunner-babel-parser.cpp | 2 +- 12 files changed, 459 insertions(+), 21 deletions(-) create mode 100644 src/npm_modules/jsesc/jsesc.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8548984d..dadf7a22 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,9 @@ if(${COVERAGE}) ) endif() endif() -set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) +if(CMAKE_BUILD_TYPE STREQUAL "Release") + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) +endif () # compile if (MSVC) add_compile_options(/Zc:preprocessor) diff --git a/src/babel/babel-arena/memory.hpp b/src/babel/babel-arena/memory.hpp index d5cccb71..91724a1f 100644 --- a/src/babel/babel-arena/memory.hpp +++ b/src/babel/babel-arena/memory.hpp @@ -31,7 +31,7 @@ namespace BabelArena { approximate_size = codeSize * 10.0; break; case static_cast(MemoryType::String): - approximate_size = codeSize / 50.0; + approximate_size = codeSize / 10.0; break; case static_cast(MemoryType::Identifier): approximate_size = codeSize * 10.0; diff --git a/src/babel/babel-generator/generators/types.hpp b/src/babel/babel-generator/generators/types.hpp index c72adfa3..b3abc71f 100644 --- a/src/babel/babel-generator/generators/types.hpp +++ b/src/babel/babel-generator/generators/types.hpp @@ -5,6 +5,7 @@ #include "../printer.hpp" #include "./index.h" #include "./methods.hpp" +#include "../../../npm_modules/jsesc/jsesc.hpp" 
namespace BabelGenerator::Generators { using namespace BabelParser; @@ -267,10 +268,10 @@ namespace BabelGenerator::Generators { _this.token(*raw); return; } - // - // const val = jsesc(node.value, this.format.jsescOption); - // - // this.token(val); + + auto val = Jsesc::jsesc(_this.allocator, node->value); + // auto val = Jsesc::jsesc(_this.allocator, node->value, this.format.jsescOption); + _this.token(val); } DEF_GENERATOR(BigIntLiteral, node, _) { diff --git a/src/babel/babel-generator/index.h b/src/babel/babel-generator/index.h index e50473f6..54eb75a3 100644 --- a/src/babel/babel-generator/index.h +++ b/src/babel/babel-generator/index.h @@ -1,9 +1,13 @@ #pragma once #include "../../base/unicode.hpp" +#include "../../base/allocator.hpp" #include "../babel-types/ast-types/generated/enum.hpp" #include "../babel-types/ast-types/generated/index.hpp" +namespace BabelArena { + class Memory; +} namespace BabelGenerator { struct GeneratorResult { @@ -130,5 +134,5 @@ namespace BabelGenerator { std::optional importAttributesKeyword; }; GeneratorResult - generate(PTR(BabelParser::Node::File) ast, GeneratorOptions opts = GeneratorOptions{}, std::optional code = std::nullopt); + generate(BabelArena::Memory& allocator, PTR(BabelParser::Node::File) ast, GeneratorOptions opts = GeneratorOptions{}, std::optional code = std::nullopt); } // namespace BabelGenerator diff --git a/src/babel/babel-generator/index.hpp b/src/babel/babel-generator/index.hpp index 1e58812d..1e303c77 100644 --- a/src/babel/babel-generator/index.hpp +++ b/src/babel/babel-generator/index.hpp @@ -148,6 +148,7 @@ namespace BabelGenerator { * @returns - an object containing the output code and source map. */ GeneratorResult generate( + BabelArena::Memory& allocator, PTR(Node::File) ast, GeneratorOptions opts, std::optional code // code?: string | { [filename: string]: string }, ) { @@ -160,7 +161,7 @@ namespace BabelGenerator { // (ast as any).tokens, // typeof code === "string" ? 
code : null, // ); - Printer::Printer printer(format, code); + Printer::Printer printer(*allocator.string(), format, code); return printer.generate(ast); } diff --git a/src/babel/babel-generator/printer.hpp b/src/babel/babel-generator/printer.hpp index 04ba4085..eae33094 100644 --- a/src/babel/babel-generator/printer.hpp +++ b/src/babel/babel-generator/printer.hpp @@ -169,7 +169,10 @@ namespace BabelGenerator::Printer { // } // declare _inputMap: TraceMap; - Printer(Format const &format, std::optional originalCode = std::nullopt) : + BASE::Memory::Allocator& allocator; + + Printer(BASE::Memory::Allocator& allocator, Format const &format, std::optional originalCode = std::nullopt) : + allocator(allocator), format(format), _buf(format.indent.style[0], _originalCode ? _originalCode->size() : 0), _indentRepeat(format.indent.style.size()), diff --git a/src/babel/babel-generator/test/index.hpp b/src/babel/babel-generator/test/index.hpp index 0469bc3e..6fc98721 100644 --- a/src/babel/babel-generator/test/index.hpp +++ b/src/babel/babel-generator/test/index.hpp @@ -22,10 +22,10 @@ namespace BabelGenerator { if (!throwMsg.has_value()) { auto resultCode = BabelParser::withParser( actualCode, parserOpts, - [&](PTR(BabelParser::Node::File) file, BabelParser::Options::Options const &options) + [&](BabelArena::Memory& allocator, PTR(BabelParser::Node::File) file, BabelParser::Options::Options const &options) { auto generatorOptions = task.options.toGeneratorOptions(); - auto result = BabelGenerator::generate(file, generatorOptions); + auto result = BabelGenerator::generate(allocator, file, generatorOptions); return result.code; } ); diff --git a/src/babel/babel-parser/index.h b/src/babel/babel-parser/index.h index acec8eb9..93aeb860 100644 --- a/src/babel/babel-parser/index.h +++ b/src/babel/babel-parser/index.h @@ -14,15 +14,15 @@ namespace BabelParser { using ParseFunction = JSON(BASE::UStringView const &source, Options::PartialOptions); struct ParseArgumentedCallback { - 
using Callback = void (*)(PTR(BabelParser::Node::File), BabelParser::Options::Options const &, void *arguments); + using Callback = void (*)(BabelArena::Memory&, PTR(BabelParser::Node::File), BabelParser::Options::Options const &, void *arguments); void *arguments; Callback callback; - void invoke(PTR(BabelParser::Node::File) file, BabelParser::Options::Options const &options) { this->callback(file, options, this->arguments); } + void invoke(BabelArena::Memory& allocator, PTR(BabelParser::Node::File) file, BabelParser::Options::Options const &options) { this->callback(allocator, file, options, this->arguments); } }; extern void withParserImpl(const BASE::UStringView &source, Options::PartialOptions partial, ParseArgumentedCallback callback); template inline auto withParser(const BASE::UStringView &source, Options::PartialOptions partial, CB &&callback) { - using ReturnType = typename std::invoke_result_t; + using ReturnType = typename std::invoke_result_t; struct Arguments { CB &&callback; std::optional returnValue = std::nullopt; @@ -33,10 +33,10 @@ namespace BabelParser { ParseArgumentedCallback{ .arguments = &args, .callback = - [](PTR(BabelParser::Node::File) file, BabelParser::Options::Options const &options, void *_arguments) + [](BabelArena::Memory& allocator, PTR(BabelParser::Node::File) file, BabelParser::Options::Options const &options, void *_arguments) { auto const &arguments = static_cast(_arguments); - arguments->returnValue = arguments->callback(file, options); + arguments->returnValue = arguments->callback(allocator, file, options); } } ); diff --git a/src/babel/babel-parser/index.hpp b/src/babel/babel-parser/index.hpp index a0c14622..809edb37 100644 --- a/src/babel/babel-parser/index.hpp +++ b/src/babel/babel-parser/index.hpp @@ -56,7 +56,7 @@ namespace BabelParser { [&](PARSER &&parser) { auto file = parser.parse(); - callback.invoke(file, options); + callback.invoke(allocator, file, options); }, allocator, options, source ); @@ -92,7 +92,7 @@ 
namespace BabelParser { BASE::UString code; { STAT_TIMING(Generator, Generate) - code = std::move(BabelGenerator::generate(file).code); + code = std::move(BabelGenerator::generate(*allocator, file).code); } { STAT_TIMING(Runtime, Deallocation) diff --git a/src/benchmark/generate.hpp b/src/benchmark/generate.hpp index 7886b470..c3621cbb 100644 --- a/src/benchmark/generate.hpp +++ b/src/benchmark/generate.hpp @@ -30,10 +30,10 @@ static void BM_BabelGenerate(benchmark::State &state, std::string const &filenam auto ret = BabelParser::withParser( code, BabelParser::Options::PartialOptions{}, - [&](PTR(BabelParser::Node::File) file, BabelParser::Options::Options const &options) + [&](BabelArena::Memory& allocator, PTR(BabelParser::Node::File) file, BabelParser::Options::Options const &options) { for (auto _ : state) { - auto code = BabelGenerator::generate(file).code; + auto code = BabelGenerator::generate(allocator, file).code; benchmark::DoNotOptimize(code); } return true; diff --git a/src/npm_modules/jsesc/jsesc.hpp b/src/npm_modules/jsesc/jsesc.hpp new file mode 100644 index 00000000..973af1d0 --- /dev/null +++ b/src/npm_modules/jsesc/jsesc.hpp @@ -0,0 +1,427 @@ +#pragma once + +#include "../../babel/babel-arena/index.hpp" +#include "../../base/unicode.hpp" +#include "../../base/string.hpp" +#include "../../base/allocator.hpp" + +namespace Jsesc { +// +//const object = {}; +//const hasOwnProperty = object.hasOwnProperty; +//const forOwn = (object, callback) => { +// for (const key in object) { +// if (hasOwnProperty.call(object, key)) { +// callback(key, object[key]); +// } +// } +//}; +// +//const extend = (destination, source) => { +// if (!source) { +// return destination; +// } +// forOwn(source, (key, value) => { +// destination[key] = value; +// }); +// return destination; +//}; +// +//const forEach = (array, callback) => { +// const length = array.length; +// let index = -1; +// while (++index < length) { +// callback(array[index]); +// } +//}; +// +//const 
fourHexEscape = (hex) => { +// return '\\u' + ('0000' + hex).slice(-4); +//} +// + // Formats `code` as four hexadecimal digits, most significant digit first. + // `escaped` starts as four u'0' characters and is filled from index 3 downwards, so short values stay zero-padded. + // `lowercase` selects a-f for digits above 9; otherwise A-F is used. + // When `size` is non-null it receives 4 - i, where i is the index of the last digit written before the remaining value reached zero. + // NOTE(review): presumably BASE::UChar is a 16-bit code unit, so the break always fires within four iterations; if it were wider the loop could end with i == -1 and report 5 -- confirm. + // NOTE(review): `std::array` shows no template arguments in this view; they may have been lost in extraction -- confirm against the real header. + inline std::array hexadecimal(BASE::UChar code, bool lowercase, int* size = nullptr) { + std::array escaped{u'0',u'0',u'0',u'0'}; + int i; + for(i = 3; i >= 0; i--) { + uint8_t ch = code % 16; + code = (code - ch) / 16; + if (ch < 10) { + escaped[i] = u'0' + ch; + } else if (!lowercase) { + escaped[i] = u'A' + ch - 10; + } else { + escaped[i] = u'a' + ch - 10; + } + // Stop once no higher-order digits remain; the untouched slots keep their u'0' padding. + if (!code) { + break; + } + } + if (size != nullptr) { + *size = 4 - i; + } + return escaped; + } + // Adds a backslash-u prefix to `str`, followed by all four zero-padded hex digits of `code` from hexadecimal(). + // NOTE(review): add<2> presumably appends two code units of the literal -- confirm against BASE::String::Slices. + inline void fourHexEscape(BASE::String::Slices& str, BASE::UChar code, bool lowercase) { + auto const& hex = hexadecimal(code, lowercase); + str.add<2>(u"\\u"); + str.add(hex[0]); + str.add(hex[1]); + str.add(hex[2]); + str.add(hex[3]); + } +//const toString = object.toString; +//const isArray = Array.isArray; +//const isBuffer = (value) => { +// return typeof Buffer === 'function' && Buffer.isBuffer(value); +//}; +//const isObject = (value) => { +// // This is a very simple check, but it’s good enough for what we need. +// return toString.call(value) == '[object Object]'; +//}; +//const isString = (value) => { +// return typeof value == 'string' || +// toString.call(value) == '[object String]'; +//}; +//const isNumber = (value) => { +// return typeof value == 'number' || +// toString.call(value) == '[object Number]'; +//}; +//const isFunction = (value) => { +// return typeof value == 'function'; +//}; +//const isMap = (value) => { +// return toString.call(value) == '[object Map]'; +//}; +//const isSet = (value) => { +// return toString.call(value) == '[object Set]'; +//}; +// +///*--------------------------------------------------------------------------*/ +// +//// https://mathiasbynens.be/notes/javascript-escapes#single +//const singleEscapes = { +// '\\': '\\\\', +// '\b': '\\b', +// '\f': '\\f', +// '\n': '\\n', +// '\r': '\\r', +// '\t': '\\t' +// // `\v` is omitted intentionally, because in IE < 9, '\v' == 'v'. 
+// // '\v': '\\x0B' +//}; +//const regexSingleEscape = /[\\\b\f\n\r\t]/; +// +//const regexDigit = /[0-9]/; +//const regexWhitespace = /[\xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]/; +// +//const escapeEverythingRegex = /([\uD800-\uDBFF][\uDC00-\uDFFF])|([\uD800-\uDFFF])|(['"`])|[^]/g; +//const escapeNonAsciiRegex = /([\uD800-\uDBFF][\uDC00-\uDFFF])|([\uD800-\uDFFF])|(['"`])|[^ !#-&\(-\[\]-_a-~]/g; +// +inline BASE::UStringViewArray<3> jsesc(BASE::Memory::Allocator& allocator, BASE::UStringView const& input) { +//const jsesc = (argument, options) => { +// const increaseIndentation = () => { +// oldIndent = indent; +// ++options.indentLevel; +// indent = options.indent.repeat(options.indentLevel) +// }; + // Handle options + // quotes: "double", + // wrap: true, + // minimal: process.env.BABEL_8_BREAKING ? true : false, + // ...opts.jsescOption, + + constexpr bool escapeEverything = false; + constexpr bool minimal = false; + constexpr bool isScriptContext = false; + constexpr BASE::UChar quote = u'\"'; + constexpr bool wrap = true; + constexpr bool es6 = false; + constexpr bool json = false; + constexpr bool compact = true; + constexpr bool lowercaseHex = false; +// const defaults = { +// 'escapeEverything': false, +// 'minimal': false, +// 'isScriptContext': false, +// 'quotes': 'single', +// 'wrap': false, +// 'es6': false, +// 'json': false, +// 'compact': true, +// 'lowercaseHex': false, +// 'numbers': 'decimal', +// 'indent': '\t', +// 'indentLevel': 0, +// '__inline1__': false, +// '__inline2__': false +// }; +// const json = options && options.json; +// if (json) { +// defaults.quotes = 'double'; +// defaults.wrap = true; +// } +// options = extend(defaults, options); +// if ( +// options.quotes != 'single' && +// options.quotes != 'double' && +// options.quotes != 'backtick' +// ) { +// options.quotes = 'single'; +// } +// const quote = options.quotes == 'double' ? +// '"' : +// (options.quotes == 'backtick' ? 
+// '`' : +// '\'' +// ); +// const compact = options.compact; +// const lowercaseHex = options.lowercaseHex; +// let indent = options.indent.repeat(options.indentLevel); +// let oldIndent = ''; +// const inline1 = options.__inline1__; +// const inline2 = options.__inline2__; +// const newLine = compact ? '' : '\n'; +// let result; +// let isEmpty = true; +// const useBinNumbers = options.numbers == 'binary'; +// const useOctNumbers = options.numbers == 'octal'; +// const useDecNumbers = options.numbers == 'decimal'; +// const useHexNumbers = options.numbers == 'hexadecimal'; +// +// if (json && argument && isFunction(argument.toJSON)) { +// argument = argument.toJSON(); +// } +// +// if (!isString(argument)) { +// if (isMap(argument)) { +// if (argument.size == 0) { +// return 'new Map()'; +// } +// if (!compact) { +// options.__inline1__ = true; +// options.__inline2__ = false; +// } +// return 'new Map(' + jsesc(Array.from(argument), options) + ')'; +// } +// if (isSet(argument)) { +// if (argument.size == 0) { +// return 'new Set()'; +// } +// return 'new Set(' + jsesc(Array.from(argument), options) + ')'; +// } +// if (isBuffer(argument)) { +// if (argument.length == 0) { +// return 'Buffer.from([])'; +// } +// return 'Buffer.from(' + jsesc(Array.from(argument), options) + ')'; +// } +// if (isArray(argument)) { +// result = []; +// options.wrap = true; +// if (inline1) { +// options.__inline1__ = false; +// options.__inline2__ = true; +// } +// if (!inline2) { +// increaseIndentation(); +// } +// forEach(argument, (value) => { +// isEmpty = false; +// if (inline2) { +// options.__inline2__ = false; +// } +// result.push( +// (compact || inline2 ? '' : indent) + +// jsesc(value, options) +// ); +// }); +// if (isEmpty) { +// return '[]'; +// } +// if (inline2) { +// return '[' + result.join(', ') + ']'; +// } +// return '[' + newLine + result.join(',' + newLine) + newLine + +// (compact ? 
'' : oldIndent) + ']'; +// } else if (isNumber(argument)) { +// if (json) { +// // Some number values (e.g. `Infinity`) cannot be represented in JSON. +// return JSON.stringify(argument); +// } +// if (useDecNumbers) { +// return String(argument); +// } +// if (useHexNumbers) { +// let hexadecimal = argument.toString(16); +// if (!lowercaseHex) { +// hexadecimal = hexadecimal.toUpperCase(); +// } +// return '0x' + hexadecimal; +// } +// if (useBinNumbers) { +// return '0b' + argument.toString(2); +// } +// if (useOctNumbers) { +// return '0o' + argument.toString(8); +// } +// } else if (!isObject(argument)) { +// if (json) { +// // For some values (e.g. `undefined`, `function` objects), +// // `JSON.stringify(value)` returns `undefined` (which isn’t valid +// // JSON) instead of `'null'`. +// return JSON.stringify(argument) || 'null'; +// } +// return String(argument); +// } else { // it’s an object +// result = []; +// options.wrap = true; +// increaseIndentation(); +// forOwn(argument, (key, value) => { +// isEmpty = false; +// result.push( +// (compact ? '' : indent) + +// jsesc(key, options) + ':' + +// (compact ? '' : ' ') + +// jsesc(value, options) +// ); +// }); +// if (isEmpty) { +// return '{}'; +// } +// return '{' + newLine + result.join(',' + newLine) + newLine + +// (compact ? '' : oldIndent) + '}'; +// } +// } +// +// const regex = options.escapeEverything ? escapeEverythingRegex : escapeNonAsciiRegex; + BASE::String::Slices str{input, &allocator}; + auto data = input.data(); + size_t size = input.size(); + size_t index = 0; + size_t next = 1; + for(;index < size;index = next, ++next) { + BASE::UChar current_char = data[index]; + // if constexpr (quote == u'`') { + // if (current_char == u'$' && next_char == u'{') { + // str.add(u'\\'); + // str.add(index, ++next); + // continue; + // } + // } + if constexpr (!escapeEverything) { + if ( + current_char == ' ' || current_char == '!' 
|| (current_char >= '#' && current_char <= '&') || + (current_char >= '(' && current_char <= '[') || (current_char >= ']' && current_char <= '_') || (current_char >= 'a' && current_char <= '~') + ) [[likely]] { + str.add(index, next); + continue; + } + } + if ( + current_char >= 0xD800 && current_char <= 0xDBFF + ) [[unlikely]] { + BASE::UChar next_char = next < size ? data[next] : 0; + if (next_char >= 0xDC00 && next_char <= 0xDFFF) { + if constexpr (minimal) { + str.add(index, ++next); + continue; + } else { + if constexpr (es6) { + BASE::UCodePoint codePoint = (current_char - 0xD800) * 0x400 + next_char - 0xDC00 + 0x10000; + int hex_size = 0; + str.add<3>(u"\\u{"); + auto const& hexFirst = hexadecimal(codePoint >> 32, lowercaseHex, &hex_size); + hex_size >= 4 ? str.add(hexFirst[0]) : void(0); + hex_size >= 3 ? str.add(hexFirst[1]) : void(0); + hex_size >= 2 ? str.add(hexFirst[2]) : void(0); + hex_size >= 1 ? str.add(hexFirst[3]) : void(0); + auto const& hexSecond = hexadecimal(codePoint % 0xFFFF, lowercaseHex, &hex_size); + str.add(hexSecond[0]); + str.add(hexSecond[1]); + str.add(hexSecond[2]); + str.add(hexSecond[3]); + str.add(u'}'); + } else { + fourHexEscape(str, current_char, lowercaseHex); + fourHexEscape(str, next_char, lowercaseHex); + } + continue; + } + } + } + if ( + current_char >= 0xD800 && current_char <= 0xDFFF + ) [[unlikely]] { + fourHexEscape(str, current_char, lowercaseHex); + continue; + } + if ( + current_char == 0 && + !json + ) [[unlikely]] { + BASE::UChar next_char = next < size ? 
data[next] : 0; + if (!(next_char >= u'0' && next_char <= u'9')) { + str.add<3>(u"\\0"); + continue; + } + } + if ( current_char == u'\'' || current_char == u'"' || current_char == u'`') [[unlikely]] { + if (current_char == quote || escapeEverything) { + str.add(u'\\'); + } + str.add(index, next); + continue; + } + if (current_char == u'\\' || current_char == u'\b' || current_char == u'\f' || current_char == u'\n' || current_char == u'\r' || current_char == u'\t') [[unlikely]] { + str.add(u'\\'); + str.add(index, next); + continue; + } + if (minimal && !( + current_char == 0xa0 || current_char == 0x1680 || + (current_char >= 0x2000 && current_char <= 0x200a) || + current_char == 0x2028 || current_char == 0x2029 || current_char == 0x202f || current_char == 0x205f || current_char == 0x3000 + )) [[likely]] { + str.add(index, next); + continue; + } + + auto const& hex = hexadecimal(current_char, lowercaseHex); + if (json || current_char >= 256) { + str.add<2>(u"\\u"); + str.add(hex[0]); + str.add(hex[1]); + str.add(hex[2]); + str.add(hex[3]); + } else { + str.add<2>(u"\\x"); + str.add(hex[2]); + str.add(hex[3]); + } + continue; + } +// if (quote == '`') { +// result = result.replace(/\$\{/g, '\\${'); +// } +// if (options.isScriptContext) { +// // https://mathiasbynens.be/notes/etago +// result = result +// .replace(/<\/(script|style)/gi, '<\\/$1') +// .replace(/