From db4154f07246f43c9b86bafa499444e5ab758bce Mon Sep 17 00:00:00 2001 From: Gregory Golberg Date: Wed, 2 Oct 2019 22:01:33 +0000 Subject: [PATCH] Fix for surrogate issue (https://github.com/cloudflare/lua-resty-json/issues/10) --- scaner.c | 3 +-- tests/test_spec/test_diagnostic.txt | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/scaner.c b/scaner.c index b18dc36..c3a38f3 100644 --- a/scaner.c +++ b/scaner.c @@ -338,10 +338,9 @@ process_u_esc(scaner_t* scaner, const char* src, const char* src_end, /* Detect UTF-16 surrogate pair. The codepoint be in this form : 110110x... */ - if (codepoint >= 0xd800) { + if (codepoint >= 0xd800 && (codepoint < 0xe000 || codepoint > 0xffff)) { int32_t codepoint_low; const char* lower = src + 6; - if (codepoint & 0x400) { set_scan_err(scaner, src, "Higher part of UTF-16 surrogate must " "be in the range of [0xd800, 0xdbff]"); diff --git a/tests/test_spec/test_diagnostic.txt b/tests/test_spec/test_diagnostic.txt index a0de334..467827a 100644 --- a/tests/test_spec/test_diagnostic.txt +++ b/tests/test_spec/test_diagnostic.txt @@ -26,5 +26,8 @@ output: (line:1,col:10) Higher part of UTF-16 surrogate must be in the range of input: ["\u",""] output: (line:1,col:10) illegal escape \u -input: {"\uE330\uE330": [" -output: (line:1,col:16) Lower part of UTF-16 surrogate must be in the range of [0xdc00, 0xdfff] +input: "\uE330\uE330" +output: "\ue330\ue330" + +input: ["\u6570\u5b57\u534e\u5bb9\u9053_f3\uff08IOS\uff09"] +output: ["\u6570\u5b57\u534e\u5bb9\u9053_f3\uff08IOS\uff09"]