-
Notifications
You must be signed in to change notification settings - Fork 1
14 html escaping and unescaping is slow and incomplete #17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
319aca6
3e8d2b4
5d7a60e
c190b21
5a96efd
d718fad
6576ec0
7e52fb6
801678b
0596c9e
b586df5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| name: test | ||
|
|
||
| on: | ||
| push: | ||
| branches: | ||
| - master | ||
| - main | ||
| pull_request: | ||
|
|
||
| jobs: | ||
| test: | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - uses: erlef/setup-beam@v1 | ||
| with: | ||
| otp-version: "28" | ||
| gleam-version: "1.13.0" | ||
| rebar3-version: "3" | ||
| # elixir-version: "1" | ||
| - run: gleam deps download | ||
| - run: gleam test | ||
| - run: gleam format --check src test |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| # Contributing to str | ||
|
|
||
| Thanks for helping! Short, practical guide. | ||
|
|
||
| ## Quick start | ||
| - Fork, create a branch: `git switch -c feat/your-change`. | ||
| - Run `gleam format` and `gleam test` locally. | ||
| - Open a PR against `main` with a short description and tests. | ||
|
|
||
| ## Setup | ||
| - Requirements: Gleam (see `gleam.toml`) | ||
|
|
||
| Commands: | ||
| ```bash | ||
| gleam format | ||
| gleam test | ||
| ``` | ||
|
|
||
| ## Commits | ||
| Use brief prefixes: `feat:`, `fix:`, `chore:`, `test:`, `perf:`. | ||
| Example: `feat(display): add truncate_display` | ||
| No strict enforcement, use these prefixes as a guideline, not a hard rule. | ||
|
|
||
| ## PR checklist | ||
| - [ ] Tests added/updated | ||
| - [ ] `gleam format` & `gleam test` pass | ||
| - [ ] Update `CHANGELOG.md` if behaviour changes | ||
| - [ ] Document noteworthy changes in `README.md`, docs/ or examples/ | ||
|
|
||
| ## Deprecations | ||
| - Report breaking changes in an issue and add migration notes in PRs. See `DEPRECATIONS.md` if present. | ||
|
|
||
| ## Testing | ||
| - Add unit tests for edge cases (ZWJ, skin tones, combining marks, CJK, ambiguous widths). | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,5 +1,5 @@ | ||||||
| <p align="center"> | ||||||
| <img src="assets/img/logo-str.png" alt="str logo" width="280"> | ||||||
| <img src="https://raw.githubusercontent.com/lupodevelop/str/c190b21/assets/img/logo-str.png" alt="str logo" width="280"> | ||||||
| </p> | ||||||
|
|
||||||
| <h1 align="center">str</h1> | ||||||
|
|
@@ -327,6 +327,8 @@ gleam test | |||||
| python3 scripts/generate_character_tables.py | ||||||
| ``` | ||||||
|
|
||||||
| Note: as of **1.2.3**, `escape_html` now uses the `houdini` library for fast, allocation-friendly escaping, and `unescape_html` uses `odysseus` for comprehensive entity support (named, decimal and hex numeric entities). See [CHANGELOG.md](CHANGELOG.md) for details. | ||||||
|
||||||
| Note: as of **1.2.3**, `escape_html` now uses the `houdini` library for fast, allocation‑friendly escaping, and `unescape_html` uses `odysseus` for comprehensive entity support (named, decimal and hex numeric entities). See [CHANGELOG.md](CHANGELOG.md) for details. | |
| Note: as of **1.2.3**, `escape_html` now uses the `houdini` library for fast, allocation-friendly escaping, and `unescape_html` uses `odysseus` for comprehensive entity support (named, decimal and hex numeric entities). See [CHANGELOG.md](CHANGELOG.md) for details. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,17 +1,18 @@ | ||
| name = "str" | ||
| version = "1.2.2" | ||
| version = "1.2.3" | ||
|
|
||
| # Project metadata (fill or replace placeholders before publishing) | ||
| description = "Unicode-aware string utilities for Gleam: grapheme-safe operations, pragmatic ASCII transliteration, and slug generation." | ||
| licenses = ["MIT"] | ||
| repository = { type = "github", user = "lupodevelop", repo = "str" } | ||
| links = [{ title = "Repository", href = "https://github.com/lupodevelop/str" }] | ||
|
|
||
| # For a full reference of all the available options, see: | ||
| # https://gleam.run/writing-gleam/gleam-toml/ | ||
|
|
||
| [dependencies] | ||
| gleam_stdlib = ">= 0.44.0 and < 2.0.0" | ||
| houdini = ">= 1.0.0 and < 2.0.0" | ||
| odysseus = ">= 1.0.0 and < 2.0.0" | ||
|
|
||
| [dev-dependencies] | ||
| gleeunit = ">= 1.0.0 and < 2.0.0" |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,64 @@ | ||||||||||||||||||||||||||||
| import gleam/list | ||||||||||||||||||||||||||||
| import gleeunit | ||||||||||||||||||||||||||||
| import str | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Entry point of this test module: hands control to `gleeunit.main()`. | ||||||||||||||||||||||||||||
| pub fn main() -> Nil { | ||||||||||||||||||||||||||||
| gleeunit.main() | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Asserts that `str.unescape_html(str.escape_html(s)) == s` for a handful of | ||||||||||||||||||||||||||||
| /// inputs containing angle brackets, ampersands and both quote characters. | ||||||||||||||||||||||||||||
| pub fn roundtrip_basic_entities_test() { | ||||||||||||||||||||||||||||
|   let cases = [ | ||||||||||||||||||||||||||||
|     "<div>Hello</div>", | ||||||||||||||||||||||||||||
|     "Tom & Jerry", | ||||||||||||||||||||||||||||
|     "Say \"hello\"", | ||||||||||||||||||||||||||||
|     "It's me", | ||||||||||||||||||||||||||||
|     "5 < 10 && 10 > 5", | ||||||||||||||||||||||||||||
|     "Ampersand: &", | ||||||||||||||||||||||||||||
|   ] | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|   // The loop exists only for its assertions, so iterate with `list.each` | ||||||||||||||||||||||||||||
|   // instead of folding over an accumulator that is never read. | ||||||||||||||||||||||||||||
|   list.each(cases, fn(s) { | ||||||||||||||||||||||||||||
|     let escaped = str.escape_html(s) | ||||||||||||||||||||||||||||
|     let unescaped = str.unescape_html(escaped) | ||||||||||||||||||||||||||||
|     assert unescaped == s | ||||||||||||||||||||||||||||
|   }) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
Comment on lines
+19
to
+26
|
||||||||||||||||||||||||||||
| list.fold(cases, True, fn(_, s) { | |
| let escaped = str.escape_html(s) | |
| let unescaped = str.unescape_html(escaped) | |
| assert unescaped == s | |
| True | |
| }) | |
| } | |
| list.each(cases, fn(s) { | |
| let escaped = str.escape_html(s) | |
| let unescaped = str.unescape_html(escaped) | |
| assert unescaped == s | |
| }) | |
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| import gleeunit | ||
| import str | ||
| import gleam/list | ||
| import gleam/string | ||
|
|
||
| /// Entry point of this test module: hands control to `gleeunit.main()`. | ||
| pub fn main() -> Nil { | ||
| gleeunit.main() | ||
| } | ||
|
|
||
| /// Fixed pool of tokens the string generator draws from: plain ASCII, the raw | ||
| /// characters HTML escaping rewrites, already-escaped entities, partial entity | ||
| /// prefixes, and assorted non-ASCII text (accents, CJK, emoji, combining marks). | ||
| fn gen_token_pool() -> List(String) { | ||
|   [ | ||
|     "a","b","c","1","2","3"," ","\n","<",">","&","\"","'", | ||
|     // Entity tokens, written out in escaped form. The page extraction had decoded | ||
|     // them to raw characters, leaving an unparseable `"""` literal behind. | ||
|     // NOTE(review): the exact spelling of the quote entities (decimal vs hex vs | ||
|     // named) is reconstructed - confirm against the repository. | ||
|     "&amp;","&lt;","&gt;","&quot;","&#39;","&#x27;","&#34;","&notanentity;", | ||
|     // NOTE(review): the two bare "&" below may also have been entity literals | ||
|     // before extraction - confirm against the repository. | ||
|     "&","&","&#", "&#x", | ||
|     "\u{00A0}", // NBSP | ||
|     "Café","naïve","ø","漢","字", | ||
|     "👩👩👧👦","👨👩👧","️","✈️","🏳️🌈", | ||
|     "\u{0301}", // combining acute | ||
|     "α","β","γ" | ||
|   ] | ||
| } | ||
|
|
||
| /// Deterministically maps `(seed, i)` to a token index. | ||
| /// | ||
| /// Computes `seed * 1103515245 + 12345 + i`, takes its absolute value and | ||
| /// reduces it modulo `len`. | ||
| fn idx_for(seed: Int, i: Int, len: Int) -> Int { | ||
|   let mixed = seed * 1103515245 + 12345 + i | ||
|   // Fold negative values onto the non-negative range before taking the remainder. | ||
|   let magnitude = case mixed >= 0 { | ||
|     True -> mixed | ||
|     False -> -mixed | ||
|   } | ||
|   magnitude % len | ||
| } | ||
|
|
||
| /// Builds a deterministic string by concatenating `n` tokens chosen from `tokens`. | ||
| /// | ||
| /// The token for position `i` is the element at index `idx_for(seed, i, len)`, | ||
| /// where `len` is the pool size; an index past the end of the pool contributes | ||
| /// the empty string. Returns "" when `n <= 0`. | ||
| fn gen_string(seed: Int, tokens: List(String), n: Int) -> String { | ||
|   let len = list.length(tokens) | ||
|   // `list.range` is inclusive and counts downwards when the stop value is below | ||
|   // the start, so `list.range(0, n - 1)` would yield [0, -1] for n == 0 and emit | ||
|   // tokens for an empty request. Handle that case before building the range. | ||
|   case n <= 0 { | ||
|     True -> "" | ||
|     False -> | ||
|       list.range(0, n - 1) | ||
|       |> list.map(fn(i) { | ||
|         case list.drop(tokens, idx_for(seed, i, len)) { | ||
|           [first, ..] -> first | ||
|           [] -> "" | ||
|         } | ||
|       }) | ||
|       |> string.concat | ||
|   } | ||
| } | ||
|
|
||
| /// Generates one string from `(seed, n, tokens)` and asserts two properties of | ||
| /// HTML escaping on it. Returns `True` when every assertion holds. | ||
| fn run_cfg(seed: Int, n: Int, tokens: List(String)) -> Bool { | ||
|   let input = gen_string(seed, tokens, n) | ||
|   let escaped = str.escape_html(input) | ||
|
|
||
|   // Property 1: unescaping the escaped text gives back the input. | ||
|   let restored = str.unescape_html(escaped) | ||
|   assert restored == input | ||
|
|
||
|   // Property 2: no raw angle bracket or quote character survives escaping. | ||
|   list.each(["<", ">", "\"", "'"], fn(raw) { | ||
|     assert string.contains(escaped, raw) == False | ||
|   }) | ||
|
|
||
|   True | ||
| } | ||
|
|
||
| /// Runs the escaping checks in `run_cfg` for three fixed `(seed, length)` | ||
| /// configurations drawn from the shared token pool. | ||
| pub fn fuzz_roundtrip_test() { | ||
|   let pool = gen_token_pool() | ||
|
|
||
|   // Same configurations, in the same order: short, medium and long strings. | ||
|   [#(1, 20), #(42, 50), #(123, 200)] | ||
|   |> list.each(fn(cfg) { | ||
|     let #(seed, n) = cfg | ||
|     run_cfg(seed, n, pool) | ||
|   }) | ||
|
|
||
|   True | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| import str | ||
|
|
||
| /// `str.escape_html` must rewrite `<`, `>`, `&` and `"` as HTML entities. | ||
| /// | ||
| /// The expected values are spelled with their entities; the page extraction had | ||
| /// decoded them, which turned these checks into identity comparisons and left | ||
| /// one string literal unparseable. | ||
| pub fn escape_basic_test() { | ||
|   assert str.escape_html("<div>Hello</div>") == "&lt;div&gt;Hello&lt;/div&gt;" | ||
|   assert str.escape_html("Tom & Jerry") == "Tom &amp; Jerry" | ||
|   assert str.escape_html("Say \"hello\"") == "Say &quot;hello&quot;" | ||
| } | ||
|
|
||
| /// `str.unescape_html` must decode the basic entities back to raw characters. | ||
| /// | ||
| /// The inputs are spelled with their entities; the page extraction had decoded | ||
| /// them, so the calls no longer exercised any unescaping. | ||
| pub fn unescape_basic_test() { | ||
|   assert str.unescape_html("&lt;div&gt;") == "<div>" | ||
|   assert str.unescape_html("Tom &amp; Jerry") == "Tom & Jerry" | ||
|   assert str.unescape_html("Say &quot;hello&quot;") == "Say \"hello\"" | ||
|   // NOTE(review): apostrophe entity reconstructed as the decimal form - confirm. | ||
|   assert str.unescape_html("It&#39;s me") == "It's me" | ||
| } | ||
|
|
||
| /// Escaping and then unescaping a string must give back the original text. | ||
| pub fn roundtrip_test() { | ||
|   let original = "Hello & < > \"" | ||
|   let restored = | ||
|     original | ||
|     |> str.escape_html | ||
|     |> str.unescape_html | ||
|   assert restored == original | ||
| } | ||
|
|
||
| /// `str.unescape_html` must decode decimal and hexadecimal numeric entities. | ||
| /// | ||
| /// The inputs are spelled with their entities; the page extraction had decoded | ||
| /// them, leaving plain text and one unparseable string literal. | ||
| pub fn numeric_entities_test() { | ||
|   // Decimal numeric entity | ||
|   assert str.unescape_html("I like &#39;quotes&#39;") == "I like 'quotes'" | ||
|
|
||
|   // Hex numeric entity | ||
|   assert str.unescape_html("Hex: &#x27;") == "Hex: '" | ||
|
|
||
|   // Double quote: named, decimal numeric and hex numeric forms. | ||
|   // NOTE(review): which three spellings were used, and in what order, is | ||
|   // reconstructed - confirm against the repository. | ||
|   assert str.unescape_html("&quot; and &#34; and &#x22;") == "\" and \" and \"" | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is an extra space before the comma in "README.md , docs/". It should be "README.md, docs/" without the space before the comma.