From 319aca6d08ed4538a674bd523b6c28b6b3c800f8 Mon Sep 17 00:00:00 2001 From: Daniele Date: Thu, 8 Jan 2026 13:27:20 +0100 Subject: [PATCH 01/11] Add houdini and odysseus as dependencies Updated gleam.toml and manifest.toml to include houdini and odysseus as new dependencies. Bumped package version to 1.2.3 to reflect these changes. --- gleam.toml | 4 +++- manifest.toml | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gleam.toml b/gleam.toml index 7117b4f..4052702 100644 --- a/gleam.toml +++ b/gleam.toml @@ -1,5 +1,5 @@ name = "str" -version = "1.2.2" +version = "1.2.3" # Project metadata (fill or replace placeholders before publishing) description = "Unicode-aware string utilities for Gleam: grapheme-safe operations, pragmatic ASCII transliteration, and slug generation." @@ -12,6 +12,8 @@ links = [{ title = "Repository", href = "https://github.com/lupodevelop/str" }] [dependencies] gleam_stdlib = ">= 0.44.0 and < 2.0.0" +houdini = ">= 1.0.0 and < 2.0.0" +odysseus = ">= 1.0.0 and < 2.0.0" [dev-dependencies] gleeunit = ">= 1.0.0 and < 2.0.0" diff --git a/manifest.toml b/manifest.toml index 61ab519..cf214d7 100644 --- a/manifest.toml +++ b/manifest.toml @@ -4,8 +4,12 @@ packages = [ { name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" }, { name = "gleeunit", version = "1.9.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "DA9553CE58B67924B3C631F96FE3370C49EB6D6DC6B384EC4862CC4AAA718F3C" }, + { name = "houdini", version = "1.2.0", build_tools = ["gleam"], requirements = [], otp_app = "houdini", source = "hex", outer_checksum = "5DB1053F1AF828049C2B206D4403C18970ABEF5C18671CA3C2D2ED0DD64F6385" }, + { name = "odysseus", version = "1.0.0", build_tools = ["gleam"], requirements = [], otp_app = "odysseus", source = "hex", outer_checksum = "6A97DA1075BDDEA8B60F47B1DFFAD49309FA27E73843F13A0AF32EA7087BA11C" }, ] [requirements] gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" } gleeunit = { version = ">= 1.0.0 and < 2.0.0" } +houdini = { version = ">= 1.0.0 and < 2.0.0" } +odysseus = { version = ">= 1.0.0 and < 2.0.0" } From 3e8d2b47df12f0516ea623bb0f3205d9417c359c Mon Sep 17 00:00:00 2001 From: Daniele Date: Thu, 8 Jan 2026 13:27:27 +0100 Subject: [PATCH 02/11] Use houdini and odysseus for HTML escaping Replaces manual HTML escaping and unescaping logic with calls to the houdini and odysseus libraries, simplifying the code and improving maintainability. --- src/str/core.gleam | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/str/core.gleam b/src/str/core.gleam index ad7304a..ad07923 100644 --- a/src/str/core.gleam +++ b/src/str/core.gleam @@ -13,6 +13,8 @@ import gleam/dict import gleam/int import gleam/list import gleam/string +import houdini +import odysseus import str/config /// Detects if a grapheme cluster likely contains emoji components. @@ -1766,12 +1768,7 @@ pub fn is_hex(text: String) -> Bool { /// escape_html("Say \"hello\"") -> "Say "hello"" /// pub fn escape_html(text: String) -> String { - text - |> string.replace("&", "&") - |> string.replace("<", "<") - |> string.replace(">", ">") - |> string.replace("\"", """) - |> string.replace("'", "'") + houdini.escape(text) } /// Unescapes HTML entities to their character equivalents. @@ -1781,12 +1778,7 @@ pub fn escape_html(text: String) -> String { /// unescape_html("Tom & Jerry") -> "Tom & Jerry" /// pub fn unescape_html(text: String) -> String { - text - |> string.replace("'", "'") - |> string.replace(""", "\"") - |> string.replace(">", ">") - |> string.replace("<", "<") - |> string.replace("&", "&") + odysseus.unescape(text) } /// Escapes regex metacharacters so the string can be used as a literal pattern. From 5d7a60e4b36a3c3939ef0fc357173782c24f682b Mon Sep 17 00:00:00 2001 From: Daniele Date: Thu, 8 Jan 2026 13:27:35 +0100 Subject: [PATCH 03/11] Add tests for HTML escape and unescape functions Introduces unit tests for str.escape_html and str.unescape_html, covering basic escaping, unescaping, roundtrip conversions, and numeric entity handling. --- test/str_html_escape_test.gleam | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 test/str_html_escape_test.gleam diff --git a/test/str_html_escape_test.gleam b/test/str_html_escape_test.gleam new file mode 100644 index 0000000..ae1c9cb --- /dev/null +++ b/test/str_html_escape_test.gleam @@ -0,0 +1,31 @@ +import str + +pub fn escape_basic_test() { + assert str.escape_html("
Hello
") == "<div>Hello</div>" + assert str.escape_html("Tom & Jerry") == "Tom & Jerry" + assert str.escape_html("Say \"hello\"") == "Say "hello"" +} + +pub fn unescape_basic_test() { + assert str.unescape_html("<div>") == "
" + assert str.unescape_html("Tom & Jerry") == "Tom & Jerry" + assert str.unescape_html("Say "hello"") == "Say \"hello\"" + assert str.unescape_html("It's me") == "It's me" +} + +pub fn roundtrip_test() { + let s = "Hello & < > \"" + let escaped = str.escape_html(s) + assert str.unescape_html(escaped) == s +} + +pub fn numeric_entities_test() { + // Decimal numeric entity + assert str.unescape_html("I like 'quotes'") == "I like 'quotes'" + + // Hex numeric entity + assert str.unescape_html("Hex: '") == "Hex: '" + + // Double quote numeric and hex + assert str.unescape_html("" and " and "") == "\" and \" and \"" +} From c190b21c301b5d78faf4525f26d86f370b55dad2 Mon Sep 17 00:00:00 2001 From: Daniele Date: Thu, 8 Jan 2026 13:27:45 +0100 Subject: [PATCH 04/11] Update CHANGELOG for v1.2.3 release Documented changes for version 1.2.3, including replacement of HTML escape/unescape implementations, new dependencies, and added tests. Contributions and suggestions are also acknowledged. --- CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 937bc90..f1d4a26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ All notable changes to this project are documented in this file. +## [1.2.3] - 2026-01-08 +### Changed +- Replaced `escape_html` implementation with `houdini.escape` for faster, + allocation-friendly HTML escaping. +- Replaced `unescape_html` with `odysseus.unescape` for comprehensive HTML + entity unescaping (named entities, numeric decimal and hex entities). +- Added dependencies: `houdini`, `odysseus`. + +### Tests +- Added tests for HTML escape/unescape and numeric entities (decimal and hex). + +Contributed by: Daniele (`lupodevelop`) +Suggested by: Louis Pilfold (`@lpil`) + ## [1.2.2] - 2026-01-05 ### Added - Added internal helper `grapheme_len/1` (internal) to centralize grapheme cluster length computation and avoid repetitive `string.to_graphemes |> list.length` patterns. From 5a96efdf42eee8593f55c10c047b12ec642d126b Mon Sep 17 00:00:00 2001 From: Daniele Date: Thu, 8 Jan 2026 15:58:07 +0100 Subject: [PATCH 05/11] Update README logo URL and add escape_html note Changed the README logo image source to use a raw.githubusercontent URL for better Hexdocs compatibility. Added a note about escape_html and unescape_html library changes in version 1.2.3. Updated CHANGELOG to document these changes and credit the suggestion. --- CHANGELOG.md | 4 ++++ README.md | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1d4a26..20eed64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ All notable changes to this project are documented in this file. Contributed by: Daniele (`lupodevelop`) Suggested by: Louis Pilfold (`@lpil`) +Suggested by: NNB (`@NNBnh`) +Suggested change: updated README logo pointer to use the raw.githubusercontent URL +(pointing to the repository commit) so the logo is resolvable on Hexdocs. + ## [1.2.2] - 2026-01-05 ### Added - Added internal helper `grapheme_len/1` (internal) to centralize grapheme cluster length computation and avoid repetitive `string.to_graphemes |> list.length` patterns. diff --git a/README.md b/README.md index 8f6b7a3..765b690 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- str logo + str logo

str

@@ -327,6 +327,8 @@ gleam test python3 scripts/generate_character_tables.py ``` +Note: as of **1.2.3**, `escape_html` now uses the `houdini` library for fast, allocation‑friendly escaping, and `unescape_html` uses `odysseus` for comprehensive entity support (named, decimal and hex numeric entities). See [CHANGELOG.md](CHANGELOG.md) for details. + --- ## 📊 Test Coverage From d718fad25f7f67917769da6bc12b32dc1a974ec6 Mon Sep 17 00:00:00 2001 From: Daniele Date: Thu, 8 Jan 2026 16:36:15 +0100 Subject: [PATCH 06/11] Add CI workflow for Gleam project Introduces a GitHub Actions workflow to run tests and formatting checks on push and pull request events for master and main branches. The workflow sets up the BEAM environment, installs dependencies, runs tests, and checks code formatting. --- .github/workflows/ci.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f6e42b5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: test + +on: + push: + branches: + - master + - main + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: erlef/setup-beam@v1 + with: + otp-version: "28" + gleam-version: "1.13.0" + rebar3-version: "3" + # elixir-version: "1" + - run: gleam deps download + - run: gleam test + - run: gleam format --check src test From 6576ec09e16cf394b325926f46534946c4a1e897 Mon Sep 17 00:00:00 2001 From: Daniele Date: Thu, 8 Jan 2026 19:29:37 +0100 Subject: [PATCH 07/11] Add CONTRIBUTING.md with contribution guidelines Introduces a CONTRIBUTING.md file outlining steps for contributing, commit conventions, PR checklist, setup instructions, and testing recommendations for the project. --- CONTRIBUTING.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..750ed0e --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,34 @@ +# Contributing to str + +Thanks for helping! Short, practical guide. + +## Quick start +- Fork, create a branch: `git switch -c feat/your-change`. +- Run `gleam format` and `gleam test` locally. +- Open a PR against `main` with a short description and tests. + +## Setup +- Requirements: Gleam (see `gleam.toml`) + +Commands: +```bash +gleam format +gleam test +``` + +## Commits +Use brief prefixes: `feat:`, `fix:`, `chore:`, `test:`, `perf:`. +Example: `feat(display): add truncate_display` +No strict enforcement, use these prefixes as a guideline, not a hard rule. + +## PR checklist +- [ ] Tests added/updated +- [ ] `gleam format` & `gleam test` pass +- [ ] Update `CHANGELOG.md` if behaviour changes +- [ ] Document noteworthy changes in `README.md` , docs/ or examples/ + +## Deprecations +- Report breaking changes in an issue and add migration notes in PRs. See `DEPRECATIONS.md` if present. + +## Testing +- Add unit tests for edge cases (ZWJ, skin tones, combining marks, CJK, ambiguous widths). From 7e52fb65dd2c3920fd89eb5bf4b5dd805ffb9c46 Mon Sep 17 00:00:00 2001 From: Daniele Date: Fri, 9 Jan 2026 07:46:41 +0100 Subject: [PATCH 08/11] Add extended HTML escape/unescape tests Introduces comprehensive tests for str.escape_html and str.unescape_html, covering basic entities, numeric and named entities, malformed and unknown entities, combined entities, Unicode/emoji roundtrips, and idempotence/double escaping. --- test/str_html_escape_extended_test.gleam | 63 ++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 test/str_html_escape_extended_test.gleam diff --git a/test/str_html_escape_extended_test.gleam b/test/str_html_escape_extended_test.gleam new file mode 100644 index 0000000..7542ccd --- /dev/null +++ b/test/str_html_escape_extended_test.gleam @@ -0,0 +1,63 @@ +import gleeunit +import str +import gleam/list + +pub fn main() -> Nil { + gleeunit.main() +} + +pub fn roundtrip_basic_entities_test() { + let cases = [ + "
Hello
", + "Tom & Jerry", + "Say \"hello\"", + "It's me", + "5 < 10 && 10 > 5", + "Ampersand: &", + ] + + list.fold(cases, True, fn(_, s) { + let escaped = str.escape_html(s) + let unescaped = str.unescape_html(escaped) + assert unescaped == s + True + }) +} + +pub fn numeric_and_named_entities_test() { + assert str.unescape_html("<>&''"") == "<>&''\"" + assert str.unescape_html("" and " and "") == "\" and \" and \"" + assert str.unescape_html("I like 'quotes'") == "I like 'quotes'" + assert str.unescape_html("Hex: '") == "Hex: '" +} + +pub fn malformed_and_unknown_entity_test() { + // Missing semicolon should remain unchanged + assert str.unescape_html("This & is broken") == "This & is broken" + + // Unknown entity should remain unchanged + assert str.unescape_html("This ¬anentity; remains") == "This ¬anentity; remains" +} + +pub fn combined_and_adjacent_entities_test() { + assert str.unescape_html("<< >>") == "<< >>" + assert str.unescape_html("&&&") == "&&&" +} + +pub fn unicode_and_emoji_roundtrip_test() { + let s = "Café — ️👩‍👩‍👧‍👦 \u{00A0}" + let escaped = str.escape_html(s) + // Expect unescape to restore the original (escape may not change emoji/nbspace) + assert str.unescape_html(escaped) == s +} + +pub fn idempotence_and_double_escape_test() { + let s = "&" + let once = str.escape_html(s) + let twice = str.escape_html(once) + assert once == "&" + assert twice == "&amp;" + // unescape decodes one level: "&amp;" -> "&"; double unescape restores original + assert str.unescape_html(twice) == "&" + assert str.unescape_html(str.unescape_html(twice)) == s +} From 801678b977a716252a8bf8dd2225c0a96960296a Mon Sep 17 00:00:00 2001 From: Daniele Date: Fri, 9 Jan 2026 08:30:59 +0100 Subject: [PATCH 09/11] Reorder imports and reformat test assertion Moved the gleam/list import to the top and reformatted a long assertion in malformed_and_unknown_entity_test for improved readability. (gleam format) --- test/str_html_escape_extended_test.gleam | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/str_html_escape_extended_test.gleam b/test/str_html_escape_extended_test.gleam index 7542ccd..0335224 100644 --- a/test/str_html_escape_extended_test.gleam +++ b/test/str_html_escape_extended_test.gleam @@ -1,6 +1,6 @@ +import gleam/list import gleeunit import str -import gleam/list pub fn main() -> Nil { gleeunit.main() @@ -36,7 +36,8 @@ pub fn malformed_and_unknown_entity_test() { assert str.unescape_html("This & is broken") == "This & is broken" // Unknown entity should remain unchanged - assert str.unescape_html("This ¬anentity; remains") == "This ¬anentity; remains" + assert str.unescape_html("This ¬anentity; remains") + == "This ¬anentity; remains" } pub fn combined_and_adjacent_entities_test() { From 0596c9eff33859056b7dae47c3d8b8669e45a0f5 Mon Sep 17 00:00:00 2001 From: Daniele Date: Fri, 9 Jan 2026 08:31:07 +0100 Subject: [PATCH 10/11] Remove unused links field from gleam.toml Deleted the 'links' field from gleam.toml as it is no longer needed. This cleans up the configuration file. --- gleam.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/gleam.toml b/gleam.toml index 4052702..80e9ec3 100644 --- a/gleam.toml +++ b/gleam.toml @@ -5,7 +5,6 @@ version = "1.2.3" description = "Unicode-aware string utilities for Gleam: grapheme-safe operations, pragmatic ASCII transliteration, and slug generation." licenses = ["MIT"] repository = { type = "github", user = "lupodevelop", repo = "str" } -links = [{ title = "Repository", href = "https://github.com/lupodevelop/str" }] # For a full reference of all the available options, see: # https://gleam.run/writing-gleam/gleam-toml/ From b586df5378789a92b238781ead942ad5e87b008c Mon Sep 17 00:00:00 2001 From: Daniele Date: Fri, 9 Jan 2026 08:39:26 +0100 Subject: [PATCH 11/11] Add fuzz test for HTML escape/unescape functions Introduces a deterministic fuzz test to verify that str.escape_html and str.unescape_html roundtrip correctly and that the escaped output does not contain raw angle brackets or quotes. --- test/str_html_escape_fuzz_test.gleam | 70 ++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 test/str_html_escape_fuzz_test.gleam diff --git a/test/str_html_escape_fuzz_test.gleam b/test/str_html_escape_fuzz_test.gleam new file mode 100644 index 0000000..5debba8 --- /dev/null +++ b/test/str_html_escape_fuzz_test.gleam @@ -0,0 +1,70 @@ +import gleeunit +import str +import gleam/list +import gleam/string + +pub fn main() -> Nil { + gleeunit.main() +} + +// Deterministic, simple generator over a token pool. +fn gen_token_pool() -> List(String) { + [ + "a","b","c","1","2","3"," ","\n","<",">","&","\"","'", + "&","<",">",""","'","'",""","¬anentity;", + "&","&","&#", "&#x", + "\u{00A0}", // NBSP + "Café","naïve","ø","漢","字", + "👩‍👩‍👧‍👦","👨‍👩‍👧","️","✈️","🏳️‍🌈", + "\u{0301}", // combining acute + "α","β","γ" + ] +} + +// Deterministic pseudo-random index using seed and i +fn idx_for(seed: Int, i: Int, len: Int) -> Int { + // simple LCG-ish formula; keep small to avoid large-int overhead + let v = seed * 1103515245 + 12345 + i + let v_pos = case v < 0 { True -> -v False -> v } + v_pos % len +} + +fn gen_string(seed: Int, tokens: List(String), n: Int) -> String { + let len = list.length(tokens) + let seq = list.range(0, n - 1) + seq + |> list.map(fn(i) { + let j = idx_for(seed, i, len) + case list.drop(tokens, j) { + [first, ..] -> first + [] -> "" + } + }) + |> list.fold("", fn(acc, s) { acc <> s }) +} + +fn run_cfg(seed: Int, n: Int, tokens: List(String)) -> Bool { + let s = gen_string(seed, tokens, n) + // Roundtrip: unescape(escape(s)) == s + let escaped = str.escape_html(s) + let unescaped = str.unescape_html(escaped) + assert unescaped == s + + // Escaped string must not contain raw angle brackets or quotes + assert string.contains(escaped, "<") == False + assert string.contains(escaped, ">") == False + assert string.contains(escaped, "\"") == False + assert string.contains(escaped, "'") == False + + True +} + +pub fn fuzz_roundtrip_test() { + let tokens = gen_token_pool() + + run_cfg(1, 20, tokens) + run_cfg(42, 50, tokens) + run_cfg(123, 200, tokens) + + True +}