diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..f6e42b5
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,23 @@
+name: test
+
+on: # run on pushes to master/main and on every pull request
+  push:
+    branches:
+      - master
+      - main
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: erlef/setup-beam@v1 # installs the Erlang/OTP, Gleam and rebar3 versions listed below
+        with:
+          otp-version: "28"
+          gleam-version: "1.13.0"
+          rebar3-version: "3"
+          # elixir-version: "1"
+      - run: gleam deps download
+      - run: gleam test
+      - run: gleam format --check src test # fails the job when src/ or test/ is not formatted
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 937bc90..20eed64 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,24 @@
All notable changes to this project are documented in this file.
+## [1.2.3] - 2026-01-08
+### Changed
+- Replaced `escape_html` implementation with `houdini.escape` for faster,
+ allocation-friendly HTML escaping.
+- Replaced `unescape_html` with `odysseus.unescape` for comprehensive HTML
+ entity unescaping (named entities, numeric decimal and hex entities).
+- Added dependencies: `houdini`, `odysseus`.
+
+### Tests
+- Added tests for HTML escape/unescape and numeric entities (decimal and hex).
+
+Contributed by: Daniele (`lupodevelop`)
+Suggested by: Louis Pilfold (`@lpil`)
+
+Suggested by: NNB (`@NNBnh`)
+Suggested change: the README logo now points to a `raw.githubusercontent.com` URL
+(pinned to a repository commit) so that the logo resolves on HexDocs.
+
## [1.2.2] - 2026-01-05
### Added
- Added internal helper `grapheme_len/1` (internal) to centralize grapheme cluster length computation and avoid repetitive `string.to_graphemes |> list.length` patterns.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..750ed0e
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,34 @@
+# Contributing to str
+
+Thanks for helping! Short, practical guide.
+
+## Quick start
+- Fork, create a branch: `git switch -c feat/your-change`.
+- Run `gleam format` and `gleam test` locally.
+- Open a PR against `main` with a short description and tests.
+
+## Setup
+- Requirements: Gleam (see `gleam.toml`)
+
+Commands:
+```bash
+gleam format
+gleam test
+```
+
+## Commits
+Use brief prefixes: `feat:`, `fix:`, `chore:`, `test:`, `perf:`.
+Example: `feat(display): add truncate_display`
+These prefixes are a guideline, not a strictly enforced rule.
+
+## PR checklist
+- [ ] Tests added/updated
+- [ ] `gleam format` & `gleam test` pass
+- [ ] Update `CHANGELOG.md` if behaviour changes
+- [ ] Document noteworthy changes in `README.md`, `docs/`, or `examples/`
+
+## Deprecations
+- Report breaking changes in an issue and add migration notes in PRs. See `DEPRECATIONS.md` if present.
+
+## Testing
+- Add unit tests for edge cases (ZWJ, skin tones, combining marks, CJK, ambiguous widths).
diff --git a/README.md b/README.md
index 8f6b7a3..765b690 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-
+
str
@@ -327,6 +327,8 @@ gleam test
python3 scripts/generate_character_tables.py
```
+Note: as of **1.2.3**, `escape_html` uses the `houdini` library for fast, allocation-friendly escaping, and `unescape_html` uses `odysseus` for comprehensive entity support (named, decimal, and hex numeric entities). See [CHANGELOG.md](CHANGELOG.md) for details.
+
---
## 📊 Test Coverage
diff --git a/gleam.toml b/gleam.toml
index 7117b4f..80e9ec3 100644
--- a/gleam.toml
+++ b/gleam.toml
@@ -1,17 +1,18 @@
name = "str"
-version = "1.2.2"
+version = "1.2.3"
# Project metadata (fill or replace placeholders before publishing)
description = "Unicode-aware string utilities for Gleam: grapheme-safe operations, pragmatic ASCII transliteration, and slug generation."
licenses = ["MIT"]
repository = { type = "github", user = "lupodevelop", repo = "str" }
-links = [{ title = "Repository", href = "https://github.com/lupodevelop/str" }]
# For a full reference of all the available options, see:
# https://gleam.run/writing-gleam/gleam-toml/
[dependencies]
gleam_stdlib = ">= 0.44.0 and < 2.0.0"
+houdini = ">= 1.0.0 and < 2.0.0"
+odysseus = ">= 1.0.0 and < 2.0.0"
[dev-dependencies]
gleeunit = ">= 1.0.0 and < 2.0.0"
diff --git a/manifest.toml b/manifest.toml
index 61ab519..cf214d7 100644
--- a/manifest.toml
+++ b/manifest.toml
@@ -4,8 +4,12 @@
packages = [
{ name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" },
{ name = "gleeunit", version = "1.9.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "DA9553CE58B67924B3C631F96FE3370C49EB6D6DC6B384EC4862CC4AAA718F3C" },
+ { name = "houdini", version = "1.2.0", build_tools = ["gleam"], requirements = [], otp_app = "houdini", source = "hex", outer_checksum = "5DB1053F1AF828049C2B206D4403C18970ABEF5C18671CA3C2D2ED0DD64F6385" },
+ { name = "odysseus", version = "1.0.0", build_tools = ["gleam"], requirements = [], otp_app = "odysseus", source = "hex", outer_checksum = "6A97DA1075BDDEA8B60F47B1DFFAD49309FA27E73843F13A0AF32EA7087BA11C" },
]
[requirements]
gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" }
gleeunit = { version = ">= 1.0.0 and < 2.0.0" }
+houdini = { version = ">= 1.0.0 and < 2.0.0" }
+odysseus = { version = ">= 1.0.0 and < 2.0.0" }
diff --git a/src/str/core.gleam b/src/str/core.gleam
index ad7304a..ad07923 100644
--- a/src/str/core.gleam
+++ b/src/str/core.gleam
@@ -13,6 +13,8 @@ import gleam/dict
import gleam/int
import gleam/list
import gleam/string
+import houdini
+import odysseus
import str/config
/// Detects if a grapheme cluster likely contains emoji components.
@@ -1766,12 +1768,7 @@ pub fn is_hex(text: String) -> Bool {
/// escape_html("Say \"hello\"") -> "Say &quot;hello&quot;"
///
pub fn escape_html(text: String) -> String {
- text
- |> string.replace("&", "&amp;")
- |> string.replace("<", "&lt;")
- |> string.replace(">", "&gt;")
- |> string.replace("\"", "&quot;")
- |> string.replace("'", "&#39;")
+ houdini.escape(text)
}
/// Unescapes HTML entities to their character equivalents.
@@ -1781,12 +1778,7 @@ pub fn escape_html(text: String) -> String {
/// unescape_html("Tom &amp; Jerry") -> "Tom & Jerry"
///
pub fn unescape_html(text: String) -> String {
- text
- |> string.replace("&#39;", "'")
- |> string.replace("&quot;", "\"")
- |> string.replace("&gt;", ">")
- |> string.replace("&lt;", "<")
- |> string.replace("&amp;", "&")
+ odysseus.unescape(text)
}
/// Escapes regex metacharacters so the string can be used as a literal pattern.
diff --git a/test/str_html_escape_extended_test.gleam b/test/str_html_escape_extended_test.gleam
new file mode 100644
index 0000000..0335224
--- /dev/null
+++ b/test/str_html_escape_extended_test.gleam
@@ -0,0 +1,64 @@
+import gleam/list
+import gleeunit
+import str
+
+// Entry point for this test module: hands control to gleeunit's runner.
+pub fn main() -> Nil {
+ gleeunit.main()
+}
+
+// Round-trip check: for every sample, unescaping the escaped form must give
+// back the original text.
+pub fn roundtrip_basic_entities_test() {
+  let cases = [
+    "<div>Hello</div>",
+    "Tom & Jerry",
+    "Say \"hello\"",
+    "It's me",
+    "5 < 10 && 10 > 5",
+    "Ampersand: &",
+  ]
+
+  // The fold is only used to visit each case; the accumulator is a dummy
+  // `True`, and a failing `assert` is what actually fails the test.
+  list.fold(cases, True, fn(_, s) {
+    let escaped = str.escape_html(s)
+    let unescaped = str.unescape_html(escaped)
+    assert unescaped == s
+    True
+  })
+}
+
+// Named entities plus decimal (`&#NN;`) and hex (`&#xNN;`) numeric entities
+// must all decode to their literal characters.
+pub fn numeric_and_named_entities_test() {
+  // Named entities followed by a decimal and a hex apostrophe.
+  assert str.unescape_html("&lt;&gt;&amp;&#39;&#x27;&quot;") == "<>&''\""
+  // The same double quote written as named, decimal and hex entity.
+  assert str.unescape_html("&quot; and &#34; and &#x22;") == "\" and \" and \""
+  assert str.unescape_html("I like &#39;quotes&#39;") == "I like 'quotes'"
+  assert str.unescape_html("Hex: &#x27;") == "Hex: '"
+}
+
+// Input that only looks like an entity must pass through `unescape_html`
+// untouched.
+pub fn malformed_and_unknown_entity_test() {
+  // Missing semicolon should remain unchanged
+  assert str.unescape_html("This &amp is broken") == "This &amp is broken"
+
+  // Unknown entity should remain unchanged
+  assert str.unescape_html("This &notanentity; remains")
+    == "This &notanentity; remains"
+}
+
+// Entities placed directly next to each other must each be decoded
+// independently.
+pub fn combined_and_adjacent_entities_test() {
+  assert str.unescape_html("&lt;&lt; &gt;&gt;") == "<< >>"
+  assert str.unescape_html("&amp;&amp;&amp;") == "&&&"
+}
+
+// Round-trip check for text containing accented letters, a dash, emoji and a
+// no-break space: escaping followed by unescaping must return the input.
+pub fn unicode_and_emoji_roundtrip_test() {
+  let original = "Café — ️👩👩👧👦 \u{00A0}"
+  // Only the round trip is asserted; the escaped form itself is not inspected.
+  let restored = original |> str.escape_html |> str.unescape_html
+  assert restored == original
+}
+
+// Escaping is applied once per call: escaping already-escaped text escapes the
+// ampersand again, and each unescape call undoes exactly one level.
+pub fn idempotence_and_double_escape_test() {
+  let s = "&"
+  let once = str.escape_html(s)
+  let twice = str.escape_html(once)
+  assert once == "&amp;"
+  assert twice == "&amp;amp;"
+  // unescape decodes one level: "&amp;amp;" -> "&amp;"; double unescape restores original
+  assert str.unescape_html(twice) == "&amp;"
+  assert str.unescape_html(str.unescape_html(twice)) == s
+}
diff --git a/test/str_html_escape_fuzz_test.gleam b/test/str_html_escape_fuzz_test.gleam
new file mode 100644
index 0000000..5debba8
--- /dev/null
+++ b/test/str_html_escape_fuzz_test.gleam
@@ -0,0 +1,70 @@
+import gleeunit
+import str
+import gleam/list
+import gleam/string
+
+// Entry point for this test module: hands control to gleeunit's runner.
+pub fn main() -> Nil {
+ gleeunit.main()
+}
+
+// Deterministic, simple generator over a token pool.
+fn gen_token_pool() -> List(String) {
+ [
+ "a","b","c","1","2","3"," ","\n","<",">","&","\"","'",
+ "&","<",">",""","'","'",""","¬anentity;",
+ "&","&","", "",
+ "\u{00A0}", // NBSP
+ "Café","naïve","ø","漢","字",
+ "👩👩👧👦","👨👩👧","️","✈️","🏳️🌈",
+ "\u{0301}", // combining acute
+ "α","β","γ"
+ ]
+}
+
+// Deterministic pseudo-random index using seed and i
+fn idx_for(seed: Int, i: Int, len: Int) -> Int {
+ // simple LCG-ish formula; keep small to avoid large-int overhead
+ let v = seed * 1103515245 + 12345 + i
+ let v_pos = case v < 0 { True -> -v False -> v }
+ v_pos % len
+}
+
+// Builds a string by concatenating `n` tokens, where token number `i` is the
+// one at index `idx_for(seed, i, len)` in `tokens`.
+// Returns "" when `n` is zero or negative.
+fn gen_string(seed: Int, tokens: List(String), n: Int) -> String {
+  let len = list.length(tokens)
+  case n <= 0 {
+    // `list.range(0, n - 1)` counts downwards when `n - 1` is negative and
+    // would still produce indices, so the empty case is handled explicitly.
+    True -> ""
+    False ->
+      list.range(0, n - 1)
+      |> list.map(fn(i) {
+        // Dropping the first `idx` tokens leaves the wanted token at the head;
+        // an exhausted list contributes nothing.
+        case list.drop(tokens, idx_for(seed, i, len)) {
+          [token, ..] -> token
+          [] -> ""
+        }
+      })
+      |> string.concat
+  }
+}
+
+// Runs one generated case: builds the input from `seed` and `n`, then asserts
+// the escape/unescape round trip and that the escaped form contains no raw
+// `<`, `>`, `"` or `'`. Returns `True` when every assertion holds.
+fn run_cfg(seed: Int, n: Int, tokens: List(String)) -> Bool {
+  let input = gen_string(seed, tokens, n)
+  let escaped = str.escape_html(input)
+
+  // Roundtrip: unescape(escape(input)) == input
+  assert str.unescape_html(escaped) == input
+
+  // Escaped string must not contain raw angle brackets or quotes
+  assert !string.contains(escaped, "<")
+  assert !string.contains(escaped, ">")
+  assert !string.contains(escaped, "\"")
+  assert !string.contains(escaped, "'")
+
+  True
+}
+
+// Drives `run_cfg` over three fixed (seed, token count) configurations that
+// share one token pool.
+pub fn fuzz_roundtrip_test() {
+  let tokens = gen_token_pool()
+
+  // Each pair is #(seed, number of tokens); they run in the listed order.
+  [#(1, 20), #(42, 50), #(123, 200)]
+  |> list.each(fn(cfg) { run_cfg(cfg.0, cfg.1, tokens) })
+
+  True
+}
diff --git a/test/str_html_escape_test.gleam b/test/str_html_escape_test.gleam
new file mode 100644
index 0000000..ae1c9cb
--- /dev/null
+++ b/test/str_html_escape_test.gleam
@@ -0,0 +1,31 @@
+import str
+
+// `escape_html` must replace `<`, `>`, `&` and `"` with their entities.
+pub fn escape_basic_test() {
+  assert str.escape_html("<div>Hello</div>") == "&lt;div&gt;Hello&lt;/div&gt;"
+  assert str.escape_html("Tom & Jerry") == "Tom &amp; Jerry"
+  assert str.escape_html("Say \"hello\"") == "Say &quot;hello&quot;"
+}
+
+// `unescape_html` must decode the common named entities and the decimal
+// apostrophe entity back to literal characters.
+pub fn unescape_basic_test() {
+  assert str.unescape_html("&lt;div&gt;") == "<div>"
+  assert str.unescape_html("Tom &amp; Jerry") == "Tom & Jerry"
+  assert str.unescape_html("Say &quot;hello&quot;") == "Say \"hello\""
+  assert str.unescape_html("It&#39;s me") == "It's me"
+}
+
+// Escaping and then unescaping a string with `&`, `<`, `>` and `"` must
+// return the original string.
+pub fn roundtrip_test() {
+  let original = "Hello & < > \""
+  let restored = original |> str.escape_html |> str.unescape_html
+  assert restored == original
+}
+
+// Decimal (`&#NN;`) and hexadecimal (`&#xNN;`) numeric entities must decode to
+// the characters they reference.
+pub fn numeric_entities_test() {
+  // Decimal numeric entity
+  assert str.unescape_html("I like &#39;quotes&#39;") == "I like 'quotes'"
+
+  // Hex numeric entity
+  assert str.unescape_html("Hex: &#x27;") == "Hex: '"
+
+  // Double quote numeric and hex
+  assert str.unescape_html("&quot; and &#34; and &#x22;") == "\" and \" and \""
+}