Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Tokenizes Ainu text into morphemes.
```py
from ainu_utils import tokenize

tokenize("irankarapte. e=iwanke ya?", False)
tokenize("irankarapte. e=iwanke ya?", keep_whitespace=False)
# => ["irankarapte", ".", "e=", "iwanke", "ya", "?"]
```

Expand All @@ -37,7 +37,7 @@ tokenize("irankarapte. e=iwanke ya?", False)
```js
import { tokenize } from "ainu-utils";

tokenize("irankarapte. e=iwanke ya?", false);
tokenize("irankarapte. e=iwanke ya?", { keepWhitespace: false });
// => ["irankarapte", ".", "e=", "iwanke", "ya", "?"]
```

Expand All @@ -57,9 +57,9 @@ to_kana("irankarapte. e=iwanke ya?")
**JS:**

```js
import { to_kana } from "ainu-utils";
import { toKana } from "ainu-utils";

to_kana("irankarapte. e=iwanke ya?");
toKana("irankarapte. e=iwanke ya?");
// => "イランカラㇷ゚テ。 エイワンケ ヤ?"
```

Expand All @@ -79,9 +79,9 @@ number_to_words(91)
**JS:**

```js
import { number_to_words } from "ainu-utils";
import { numberToWords } from "ainu-utils";

number_to_words(91);
numberToWords(91);
// => "sine ikasma wan easiknehotne"
```

Expand Down
1 change: 1 addition & 0 deletions ainu-utils-js/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ crate-type = ["cdylib", "rlib"]
[dependencies]
ainu-utils = { path = "../ainu-utils" }
wasm-bindgen = "0.2.114"
js-sys = "0.3"
11 changes: 8 additions & 3 deletions ainu-utils-js/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
use ainu_utils::{kana, numbers, syllables, tokenizer};
use js_sys::Reflect;
use wasm_bindgen::prelude::*;

#[wasm_bindgen]
pub fn tokenize(text: &str, keep_whitespace: bool) -> Vec<String> {
pub fn tokenize(text: &str, options: JsValue) -> Vec<String> {
let keep_whitespace = Reflect::get(&options, &JsValue::from_str("keepWhitespace"))
.ok()
.and_then(|v| v.as_bool())
.unwrap_or(false);
tokenizer::tokenize(text, keep_whitespace)
}

#[wasm_bindgen]
#[wasm_bindgen(js_name = toKana)]
pub fn to_kana(text: &str) -> String {
kana::to_kana(text)
}

#[wasm_bindgen]
#[wasm_bindgen(js_name = numberToWords)]
pub fn number_to_words(input: i32) -> String {
numbers::parse(input).unwrap().to_string()
}
Expand Down
2 changes: 1 addition & 1 deletion ainu-utils-js/tests/index.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ import { test, expect } from "vitest";
import { tokenize } from "../dist/index.js";

test("tokenize", () => {
const tokens = tokenize("irankarapte. e=iwanke ya?", false);
const tokens = tokenize("irankarapte. e=iwanke ya?", { keepWhitespace: false });
expect(tokens).toEqual(["irankarapte", ".", "e=", "iwanke", "ya", "?"]);
});
2 changes: 1 addition & 1 deletion ainu-utils-python/ainu_utils.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# https://www.maturin.rs/project_layout#adding-python-type-information
def tokenize(text: str, keep_whitespace: bool) -> list[str]: ...
def tokenize(text: str, *, keep_whitespace: bool = False) -> list[str]: ...
def to_kana(text: str) -> str: ...
def number_to_words(number: int) -> str: ...
def syllabicate(text: str) -> list[str]: ...
1 change: 1 addition & 0 deletions ainu-utils-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ extern crate ainu_utils as ainu_utils_rust;
use pyo3::prelude::*;

#[pyfunction]
#[pyo3(signature = (text, *, keep_whitespace = false))]
fn tokenize(text: &str, keep_whitespace: bool) -> Vec<String> {
ainu_utils_rust::tokenizer::tokenize(text, keep_whitespace)
}
Expand Down
Loading