Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci_js.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ jobs:
node-version: '24'

- name: Install wasm-bindgen-cli
run: cargo install wasm-bindgen-cli --version 0.2.114
uses: taiki-e/install-action@v2
with:
tool: wasm-bindgen-cli@0.2.114

- name: Install Node.js dependencies
run: npm ci
Expand Down
53 changes: 27 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[![CI: Rust](https://github.com/aynumosir/ainu-utils/actions/workflows/ci_rust.yaml/badge.svg)](https://github.com/aynumosir/ainu-utils/actions/workflows/ci_rust.yaml)
[![codecov](https://codecov.io/gh/aynumosir/ainu-utils/graph/badge.svg?token=aQHfYRVtsd)](https://codecov.io/gh/aynumosir/ainu-utils)

A collection of utility for with the Ainu language
A toolkit for Ainu language processing, available in Rust, JavaScript, and Python.

## Releases

Expand All @@ -19,7 +19,7 @@ ainu-utils is distributed as a Rust crate, but you can also use its binding for

`ainu-utils` provides several features for working with the Ainu language:

### `tokenize`
### Tokenization

Tokenizes Ainu text into morphemes.

Expand All @@ -41,70 +41,71 @@ tokenize("irankarapte. e=iwanke ya?", { keepWhitespace: false });
// => ["irankarapte", ".", "e=", "iwanke", "ya", "?"]
```

### `to_kana`
### Syllabication

Converts Ainu text written in Latin script to Kana.
Parses Ainu text into syllables.

**Python:**

```py
from ainu_utils import to_kana
from ainu_utils import syllabicate

to_kana("irankarapte. e=iwanke ya?")
# => "イランカラㇷ゚テ。 エイワンケ ヤ?"
syllabicate("irankarapte. e=iwanke ya?")
# => ["i", "ran", "ka", "rap", "te", ".", " ", "e", "=", "i", "wan", "ke", " ", "ya", "?"]
```

**JS:**

```js
import { toKana } from "ainu-utils";
import { syllabicate } from "ainu-utils";

toKana("irankarapte. e=iwanke ya?");
// => "イランカラㇷ゚テ。 エイワンケ ヤ?"
syllabicate("irankarapte. e=iwanke ya?")
// => ["i", "ran", "ka", "rap", "te", ".", " ", "e", "=", "i", "wan", "ke", " ", "ya", "?"]
```

### `number_to_words`

Converts integers between 1 and 100 to Ainu words.
### Transliteration

Converts Ainu text written in Latin script to Kana.

**Python:**

```py
from ainu_utils import number_to_words
from ainu_utils import transliterate_to_kana

number_to_words(91)
# => "sine ikasma wan easiknehotne"
transliterate_to_kana("irankarapte. e=iwanke ya?")
# => "イランカラㇷ゚テ。 エイワンケ ヤ?"
```

**JS:**

```js
import { numberToWords } from "ainu-utils";
import { transliterateToKana } from "ainu-utils";

numberToWords(91);
// => "sine ikasma wan easiknehotne"
transliterateToKana("irankarapte. e=iwanke ya?");
// => "イランカラㇷ゚テ。 エイワンケ ヤ?"
```

### `syllabicate`
### Conversion from a number to words

Parses Ainu text into syllables.
Converts integers between 1 and 100 to Ainu words.

**Python:**

```py
from ainu_utils import syllabicate
from ainu_utils import number_to_words

syllabicate("irankarapte. e=iwanke ya?")
# => ["i", "ran", "ka", "rap", "te", ".", " ", "e", "=", "i", "wan", "ke", " ", "ya", "?"]
number_to_words(91)
# => "sine ikasma wan easiknehotne"
```

**JS:**

```js
import { syllabicate } from "ainu-utils";
import { numberToWords } from "ainu-utils";

syllabicate("irankarapte. e=iwanke ya?")
// => ["i", "ran", "ka", "rap", "te", ".", " ", "e", "=", "i", "wan", "ke", " ", "ya", "?"]
numberToWords(91);
// => "sine ikasma wan easiknehotne"
```

## License
Expand Down
4 changes: 2 additions & 2 deletions ainu-utils-js/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "ainu-utils-js"
version = "0.5.1"
version = "0.5.3"
edition = "2024"
description = "A collection of utilities for the Ainu language"
description = "A toolkit for Ainu language processing, available in Rust, JavaScript, and Python."
license = "MIT"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
16 changes: 8 additions & 8 deletions ainu-utils-js/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@ pub fn tokenize(text: &str, options: JsValue) -> Vec<String> {
tokenizer::tokenize(text, keep_whitespace)
}

#[wasm_bindgen(js_name = toKana)]
pub fn to_kana(text: &str) -> String {
kana::to_kana(text)
#[wasm_bindgen]
pub fn syllabicate(text: &str) -> Vec<String> {
syllables::parse(text)
}

#[wasm_bindgen(js_name = transliterateToKana)]
pub fn transliterate_to_kana(text: &str) -> String {
kana::transliterate_to_kana(text)
}

#[wasm_bindgen(js_name = numberToWords)]
pub fn number_to_words(input: i32) -> String {
numbers::parse(input).unwrap().to_string()
}

#[wasm_bindgen]
pub fn syllabicate(text: &str) -> Vec<String> {
syllables::parse(text)
}
8 changes: 7 additions & 1 deletion ainu-utils-js/tests/index.spec.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import { test, expect } from "vitest";
import { tokenize } from "../dist/index.js";
import { tokenize, transliterateToKana } from "../dist/index.js";

test("tokenize", () => {
const tokens = tokenize("irankarapte. e=iwanke ya?", { keepWhitespace: false });
expect(tokens).toEqual(["irankarapte", ".", "e=", "iwanke", "ya", "?"]);
});


test("transliterateToKana", () => {
const tokens = transliterateToKana("irankarapte. e=iwanke ya?");
expect(tokens).toBe("イランカラㇷ゚テ。 エイワンケ ヤ?");
});
4 changes: 2 additions & 2 deletions ainu-utils-python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "ainu-utils-python"
version = "0.5.1"
version = "0.5.3"
edition = "2021"
description = "A collection of utilities for the Ainu language"
description = "A toolkit for Ainu language processing, available in Rust, JavaScript, and Python."
license = "MIT"

[lib]
Expand Down
4 changes: 2 additions & 2 deletions ainu-utils-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ features = ["pyo3/extension-module"]

[project]
name = "ainu-utils"
description = "A collection of utility for with the Ainu language"
description = "A toolkit for Ainu language processing"
requires-python = ">=3.8"
version = "0.5.1"
version = "0.5.3"
license = "MIT"

[project.optional-dependencies]
Expand Down
6 changes: 3 additions & 3 deletions ainu-utils-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ fn tokenize(text: &str, keep_whitespace: bool) -> Vec<String> {
}

#[pyfunction]
fn to_kana(text: &str) -> String {
ainu_utils_rust::kana::to_kana(text)
fn transliterate_to_kana(text: &str) -> String {
ainu_utils_rust::kana::transliterate_to_kana(text)
}

#[pyfunction]
Expand All @@ -26,7 +26,7 @@ fn syllabicate(text: &str) -> Vec<String> {
#[pymodule]
fn ainu_utils(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(tokenize, m)?)?;
m.add_function(wrap_pyfunction!(to_kana, m)?)?;
m.add_function(wrap_pyfunction!(transliterate_to_kana, m)?)?;
m.add_function(wrap_pyfunction!(number_to_words, m)?)?;
m.add_function(wrap_pyfunction!(syllabicate, m)?)?;
m.add("test_number", 123)?;
Expand Down
6 changes: 0 additions & 6 deletions ainu-utils-python/tests/test_to_kana.py

This file was deleted.

6 changes: 6 additions & 0 deletions ainu-utils-python/tests/test_transliterate_to_kana.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import ainu_utils


def test_transliterate_to_kana():
result = ainu_utils.transliterate_to_kana("irankarapte. e=iwanke ya?")
assert result == "イランカラㇷ゚テ。 エイワンケ ヤ?"
4 changes: 2 additions & 2 deletions ainu-utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "ainu-utils"
version = "0.5.1"
version = "0.5.3"
edition = "2024"
description = "A collection of utilities for the Ainu language"
description = "A toolkit for Ainu language processing, available in Rust, JavaScript, and Python."
license = "MIT"

[dependencies]
Expand Down
16 changes: 11 additions & 5 deletions ainu-utils/src/kana/kana.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
use crate::normalizer::normalize;
use crate::phoneme::Phoneme;
use unicode_normalization::UnicodeNormalization;
use unicode_normalization::char::is_combining_mark;

use super::kana_linking::link;
use super::kana_map_c::map_c;
use super::kana_map_cv::map_cv;
use super::kana_map_punc::map_punc;
use super::kana_map_v::map_v;

pub fn to_kana(input: &str) -> String {
pub fn transliterate_to_kana(input: &str) -> String {
let mut input: String = input.to_string();
input = link(&input);

let words: Vec<&str> = input.split(' ').collect();
let mut output = String::new();

for word in words {
let kana = word_to_kana(word);
let kana = transliterate_word_to_kana(word);

if kana.chars().any(|c| c.is_ascii_alphabetic()) {
output += word;
Expand All @@ -31,10 +32,15 @@ pub fn to_kana(input: &str) -> String {
output
}

fn word_to_kana(input: &str) -> String {
fn strip_accents(input: &str) -> String {
input.nfkd().filter(|c| !is_combining_mark(*c)).collect()
}

fn transliterate_word_to_kana(input: &str) -> String {
let mut input: String = input.to_string();

input = normalize(&input);
input = input.to_lowercase();
input = strip_accents(&input);
input = map_punc(&input);

let chars: Vec<char> = input.chars().collect();
Expand Down
Loading
Loading