Skip to content
This repository was archived by the owner on Nov 27, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"size": "size-limit",
"analyze": "size-limit --why"
},
"peerDependencies": {},
"type": "module",
"husky": {
"hooks": {
"pre-commit": "tsdx lint"
Expand All @@ -45,17 +45,15 @@
}
],
"devDependencies": {
"@size-limit/preset-small-lib": "^7.0.8",
"@size-limit/preset-small-lib": "^11.0.2",
"@types/chai": "^4.3.0",
"@types/jest": "^27.4.0",
"@types/mocha": "^9.1.0",
"husky": "^7.0.4",
"size-limit": "^7.0.8",
"@types/jest": "^29.5.12",
"@types/mocha": "^10.0.6",
"husky": "^9.0.11",
"size-limit": "^11.0.2",
"tsdx": "^0.14.1",
"tslib": "^2.3.1",
"typescript": "^4.5.5"
},
"dependencies": {
"chai": "^4.3.6"
"typescript": "^5.3.3",
"chai": "^5.1.0"
}
}
58 changes: 55 additions & 3 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,26 +154,78 @@ export const DUTCH_MAGNITUDE = {
quintiljard: 1000000000000000000000000000000000,
};

/**
 * German number words for the values 0 through 19.
 *
 * Several values have more than one accepted word: `ein`/`eins` both map to 1
 * and `zwei`/`zwo` both map to 2.
 *
 * The previous revision spelled 15 as `fünzehn`, so the real word `fünfzehn`
 * was never recognised; the key is now spelled correctly. It is listed before
 * `fünf`, mirroring the neighbouring `dreizehn`/`drei` and `vierzehn`/`vier`
 * entries where the longer word precedes the shorter word it contains.
 */
export const GERMAN_UNIT = {
  null: 0,
  ein: 1,
  eins: 1,
  elf: 11,
  zwölf: 12,
  zwei: 2,
  zwo: 2,
  dreizehn: 13,
  drei: 3,
  vierzehn: 14,
  vier: 4,
  fünfzehn: 15,
  fünf: 5,
  sechs: 6,
  sechzehn: 16,
  siebzehn: 17,
  sieben: 7,
  achtzehn: 18,
  acht: 8,
  neunzehn: 19,
  neun: 9,
  zehn: 10,
};

/**
 * German number words for the multiples of ten from 20 through 90.
 * 30 is listed under both spellings, `dreissig` and `dreißig`.
 */
export const GERMAN_TEN = {
zwanzig: 20,
dreissig: 30,
dreißig: 30,
vierzig: 40,
fünfzig: 50,
sechzig: 60,
siebzig: 70,
achtzig: 80,
neunzig: 90,
};

/**
 * German magnitude words and their numeric values.
 *
 * German uses the long scale: `milliarde` is 10^9, `billion` is 10^12,
 * `billiarde` is 10^15 and `trillion` is 10^18.
 *
 * Every magnitude from `million` upwards is listed in both its singular and
 * plural form. Previously only `millionen` was present, so plural words such
 * as `milliarden` were not recognised. Each plural follows its singular,
 * matching the existing `million`/`millionen` ordering.
 */
export const GERMAN_MAGNITUDE = {
  hundert: 100,
  tausend: 1000,
  million: 1000000,
  millionen: 1000000,
  milliarde: 1000000000,
  milliarden: 1000000000,
  billion: 1000000000000,
  billionen: 1000000000000,
  billiarde: 1000000000000000,
  billiarden: 1000000000000000,
  trillion: 1000000000000000000,
  trillionen: 1000000000000000000,
};

/**
 * Flat lookup that merges the unit, ten and magnitude tables of every
 * supported language (English, Dutch, German) into a single object.
 *
 * The tables are spread in the order written, so when two tables define the
 * same key the value from the later table is the one kept.
 *
 * NOTE(review): GERMAN_MAGNITUDE defines `million`, `billion` (10^12) and
 * `trillion` (10^18) and is spread last. If ENGLISH_MAGNITUDE (not visible
 * here) defines `billion`/`trillion` with short-scale values, those English
 * values are overridden in this map — confirm whether a per-language lookup
 * is needed.
 */
export const NUMBER = {
...ENGLISH_UNIT,
...ENGLISH_TEN,
...ENGLISH_MAGNITUDE,
...DUTCH_UNIT,
...DUTCH_TEN,
...DUTCH_MAGNITUDE,
...GERMAN_UNIT,
...GERMAN_TEN,
...GERMAN_MAGNITUDE,
};

export const UNIT_KEYS = Object.keys({ ...ENGLISH_UNIT, ...DUTCH_UNIT });
export const TEN_KEYS = Object.keys({ ...ENGLISH_TEN, ...DUTCH_TEN });
export const UNIT_KEYS = Object.keys({ ...ENGLISH_UNIT, ...DUTCH_UNIT, ...GERMAN_UNIT });
export const TEN_KEYS = Object.keys({ ...ENGLISH_TEN, ...DUTCH_TEN, ...GERMAN_TEN });
/**
 * Every magnitude word known across the English, Dutch and German tables.
 * The tables are merged into one object first, so a word shared by several
 * languages appears only once in the resulting list.
 */
export const MAGNITUDE_KEYS = Object.keys({
...ENGLISH_MAGNITUDE,
...DUTCH_MAGNITUDE,
...GERMAN_MAGNITUDE,
});

//@ts-ignore
export const NUMBER_WORDS = [...UNIT_KEYS, ...TEN_KEYS, ...MAGNITUDE_KEYS];

export const JOINERS = ['and', 'en'];
export const JOINERS = ['and', 'en', 'und'];
export const DECIMALS = ['point', 'dot', 'komma', 'punt'];

export const PUNCTUATION = [
Expand Down
108 changes: 108 additions & 0 deletions src/modifiers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ import {
DUTCH_MAGNITUDE,
DUTCH_TEN,
DUTCH_UNIT,
GERMAN_MAGNITUDE,
GERMAN_TEN,
GERMAN_UNIT,
ENGLISH_MAGNITUDE,
ENGLISH_SPECIFIC_SPLIT,
ENGLISH_TEN,
Expand Down Expand Up @@ -121,6 +124,111 @@ function predict(
return { longestStart, longestEnd };
}

/**
 * Breaks a German chunk into the individual number words it is built from.
 *
 * - A chunk that is exactly one known unit, ten or magnitude word is returned
 *   unchanged as a string.
 * - Otherwise every known word contained in the chunk is handed to
 *   `calculatePossibilities`, overlapping candidates are pruned, and the
 *   remaining words are returned as an array ordered by their position in the
 *   chunk — except that a unit directly followed by a ten swaps places with
 *   that ten (presumably so "einundzwanzig" is emitted as ten-then-unit;
 *   the exact candidates depend on `calculatePossibilities` — confirm).
 *
 * @param chunk - a single word taken from the input text
 * @returns the chunk itself, or the list of number words found inside it
 */
export const modifyGerman = (chunk: string): string | string[] => {
  const unitWords = Object.keys(GERMAN_UNIT);
  const tenWords = Object.keys(GERMAN_TEN);
  const magnitudeWords = Object.keys(GERMAN_MAGNITUDE);

  // Already a complete number word: nothing to split.
  const isKnownWord = [unitWords, tenWords, magnitudeWords].some(words =>
    words.includes(chunk)
  );
  if (isKnownWord) {
    return chunk;
  }

  const containedIn = (words: string[]): string[] =>
    words.filter(word => chunk.includes(word));

  const candidates: Possibility[] = calculatePossibilities(
    containedIn(unitWords),
    containedIn(tenWords),
    containedIn(magnitudeWords),
    chunk
  );

  const kept: Possibility[] = candidates.filter(candidate => {
    if (candidate.type === TOKEN_TYPE.UNIT) {
      // Drop a unit whose span lies inside another, differently valued candidate.
      return !candidates.some(
        other =>
          other.value !== candidate.value &&
          other.start <= candidate.start &&
          other.end >= candidate.end
      );
    }
    if (candidate.type === TOKEN_TYPE.TEN) {
      // Drop a ten when a longer candidate starting at the same position contains it.
      return !candidates.some(
        other =>
          other.value.includes(candidate.value) &&
          other.start === candidate.start &&
          other.end > candidate.end
      );
    }
    return true;
  });

  // Restore reading order before pairing units with tens.
  kept.sort((left, right) => left.start - right.start);

  return kept.map((current, index) => {
    const before = kept[index - 1];
    const after = kept[index + 1];

    // Second half of a unit/ten pair: emit the unit that preceded this ten.
    if (
      before &&
      current.type === TOKEN_TYPE.TEN &&
      before.type === TOKEN_TYPE.UNIT
    ) {
      return before.value;
    }
    // First half of a unit/ten pair: emit the ten that follows this unit.
    if (
      after &&
      current.type === TOKEN_TYPE.UNIT &&
      after.type === TOKEN_TYPE.TEN
    ) {
      return after.value;
    }
    return current.value;
  });
};

/**
* Changes the lexicography of Dutch chunks so the compiler understands which number
Much more difficult than the English modifiers since Dutch has multiple numbers in a piece of text
Expand Down
5 changes: 4 additions & 1 deletion src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
TOKEN_TYPE,
UNIT_KEYS,
} from './constants';
import { modifyDutch, modifyEnglish } from './modifiers';
import { modifyDutch, modifyEnglish, modifyGerman } from './modifiers';
import { HANDLE_TOKEN, Languages, Region, SubRegion, Token } from './types';

/**
Expand Down Expand Up @@ -274,6 +274,9 @@ export const parser = (text: string, language: Languages): Region[] => {
case Languages['nl-nl']:
splitted = modifyDutch(currentValue);
break;
case Languages['de-de']:
splitted = modifyGerman(currentValue);
break;
case Languages['en-us']:
splitted = modifyEnglish(currentValue);
break;
Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { TOKEN_TYPE } from './constants';
/**
 * Language codes that can be passed to the parser to select which
 * language-specific modifier is applied to each chunk.
 * Every member's value is identical to its key.
 */
export enum Languages {
'nl-nl' = 'nl-nl',
'en-us' = 'en-us',
'de-de' = 'de-de',
}
/**
* Token is a 'substring' of a sentence, it includes the position of the substring and checks if this substring is 'word' that needs converting to a number.
Expand Down
Loading