From 974ad889fb3083374e361ac16325258799c3c45f Mon Sep 17 00:00:00 2001 From: afonsof Date: Mon, 13 Nov 2023 17:39:07 -0300 Subject: [PATCH 1/3] feat: add support to portuguese language --- src/compiler.ts | 14 ++- src/constants.ts | 239 +++++++++++++++++++++++++++++++++++++++- src/modifiers.ts | 105 ++++++++++++++++++ src/parser.ts | 34 +++--- src/types.ts | 1 + test/portuguese.spec.ts | 116 +++++++++++++++++++ 6 files changed, 486 insertions(+), 23 deletions(-) create mode 100644 test/portuguese.spec.ts diff --git a/src/compiler.ts b/src/compiler.ts index 8a85a80..3c3d14d 100644 --- a/src/compiler.ts +++ b/src/compiler.ts @@ -1,6 +1,6 @@ -import { NUMBER, TOKEN_TYPE } from './constants'; -import { Languages, Region, SubRegion } from './types'; -import { splice } from './util'; +import {NUMBER, TOKEN_TYPE} from './constants'; +import {Languages, Region, SubRegion} from './types'; +import {splice} from './util'; export type Options = Partial<{ numbersOnly: boolean; @@ -39,11 +39,17 @@ export const compileSubRegion = (subRegion: SubRegion, decimal: boolean = false) throw 'SHOULD HAVE 1 TOKEN! SUBREGION TYPE = TEN'; sum += convertChunkToNumber(subRegion.tokens[0].lowerCaseValue); break; + case TOKEN_TYPE.HUNDRED: + if (subRegion.tokens.length > 1 || subRegion.tokens.length === 0) + throw 'SHOULD HAVE 1 TOKEN! SUBREGION TYPE = HUNDRED'; + sum += convertChunkToNumber(subRegion.tokens[0].lowerCaseValue); + break; case TOKEN_TYPE.MAGNITUDE: subRegion.tokens.map(token => { switch (token.type) { case TOKEN_TYPE.UNIT: case TOKEN_TYPE.TEN: + case TOKEN_TYPE.HUNDRED: sum += convertChunkToNumber(token.lowerCaseValue); break; case TOKEN_TYPE.MAGNITUDE: @@ -194,7 +200,7 @@ export const compiler = ( }); return parseFloat(temp); } - if (regions[0].end - regions[0].start === text.length - 1) { + if (regions.length && regions[0].end - regions[0].start === text.length - 1) { return compileRegion(regions[0]); } return replaceRegionsInText(regions, text); diff --git a/src/constants.ts b/src/constants.ts index 4987bc2..5420bd3 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -41,6 +41,79 @@ export const ENGLISH_UNIT = { a: 1, }; +export const PORTUGUESE_UNIT = { + zero: 0, + + um: 1, + uma: 1, + primeiro: 1, + primeira: 1, + primeiros: 1, + primeiras: 1, + + dois: 2, + duas: 2, + segundo: 2, + segunda: 2, + segundos: 2, + segundas: 2, + + tres: 3, + terceiro: 3, + terceira: 3, + terceiros: 3, + terceiras: 3, + + quatro: 4, + quarto: 4, + quarta: 4, + quartos: 4, + quartas: 4, + + cinco: 5, + quinto: 5, + quinta: 5, + quintos: 5, + quintas: 5, + + seis: 6, + sexto: 6, + sexta: 6, + sextos: 6, + sextas: 6, + + sete: 7, + setimo: 7, + setima: 7, + setimos: 7, + setimas: 7, + + oito: 8, + oitavo: 8, + oitava: 8, + oitavos: 8, + oitavas: 8, + + nove: 9, + nono: 9, + nona: 9, + nonos: 9, + nonas: 9, + + dez: 10, + + onze: 11, + doze: 12, + treze: 13, + quatorze: 14, + catorze: 14, + quinze: 15, + dezesseis: 16, + dezessete: 17, + dezoito: 18, + dezenove: 19, +} + export const ENGLISH_TEN = { twenty: 20, twentieth: 20, @@ -60,6 +133,126 @@ export const ENGLISH_TEN = { ninetieth: 90, }; +export const PORTUGUESE_TEN = { + decima: 10, + decimo: 10, + decimos: 10, + decimas: 10, + + vinte: 20, + vigesimo: 20, + vigesima: 20, + vigesimos: 20, + vigesimas: 20, + + trinta: 30, + trigesimo: 30, + trigesima: 30, + trigesimos: 30, + trigesimas: 30, + + quarenta: 40, + quadragesimo: 40, + quadragesima: 40, + quadragesimos: 40, + quadragesimas: 40, + + cinquenta: 50, + quinquagesimo: 50, + quinquagesima: 50, + quinquagesimos: 50, + quinquagesimas: 50, + + sessenta: 60, + sexagesimo: 60, + sexagesima: 60, + sexagesimos: 60, + sexagesimas: 60, + + setenta: 70, + septuagesimo: 70, + septuagesima: 70, + septuagesimos: 70, + septuagesimas: 70, + + oitenta: 80, + octogesimo: 80, + octogesima: 80, + octogesimos: 80, + octogesimas: 80, + + noventa: 90, + nonagesimo: 90, + nonagesima: 90, + nonagesimos: 90, + nonagesimas: 90, +}; + +export const PORTUGUESE_HUNDRED = { + cem: 100, + cento: 100, + centesimo: 100, + centesima: 100, + centesimos: 100, + centesimas: 100, + + duzentos: 200, + duzentas: 200, + duzentesimo: 200, + duzentesima: 200, + duzentesimos: 200, + duzentesimas: 200, + + trezentos: 300, + trezentas: 300, + trecentesimo: 300, + trecentesima: 300, + trecentesimos: 300, + trecentesimas: 300, + + quatrocentos: 400, + quatrocentas: 400, + quatrocentesimo: 400, + quatrocentesima: 400, + quatrocentesimos: 400, + quatrocentesimas: 400, + + quinhentos: 500, + quinhentas: 500, + quingentesimo: 500, + quingentesima: 500, + quingentesimos: 500, + quingentesimas: 500, + + seiscentos: 600, + seiscentas: 600, + sexcentesimo: 600, + sexcentesima: 600, + sexcentesimos: 600, + sexcentesimas: 600, + + setecentos: 700, + setecentas: 700, + septingentesimo: 700, + septingentesima: 700, + septingentesimos: 700, + septingentesimas: 700, + + oitocentos: 800, + oitocentas: 800, + octingentesimo: 800, + octingentesima: 800, + octingentesimos: 800, + octingentesimas: 800, + + novecentos: 900, + novecentas: 900, + nongentesimo: 900, + nongentesima: 900, + noningentesimos: 900, + noningentesimas: 900, +} + export const ENGLISH_MAGNITUDE = { hundred: 100, hundredth: 100, @@ -76,6 +269,35 @@ export const ENGLISH_MAGNITUDE = { decillion: 1000000000000000000000000000000000, }; +export const PORTUGUESE_MAGNITUDE = { + mil: 1000, + milesimo: 1000, + milesima: 1000, + milesimos: 1000, + milesimas: 1000, + + milhao: 1000000, + milhoes: 1000000, + milionesimo: 1000000, + milionesima: 1000000, + milionesimos: 1000000, + milionesimas: 1000000, + + bilhao: 1000000000, + bilhoes: 1000000000, + bilionesimo: 1000000000, + bilionesima: 1000000000, + bilionesimos: 1000000000, + bilionesimas: 1000000000, + + trilhao: 1000000000000, + trilhoes: 1000000000000, + trilionesimo: 1000000000000, + trilionesima: 1000000000000, + trilionesimos: 1000000000000, + trilionesimas: 1000000000000, +} + export const DUTCH_UNIT = { nul: 0, eerste: 1, @@ -161,20 +383,26 @@ export const NUMBER = { ...DUTCH_UNIT, ...DUTCH_TEN, ...DUTCH_MAGNITUDE, + ...PORTUGUESE_UNIT, + ...PORTUGUESE_TEN, + ...PORTUGUESE_MAGNITUDE, + ...PORTUGUESE_HUNDRED, }; -export const UNIT_KEYS = Object.keys({ ...ENGLISH_UNIT, ...DUTCH_UNIT }); -export const TEN_KEYS = Object.keys({ ...ENGLISH_TEN, ...DUTCH_TEN }); +export const UNIT_KEYS = Object.keys({...ENGLISH_UNIT, ...DUTCH_UNIT, ...PORTUGUESE_UNIT}); +export const TEN_KEYS = Object.keys({...ENGLISH_TEN, ...DUTCH_TEN, ...PORTUGUESE_TEN}); +export const HUNDRED_KEYS = Object.keys({...PORTUGUESE_HUNDRED}); export const MAGNITUDE_KEYS = Object.keys({ ...ENGLISH_MAGNITUDE, ...DUTCH_MAGNITUDE, + ...PORTUGUESE_MAGNITUDE, }); //@ts-ignore -export const NUMBER_WORDS = [...UNIT_KEYS, ...TEN_KEYS, ...MAGNITUDE_KEYS]; +export const NUMBER_WORDS = [...UNIT_KEYS, ...TEN_KEYS, ...MAGNITUDE_KEYS, ...Object.keys(PORTUGUESE_HUNDRED)]; -export const JOINERS = ['and', 'en']; -export const DECIMALS = ['point', 'dot', 'komma', 'punt']; +export const JOINERS = ['and', 'en', 'e']; +export const DECIMALS = ['point', 'dot', 'komma', 'punt', 'virgula']; export const PUNCTUATION = [ '.', @@ -207,6 +435,7 @@ export enum TOKEN_TYPE { TEN = 'TEN', MAGNITUDE = 'MAGNITUDE', DECIMAL = 'DECIMAL', + HUNDRED = 'HUNDRED', } export const ALL_WORDS = [...NUMBER_WORDS, ...JOINERS, ...DECIMALS]; diff --git a/src/modifiers.ts b/src/modifiers.ts index 9324836..1dd3526 100644 --- a/src/modifiers.ts +++ b/src/modifiers.ts @@ -6,6 +6,9 @@ import { ENGLISH_SPECIFIC_SPLIT, ENGLISH_TEN, ENGLISH_UNIT, + PORTUGUESE_MAGNITUDE, + PORTUGUESE_TEN, + PORTUGUESE_UNIT, TOKEN_TYPE, } from './constants'; import { getAllIndexes } from './util'; @@ -22,6 +25,7 @@ type Possibility = { * * @param possibleUnits Language specific units * @param possibleTens Language specific tens + * @param possibleHundreds Language specific hundreds * @param possibleMagnitudes Language specific magnitudes * @param chunk A piece of the text * @returns {Possibility[]} @@ -29,6 +33,7 @@ type Possibility = { function calculatePossibilities( possibleUnits: string[], possibleTens: string[], + possibleHundreds: string[], possibleMagnitudes: string[], chunk: string ): Possibility[] { @@ -59,6 +64,19 @@ function calculatePossibilities( }); }); + possibleHundreds.forEach(possibility => { + const indexes = getAllIndexes(chunk, possibility); + indexes.forEach(start => { + const end = start + possibility.length - 1; + possibilities.push({ + start, + end, + type: TOKEN_TYPE.HUNDRED, + value: possibility, + }); + }); + }); + possibleMagnitudes.forEach(possibility => { const indexes = getAllIndexes(chunk, possibility); indexes.forEach(start => { @@ -151,6 +169,7 @@ export const modifyDutch = (chunk: string): string | string[] => { const possibilities: Possibility[] = calculatePossibilities( possibleUnits, possibleTens, + [], possibleMagnitudes, chunk ); @@ -272,6 +291,89 @@ export const modifyEnglish = (chunk: string): string | string[] => { const possibilities: Possibility[] = calculatePossibilities( possibleUnits, possibleTens, + [], + possibleMagnitudes, + chunk + ); + //Check which possibilities DO NOT OVERLAP and are valid. + if (possibilities.length >= 2) { + const { longestStart, longestEnd } = predict(possibilities, chunk); + if (!longestStart || !longestEnd) return []; + //Pick possibilities with shortest distance between start and end + if (longestStart.end === longestEnd.start - 1) { + //No Splitter in this chunk + return [longestStart.value, longestEnd.value]; + } else { + // ! IMPORTANT: ENGLISH SPECIFIC SPLITTERS + //Splitter in this chunk + //@ts-ignore + let possibleSplitter: Possibility = null; + const possible = ENGLISH_SPECIFIC_SPLIT.some(splitter => { + const index = chunk.indexOf(splitter, longestStart.end); + if (index !== -1) { + possibleSplitter = { + start: index, + end: index + splitter.length - 1, + type: 'splitter', + value: splitter, + }; + return ( + longestStart.start === 0 && + longestStart.end < possibleSplitter.start && + possibleSplitter.end < longestEnd.start && + longestEnd.end === chunk.length - 1 + ); + } + return false; + }); + + if (possible) { + //Perfect match + // ! ENGLISH SPECIFIC + if ( + longestStart.type === TOKEN_TYPE.TEN && + longestEnd.type === TOKEN_TYPE.UNIT + ) + return [longestStart.value, longestEnd.value]; + if ( + longestStart.type === TOKEN_TYPE.UNIT && + longestEnd.type === TOKEN_TYPE.MAGNITUDE + ) + return [longestStart.value, longestEnd.value]; + } else { + console.log(longestStart, longestEnd, possibleSplitter, chunk); + throw 'CANNOT PARSE CHUNK INTO NUMBER (ENGLISH: CANNOT FIND A GOOD SPLITTER)'; + } + } + } +}; + +export const modifyPortuguese = (chunk: string): string | string[] | undefined => { + const units = [...Object.keys(PORTUGUESE_UNIT)]; + const tens = [...Object.keys(PORTUGUESE_TEN)]; + const magnitudes = [...Object.keys(PORTUGUESE_MAGNITUDE)]; + const hundreds = [...Object.keys(PORTUGUESE_UNIT)]; + + if ( + units.includes(chunk) || + tens.includes(chunk) || + hundreds.includes(chunk) || + magnitudes.includes(chunk) + ) { + return chunk; //This chunk is already a whole number that doesnt need converting + } + + const possibleUnits: string[] = units.filter(unit => chunk.includes(unit)); + const possibleTens: string[] = tens.filter(ten => chunk.includes(ten)); + const possibleHundreds: string[] = hundreds.filter(hundred => chunk.includes(hundred)); + const possibleMagnitudes: string[] = magnitudes.filter(unit => + chunk.includes(unit) + ); + + const possibilities: Possibility[] = calculatePossibilities( + possibleUnits, + possibleTens, + possibleHundreds, possibleMagnitudes, chunk ); @@ -326,8 +428,11 @@ export const modifyEnglish = (chunk: string): string | string[] => { } } } + return; }; + + // ! MY FIRST VERSION, KINDA WORKED BUT UNREADBLE PIECE OF GARABGE, SHOULD KEEP IT FOR REFERENCE/FUTURE // /** // * Custom made, never part of the original package. diff --git a/src/parser.ts b/src/parser.ts index 9058a6b..3c19613 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,5 +1,5 @@ /* eslint-disable no-extra-parens */ -import { compileSubRegion } from './compiler'; +import {compileSubRegion} from './compiler'; import { BLACKLIST_SINGULAR_WORDS, DECIMALS, @@ -8,11 +8,12 @@ import { NUMBER_WORDS, PUNCTUATION, TEN_KEYS, + HUNDRED_KEYS, TOKEN_TYPE, UNIT_KEYS, } from './constants'; -import { modifyDutch, modifyEnglish } from './modifiers'; -import { HANDLE_TOKEN, Languages, Region, SubRegion, Token } from './types'; +import {modifyDutch, modifyEnglish, modifyPortuguese} from './modifiers'; +import {HANDLE_TOKEN, Languages, Region, SubRegion, Token} from './types'; /** * Check if token can be appened to the sub region @@ -21,7 +22,7 @@ import { HANDLE_TOKEN, Languages, Region, SubRegion, Token } from './types'; * @returns */ const canAddTokenToEndOfSubRegion = (subRegion: SubRegion, currentToken: Token) => { - const { tokens } = subRegion; + const {tokens} = subRegion; const prevToken = tokens[0]; if (!prevToken) return true; if (prevToken.type === TOKEN_TYPE.DECIMAL) return false; @@ -51,9 +52,9 @@ const canAddTokenToEndOfSubRegion = (subRegion: SubRegion, currentToken: Token) * @returns {type: TOKEN_TYPE} */ const getSubRegionType = (subRegion: SubRegion, currentToken: Token): { type: TOKEN_TYPE } => { - if (currentToken.type === TOKEN_TYPE.DECIMAL) return { type: TOKEN_TYPE.DECIMAL }; - if (subRegion && subRegion.type === TOKEN_TYPE.MAGNITUDE) return { type: TOKEN_TYPE.MAGNITUDE }; - return { type: currentToken.type }; + if (currentToken.type === TOKEN_TYPE.DECIMAL) return {type: TOKEN_TYPE.DECIMAL}; + if (subRegion && subRegion.type === TOKEN_TYPE.MAGNITUDE) return {type: TOKEN_TYPE.MAGNITUDE}; + return {type: currentToken.type}; }; /** @@ -63,12 +64,12 @@ const getSubRegionType = (subRegion: SubRegion, currentToken: Token): { type: TO * @returns {HANDLE_TOKEN,TOKEN_TYPE} what should happen with the token and subregion (start new region, add) */ const checkIfTokenFitsSubRegion = (subRegion: SubRegion, token: Token): { action: HANDLE_TOKEN; type: TOKEN_TYPE } => { - const { type } = getSubRegionType(subRegion, token); - if (!subRegion) return { action: HANDLE_TOKEN.START_NEW_REGION, type }; + const {type} = getSubRegionType(subRegion, token); + if (!subRegion) return {action: HANDLE_TOKEN.START_NEW_REGION, type}; if (canAddTokenToEndOfSubRegion(subRegion, token)) { - return { action: HANDLE_TOKEN.ADD, type }; + return {action: HANDLE_TOKEN.ADD, type}; } - return { action: HANDLE_TOKEN.START_NEW_REGION, type }; + return {action: HANDLE_TOKEN.START_NEW_REGION, type}; }; /** @@ -84,7 +85,7 @@ const getSubRegions = (region: Region): SubRegion[] => { let i = tokensCount - 1; while (i >= 0) { const token = region.tokens[i]; - const { action, type } = checkIfTokenFitsSubRegion(currentSubRegion!, token); + const {action, type} = checkIfTokenFitsSubRegion(currentSubRegion!, token); token.type = token.type; switch (action) { case HANDLE_TOKEN.ADD: { @@ -119,7 +120,7 @@ const getSubRegions = (region: Region): SubRegion[] => { * @returns {boolean} */ const canAddTokenToEndOfRegion = (region: Region, currentToken: Token): boolean => { - const { tokens } = region; + const {tokens} = region; const prevToken = tokens[tokens.length - 1]; //If previous and current token are both UNITS and there is NO DECIMAL => false (five five is not valid, but five.five is a valid combination) if (prevToken.type === TOKEN_TYPE.UNIT && currentToken.type === TOKEN_TYPE.UNIT && !region.hasDecimal) return false; @@ -241,7 +242,7 @@ const matchRegions = (tokens: Token[]): Region[] => { } i++; } - return regions.map(region => ({ ...region, subRegions: getSubRegions(region) })); + return regions.map(region => ({...region, subRegions: getSubRegions(region)})); }; /** * Check what type the chunk is. This chunk will later be converted to a Token @@ -254,6 +255,7 @@ const getTokenType = (chunk: string): TOKEN_TYPE => { if (TEN_KEYS.includes(chunk.toLowerCase())) return TOKEN_TYPE.TEN; if (MAGNITUDE_KEYS.includes(chunk.toLowerCase())) return TOKEN_TYPE.MAGNITUDE; if (DECIMALS.includes(chunk.toLowerCase())) return TOKEN_TYPE.DECIMAL; + if(HUNDRED_KEYS.includes(chunk.toLowerCase())) return TOKEN_TYPE.HUNDRED; }; /** @@ -277,6 +279,10 @@ export const parser = (text: string, language: Languages): Region[] => { case Languages['en-us']: splitted = modifyEnglish(currentValue); break; + case Languages['pt-br']: + currentValue = currentValue.normalize('NFD').replace(/[\u0300-\u036f]/g, '') + splitted = modifyPortuguese(currentValue); + break; } if (splitted && splitted.length > 0 && Array.isArray(splitted)) { diff --git a/src/types.ts b/src/types.ts index 4f73f76..cc74a9f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -3,6 +3,7 @@ import { TOKEN_TYPE } from './constants'; export enum Languages { 'nl-nl' = 'nl-nl', 'en-us' = 'en-us', + 'pt-br' = 'pt-br', } /** * Token is a 'substring' of a sentence, it includes the position of the substring and checks if this substring is 'word' that needs converting to a number. diff --git a/test/portuguese.spec.ts b/test/portuguese.spec.ts new file mode 100644 index 0000000..235625b --- /dev/null +++ b/test/portuguese.spec.ts @@ -0,0 +1,116 @@ +import chai from 'chai'; +import wordsToNumbers from '../src/index'; +import { Languages } from '../src/types'; + +describe('Convert words to numbers', () => { + it('trinta e três', () => { + const words = 'trinta e três'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(33); + }); + it('quarenta e quatro', () => { + const words = 'quarenta e quatro'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(44); + }); + it('dois milhões seiscentos e vinte e dois mil trezentos e oitenta e oito', () => { + const words = + 'dois milhões seiscentos e vinte e dois mil trezentos e oitenta e oito'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(2622388); + }); + it('vinte e dois mil', () => { + const words = 'vinte e dois mil'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(22000); + }); + + it('cinco vírgula sessenta e sete', () => { + const words = 'cinco vírgula sessenta e sete'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(5.67); + }); + + it('nove milhões setecentos e sessenta e três mil quatrocentos e quarenta e quatro', () => { + const words = + 'nove milhões setecentos e sessenta e três mil quatrocentos e quarenta e quatro'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(9763444); + }); + it('cinco vírgula vinte e um', () => { + const words = 'cinco vírgula vinte e um'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(5.21); + }); + it('cinco vírgula quinhentos e vinte e um', () => { + const words = 'cinco vírgula quinhentos e vinte e um'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(5.521); + }); + + it('cinco milhões quatrocentos mil trezentos e vinte e um', () => { + const words = + 'cinco milhões quatrocentos mil trezentos e vinte e um'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(5400321); + }); + + it('cinco milhões quatrocentos mil trezentos e vinte e texto normal deveria criar outra região como essa quinhentos e dois', () => { + const words = + 'cinco milhões quatrocentos mil trezentos e vinte e texto normal deveria criar outra região como essa quinhentos e dois'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai + .expect(result) + .to.equal( + '5400320 e texto normal deveria criar outra região como essa 502' + ); + }); + + it('cem', () => { + const words = 'cem'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(100); + }); + + it('11111', () => { + const words = 'onze mil cento e onze'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(11111); + }); + + it('Os décimos terceiros salários', ()=> { + const words = 'Os décimos terceiros salários'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal('Os 13 salários'); + }); + + it('vinte e cinco', () => { + const words = 'vinte e cinco'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(25); + }); + + it('vigésima segunda imperatriz', () => { + const words = 'vigésima segunda imperatriz'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal('22 imperatriz'); + }); + + it('quinto dia de maio de mil novecentos e noventa e nove', () => { + const words = 'quinto dia de maio de mil novecentos e noventa e nove'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal('5 dia de maio de 1999'); + }); + + it('duzentos mil', () => { + const words = 'duzentos mil'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(200000); + }); + + it('dois trilhões quatrocentos e noventa e sete bilhões cem milhões quinhentos e vinte e dois mil trezentos e oitenta e oito', () => { + const words = 'dois trilhões quatrocentos e noventa e sete bilhões cem milhões quinhentos e vinte e dois mil trezentos e oitenta e oito'; + const result = wordsToNumbers(words, { language: Languages['pt-br'] }); + chai.expect(result).to.equal(2497100522388) + }) +}); From bb9a23e153a9339e7c52638aab6a18d487b11790 Mon Sep 17 00:00:00 2001 From: Afonso F Date: Mon, 13 Nov 2023 17:43:31 -0300 Subject: [PATCH 2/3] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d6e5b1a..c970438 100644 --- a/README.md +++ b/README.md @@ -15,5 +15,6 @@ This wasn't possible without the authors of [words-to-numbers](https://github.co # Supported languages - English - Dutch +- Portuguese Feel free to open issues to support more languages. From d17b1275f0a2d10498505dc7126b5a5f2202f1c5 Mon Sep 17 00:00:00 2001 From: afonsof Date: Mon, 13 Nov 2023 18:10:53 -0300 Subject: [PATCH 3/3] fix: remove disturbing console.log --- src/modifiers.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/modifiers.ts b/src/modifiers.ts index 1dd3526..87554b5 100644 --- a/src/modifiers.ts +++ b/src/modifiers.ts @@ -341,7 +341,6 @@ export const modifyEnglish = (chunk: string): string | string[] => { ) return [longestStart.value, longestEnd.value]; } else { - console.log(longestStart, longestEnd, possibleSplitter, chunk); throw 'CANNOT PARSE CHUNK INTO NUMBER (ENGLISH: CANNOT FIND A GOOD SPLITTER)'; } } @@ -423,7 +422,6 @@ export const modifyPortuguese = (chunk: string): string | string[] | undefined = ) return [longestStart.value, longestEnd.value]; } else { - console.log(longestStart, longestEnd, possibleSplitter, chunk); throw 'CANNOT PARSE CHUNK INTO NUMBER (ENGLISH: CANNOT FIND A GOOD SPLITTER)'; } }