Skip to content

Commit 3af8e71

Browse files
committed
Feature: Add "generous" as a token indicating an approximate amount.
1 parent 4aedb7a commit 3af8e71

3 files changed

Lines changed: 23 additions & 1 deletion

File tree

ingredient_parser/en/_constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,7 @@
419419
"nearly",
420420
"roughly",
421421
"~",
422+
"generous",
422423
]
423424
# Tokens that indicate an amount is singular
424425
SINGULAR_TOKENS = ["each"]

ingredient_parser/en/postprocess.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1551,7 +1551,7 @@ def _fallback_pattern(
15511551
def _is_approximate(
15521552
self, i: int, tokens: list[str], labels: list[str], idx: list[int]
15531553
) -> bool:
1554-
"""Return True is token at current index is approximate.
1554+
"""Return True if token at current index is approximate.
15551555
15561556
This is determined by the token label being QTY or UNIT and the previous token
15571557
being in a list of approximate tokens.
@@ -1611,6 +1611,11 @@ def _is_approximate(
16111611
self.consumed.append(idx[i - 1])
16121612
self.consumed.append(idx[i - 2])
16131613
return True
1614+
elif labels[i] == "UNIT" and tokens[i - 1].lower() in APPROXIMATE_TOKENS:
1615+
# For cases like "2 generous cups"
1616+
# Mark i - 1 element as consumed
1617+
self.consumed.append(idx[i - 1])
1618+
return True
16141619

16151620
return False
16161621

tests/postprocess/test_is_approximate.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,22 @@ def test_is_approximate_nearly(self):
8282
assert p._is_approximate(1, tokens, labels, idx)
8383
assert p.consumed == [0]
8484

85+
def test_is_approximate_generous(self):
86+
"""
87+
Test that UNIT at index, preceded by "generous", is indicated as approximate
88+
"""
89+
sentence = "6 generous cups orange juice"
90+
tokens = ["6", "generous", "cups", "orange", "juice"]
91+
pos_tags = ["CD", "JJ", "NNS", "NN", "NN"]
92+
labels = ["QTY", "UNIT", "UNIT", "B_NAME_TOK", "I_NAME_TOK"]
93+
idx = [0, 1, 2, 3, 4]
94+
95+
p = PostProcessor(
96+
sentence, tokens, pos_tags, labels, [0] * len(tokens), custom_units={}
97+
)
98+
assert p._is_approximate(2, tokens, labels, idx)
99+
assert p.consumed == [1]
100+
85101
def test_not_approximate(self):
86102
"""
87103
Test that QTY at index is not indicated as approximate

0 commit comments

Comments
 (0)