6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 3.0.5 (2025-06-18)

### Changed

- Updated dependencies

## 3.0.4 (2025-05-06)

### Changed
5 changes: 5 additions & 0 deletions deduce/__init__.py
@@ -1 +1,6 @@
from deduce.deduce import Deduce, __version__

__all__ = [
"Deduce",
"__version__",
]
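Together with the new `__all__`, this re-export pins down the package's public surface: both names can now be imported straight from the package root. A minimal usage sketch (the `deidentify` call and its return value are assumed from deduce's documentation, not shown in this diff):

```python
from deduce import Deduce, __version__

print(__version__)  # e.g. "3.0.5"

# Assumed API (per the project docs, not this diff): deidentify()
# returns a document whose deidentified_text has PHI replaced by tags.
deduce = Deduce()
doc = deduce.deidentify("patient Jan Jansen werd gezien op de poli")
print(doc.deidentified_text)
```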
17 changes: 0 additions & 17 deletions deduce/annotator.py
@@ -112,7 +112,6 @@ def __init__(
self._matching_pipeline = None

if len(self.pattern) > 0 and "lookup" in self.pattern[0]:

if self.ds is None:
raise RuntimeError(
"Created pattern with lookup in TokenPatternAnnotator, but "
@@ -218,7 +217,6 @@ def annotate(self, doc: dd.Document) -> list[dd.Annotation]:
)

for token in tokens:

annotation = self._match_sequence(
doc.text, self.pattern, token, direction="right", skip=self.skip
)
@@ -252,12 +250,10 @@ def __init__(
def _apply_context_pattern(
self, text: str, annotations: dd.AnnotationSet, context_pattern: dict
) -> dd.AnnotationSet:

direction = context_pattern["direction"]
skip = set(context_pattern.get("skip", []))

for annotation in annotations.copy():

tag = list(_DIRECTION_MAP[direction]["order"](annotation.tag.split("+")))[
-1
]
@@ -319,7 +315,6 @@ def _annotate(self, text: str, annotations: dd.AnnotationSet) -> dd.AnnotationSet:
)

if self.iterative:

changed = dd.AnnotationSet(annotations.difference(original_annotations))
annotations = dd.AnnotationSet(
annotations.intersection(original_annotations)
@@ -356,7 +351,6 @@ class implements logic for detecting first name(s), initials and surnames.
"""

def __init__(self, tokenizer: Tokenizer, *args, **kwargs) -> None:

self.tokenizer = tokenizer
self.skip = [".", "-", " "]

@@ -366,9 +360,7 @@ def __init__(self, tokenizer: Tokenizer, *args, **kwargs) -> None:
def _match_first_names(
doc: dd.Document, token: dd.Token
) -> Optional[tuple[dd.Token, dd.Token]]:

for first_name in doc.metadata["patient"].first_names:

if str_match(token.text, first_name) or (
len(token.text) > 3
and str_match(token.text, first_name, max_edit_distance=1)
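The condition above is the fuzzy first-name rule: a token counts as a hit when it matches one of the patient's first names exactly, or, for tokens longer than three characters, within one edit. A standalone restatement (the import path for `str_match` is an assumption; the helper and its `max_edit_distance` parameter are visible in this hunk):

```python
from deduce.utils import str_match  # assumed import path for the helper


def first_name_hit(token_text: str, first_name: str) -> bool:
    # Exact matches always count; longer tokens may also differ by one
    # edit, which tolerates misspellings without letting short names
    # match too loosely.
    return str_match(token_text, first_name) or (
        len(token_text) > 3
        and str_match(token_text, first_name, max_edit_distance=1)
    )
```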
@@ -381,7 +373,6 @@ def _match_first_names(
def _match_initial_from_name(
doc: dd.Document, token: dd.Token
) -> Optional[tuple[dd.Token, dd.Token]]:

for _, first_name in enumerate(doc.metadata["patient"].first_names):
if str_match(token.text, first_name[0]):
next_token = token.next()
@@ -397,7 +388,6 @@ def _match_initial_from_name(
def _match_initials(
doc: dd.Document, token: dd.Token
) -> Optional[tuple[dd.Token, dd.Token]]:

if str_match(token.text, doc.metadata["patient"].initials):
return token, token

@@ -417,7 +407,6 @@ def next_with_skip(self, token: dd.Token) -> Optional[dd.Token]:
def _match_surname(
self, doc: dd.Document, token: dd.Token
) -> Optional[tuple[dd.Token, dd.Token]]:

if doc.metadata["surname_pattern"] is None:
doc.metadata["surname_pattern"] = self.tokenizer.tokenize(
doc.metadata["patient"].surname
@@ -473,9 +462,7 @@ def annotate(self, doc: Document) -> list[Annotation]:
annotations = []

for token in doc.get_tokens():

for matcher, tag in matchers:

match = matcher(doc, token)

if match is None:
@@ -518,7 +505,6 @@ def __init__(
lowercase: bool = True,
**kwargs,
) -> None:

self.pre_pseudo = set(pre_pseudo or [])
self.post_pseudo = set(post_pseudo or [])
self.lowercase = lowercase
@@ -553,7 +539,6 @@ def _get_previous_word(self, char_index: int, text: str) -> str:
result = ""

for ch in text[::-1]:

if not self._is_word_char(ch):
break

@@ -576,7 +561,6 @@ def _get_next_word(self, char_index: int, text: str) -> str:
result = ""

for ch in text:

if not self._is_word_char(ch):
break

@@ -648,7 +632,6 @@ def annotate(self, doc: Document) -> list[Annotation]:
annotations = []

for match in self.bsn_regexp.finditer(doc.text):

text = match.group(self.capture_group)
digits = re.sub(r"\D", "", text)
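The two lines above normalize a regex hit before any further checks: everything except digits is stripped, so formatted numbers such as `123-456-789` survive intact. That step in isolation:

```python
import re


def normalize_digits(text: str) -> str:
    """Drop every non-digit character, e.g. '123.456.789' -> '123456789'."""
    return re.sub(r"\D", "", text)


print(normalize_digits("bsn: 123-456-789"))  # 123456789
```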

10 changes: 10 additions & 0 deletions deduce/str/__init__.py
@@ -7,3 +7,13 @@
UpperCase,
UpperCaseFirstChar,
)

__all__ = [
"Acronimify",
"FilterBasedOnLookupSet",
"RemoveValues",
"TakeLastToken",
"TitleCase",
"UpperCase",
"UpperCaseFirstChar",
]
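With `__all__` defined, a wildcard import of the string-modifier subpackage exposes exactly these seven names, and linters no longer flag the re-imports in this file as unused. A quick check (assumes deduce is installed):

```python
import deduce.str

print(deduce.str.__all__)
# ['Acronimify', 'FilterBasedOnLookupSet', 'RemoveValues', 'TakeLastToken',
#  'TitleCase', 'UpperCase', 'UpperCaseFirstChar']
```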
1 change: 0 additions & 1 deletion deduce/tokenizer.py
@@ -86,7 +86,6 @@ def _merge(
i = 0

while i < len(tokens):

if tokens_text[i] not in self._start_words:
tokens_merged.append(tokens[i])
i += 1
3 changes: 0 additions & 3 deletions deduce/utils.py
@@ -66,9 +66,7 @@ def initialize_class(cls: type, args: dict, extras: dict) -> object:
cls_params = inspect.signature(cls).parameters

for arg_name, arg in extras.items():

if arg_name in cls_params:

args[arg_name] = arg

return cls(**args)
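`initialize_class` forwards an entry from `extras` only when the target class's signature actually accepts it, so one shared extras dict can serve many differently-shaped classes. A self-contained sketch of the same pattern, using toy classes rather than deduce's own:

```python
import inspect


def initialize_class(cls: type, args: dict, extras: dict) -> object:
    """Instantiate cls from args, adding only the extras that appear
    in the class's __init__ signature."""
    cls_params = inspect.signature(cls).parameters
    for arg_name, arg in extras.items():
        if arg_name in cls_params:
            args[arg_name] = arg
    return cls(**args)


class NeedsTokenizer:
    def __init__(self, name: str, tokenizer: object) -> None:
        self.name, self.tokenizer = name, tokenizer


class Plain:
    def __init__(self, name: str) -> None:
        self.name = name


extras = {"tokenizer": "dummy-tokenizer"}
a = initialize_class(NeedsTokenizer, {"name": "a"}, extras)  # receives tokenizer
b = initialize_class(Plain, {"name": "b"}, extras)           # extras ignored
```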
@@ -209,7 +207,6 @@ def apply_transform(items: set[str], transform_config: dict) -> set[str]:
transforms = transform_config.get("transforms", {})

for _, transform in transforms.items():

to_add = []

for item in items:
2 changes: 0 additions & 2 deletions docs/emojize.py
@@ -20,11 +20,9 @@ def emojize_all(s: str) -> str:


if __name__ == "__main__":

dir = argv[1]

for file in glob.glob(dir + "/*.html"):

with open(file, "r") as f:
html = f.readlines()
