Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions bin/demeuk.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@
check-replacement-character, check-empty-line
"""
from binascii import hexlify, unhexlify
from collections import deque
from glob import glob
from html import unescape
from inspect import cleandoc
Expand Down Expand Up @@ -171,7 +172,7 @@
from unidecode import unidecode


version = '4.5.0'
version = '4.5.1'

# Search from start to finish for the string $HEX[], with block of a-f0-9 with even number
# of hex chars. The first match group is repeated.
Expand Down Expand Up @@ -1016,8 +1017,16 @@ def clean_up(lines):
"""
results = []
log = []
processed_lines = set()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the unique qualities of set

work_queue = deque(lines)

while work_queue:
line = work_queue.popleft()

if line in processed_lines:
continue
processed_lines.add(line)

for line in lines:
# Check if the limit is set; if so, decrement it, and quit once 0 is reached.
if type(config['limit']) is int:
if config['limit'] > 0:
Expand Down Expand Up @@ -1057,7 +1066,7 @@ def clean_up(lines):
if status:
# Line contains hex; the function returns a binary string, so add it back to
# our undecoded lines.
lines.append(line_decoded)
work_queue.append(line_decoded)
if config['debug']:
log.append(f'Clean_hex; replaced $HEX[], added to queue and quiting; {line}{linesep}')
# Aborting future processing of this line.
Expand All @@ -1069,7 +1078,7 @@ def clean_up(lines):
if status:
# Line contains an HTML string; because this can be binary data (linefeeds etc.),
# convert it back to a binary string and add it to the queue again.
lines.append(line_decoded.encode())
work_queue.append(line_decoded.encode())
if config['debug']:
log.append(f'Clean_html; replaced html, added to queue and quiting; {line_decoded}{linesep}')
stop = True
Expand Down Expand Up @@ -1283,49 +1292,49 @@ def clean_up(lines):
for modified_line in modified_lines:
if config['debug']:
log.append(f'Add_split; new line because of split; {modified_line}{linesep}')
lines.append(modified_line.encode())
work_queue.append(modified_line.encode())

if config.get('add-lower'):
modified_line = add_lower(line_decoded)
if modified_line:
if config['debug']:
log.append(f'Add_lower; new line; {modified_line}{linesep}')
lines.append(modified_line.encode())
work_queue.append(modified_line.encode())

if config.get('add-first-upper'):
modified_line = add_first_upper(line_decoded)
if modified_line:
if config['debug']:
log.append(f'Add_first_upper; new line; {modified_line}{linesep}')
lines.append(modified_line.encode())
work_queue.append(modified_line.encode())

if config.get('add-title-case'):
modified_line = add_title_case(line_decoded)
if modified_line:
if config['debug']:
log.append(f'Add_title_case; new line; {modified_line}{linesep}')
lines.append(modified_line.encode())
work_queue.append(modified_line.encode())

if config.get('add-latin-ligatures'):
modified_line = add_latin_ligatures(line_decoded)
if modified_line:
if config['debug']:
log.append(f'Add_latin_ligatures; new line; {modified_line}{linesep}')
lines.append(modified_line.encode())
work_queue.append(modified_line.encode())

if config.get('add-umlaut'):
status, modified_line = clean_add_umlaut(line_decoded)
if status:
if config['debug']:
log.append(f'Add_umlaut; new line; {modified_line}{linesep}')
lines.append(modified_line.encode())
work_queue.append(modified_line.encode())

if config.get('add-without-punctuation'):
modified_line = add_without_punctuation(line_decoded, config.get('punctuation'))
if modified_line:
if config['debug']:
log.append(f'Add_without_punctuation; new line; {modified_line}{linesep}')
lines.append(modified_line.encode())
work_queue.append(modified_line.encode())

if config['debug']:
log.append(f'----End---- {line_decoded}{linesep}{linesep}')
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,7 @@
file.write(f'_amsterdam {linesep}')
file.write(f'ROTTERDAM_ {linesep}')
file.write(f'Cookie Monster {linesep}')

with open('testdata/input54', 'w') as file:
file.write(f'Golf Trip{linesep}')
file.write(f'Sequences{linesep}')
25 changes: 23 additions & 2 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from subprocess import PIPE, run
from unittest.mock import patch

from pytest import raises
from pytest import raises, mark

from bin.demeuk import main

Expand Down Expand Up @@ -188,7 +188,7 @@ def test_language_processing():
with patch.object(sys, 'argv', testargs):
main()
line_num_output = calculate_line_numbers('testdata/output11')
assert line_num_output == 29
assert line_num_output == 21
with open('testdata/output11') as f:
filecontent = f.read()
assert 'cijfer\n' in filecontent
Expand Down Expand Up @@ -982,3 +982,24 @@ def test_check_contains():
assert '_amsterdam' not in filecontent
assert 'ROTTERDAM_' not in filecontent
assert 'Cookie Monster' in filecontent


@mark.timeout(1)
def test_infinite_loop():
    """Run demeuk with --add-lower and --add-title-case and check the output.

    The test carries a one-second timeout mark, so (as the test name suggests)
    a run that never terminates is reported as a failure instead of hanging
    the suite. The output file is expected to hold exactly four lines: both
    original spellings plus their lower-cased variants.
    """
    argv = [
        'demeuk', '-i', 'testdata/input54', '-o', 'testdata/output54', '-l', 'testdata/log54',
        '--add-lower', '--add-title-case',
    ]

    # main() reads its options from sys.argv, so swap it out for the call.
    with patch.object(sys, 'argv', argv):
        main()

    line_count = calculate_line_numbers('testdata/output54')
    with open('testdata/output54') as output_file:
        output_text = output_file.read()

    assert line_count == 4
    for expected in ('Golf Trip', 'Sequences', 'golf trip', 'sequences'):
        assert expected in output_text
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ deps =
-rrequirements.txt
pytest
flake8
pytest-timeout
commands =
pytest
flake8
Expand Down