Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
5c4358a
source files added
joelgrondman Apr 19, 2018
5df1839
path global parameters added
joelgrondman Apr 19, 2018
239b396
class for creating distance measure
joelgrondman Apr 19, 2018
f124d5d
collection of functions for retrieving files
joelgrondman Apr 19, 2018
34baa48
for creating candidate/blacklist/whitelist.txt
joelgrondman Apr 19, 2018
41348b1
reactive input listener with no need for ENTER
joelgrondman Apr 19, 2018
e75b93d
some tests for cognate_info
joelgrondman Apr 19, 2018
496e143
empty textfile created when not found
joelgrondman Apr 19, 2018
24c1622
exception rethrown
joelgrondman Apr 19, 2018
98fbfd7
standalone test ready to run, set MAX_WORDS to 100 for fast execution
joelgrondman Apr 19, 2018
e0e22f4
renamed abstract class
joelgrondman Apr 26, 2018
cfbdf69
append to file method added
joelgrondman Apr 26, 2018
f4900a5
moved from list to set, database support added, moved several functio…
joelgrondman Apr 26, 2018
a2cb588
added several edit_distance tests, database tests'
joelgrondman Apr 26, 2018
f17dc24
database support of cognates
joelgrondman Apr 26, 2018
8dc1673
abstract edit_distance class for creating edit_distance functions
joelgrondman Apr 26, 2018
f9d8e7d
example of interactive evaluation of cognates through terminal
joelgrondman Apr 26, 2018
fa6357a
short explanation of cognate folder structure and class methods
joelgrondman Apr 26, 2018
427cac3
added more room for language codes
joelgrondman Apr 27, 2018
6dde7e0
removed whitelines
joelgrondman Apr 27, 2018
e79460c
some cleanup
joelgrondman Apr 27, 2018
c1291b0
separated language codes
joelgrondman Apr 28, 2018
deeeb05
support for rules added directly to edit distance function factory
joelgrondman Apr 29, 2018
d486938
...
mircealungu Apr 30, 2018
6b9e093
changed in memory storage to dict, automatic loading from db or file,…
joelgrondman May 3, 2018
0e879d7
some renames for consistency
joelgrondman May 3, 2018
7b9b778
additional functions for retrieving cognates
joelgrondman May 3, 2018
543dab8
fix: loading from db caused candidates to only have one cognate for e…
joelgrondman May 3, 2018
e790041
additional test added
joelgrondman May 3, 2018
6d0c59e
uncommented code
joelgrondman May 3, 2018
277bb84
can we start committing blacklists / whitelists? it seems wasteful to…
mircealungu May 8, 2018
f50ae6a
actually, rules are also probably worth always committing ?
mircealungu May 8, 2018
0f46d9d
actually - the best way would be to create a blacklist / whitelist fi…
mircealungu May 8, 2018
241949a
translation module added to setup
joelgrondman May 9, 2018
1b52ac3
author variable added to cognateInfo
joelgrondman May 9, 2018
347603c
rules are now dict, store in db option added, edit distance correction
joelgrondman May 9, 2018
ff49419
tests for db loading/saving added
joelgrondman May 9, 2018
2fb2ad1
Merge branch 'towards_refactoring_the_api' of https://github.com/zeeg…
joelgrondman May 9, 2018
e15e626
author added to rules filename
joelgrondman May 9, 2018
912b854
erroneous rule loading resolved
joelgrondman May 9, 2018
382530f
setup dependencies added
joelgrondman May 17, 2018
e7bc442
automatic evaluation through translator added
joelgrondman May 17, 2018
7c3a545
repetitive code replaced
joelgrondman May 17, 2018
c7bb3ea
cognates in txt and db format for german-english
joelgrondman May 17, 2018
6d3d8d5
added different separators
joelgrondman May 21, 2018
29c54e9
french-english cognates
joelgrondman May 21, 2018
38b68af
moved file_handling to its own module
mircealungu May 23, 2018
d441adc
created portability module
mircealungu May 23, 2018
86a03b7
moving file handling to its own module: part two
mircealungu May 23, 2018
7a54b1b
finalized: moving the scripts to their own module
mircealungu May 23, 2018
e24bf24
updated path
mircealungu May 23, 2018
9f0da16
an init in the scripts. just in case.
mircealungu May 23, 2018
3040a3b
moved also loading_from_hermit to file_handling
mircealungu May 23, 2018
bc2a993
some renames, reorganizing
joelgrondman May 23, 2018
7ce271b
Merge pull request #10 from zeeguu-ecosystem/refactoring_the_refactoring
joelgrondman May 23, 2018
379c233
added stemming option
joelgrondman Jun 14, 2018
a9af77b
several parameter tweaks
joelgrondman Jun 14, 2018
e2c0ab2
stemmed cognates for experiments
joelgrondman Jun 14, 2018
632b1b0
converted to python3 style
joelgrondman Jul 1, 2018
e5aed65
refactored cognate_info into translate and cognate_evaluation
joelgrondman Jul 1, 2018
31a938f
changed name of database
joelgrondman Jul 1, 2018
315dfc2
small optimization
joelgrondman Jul 1, 2018
bedd7e8
removed associated file/refactored
joelgrondman Jul 1, 2018
56e38d7
translations/cognates
joelgrondman Jul 1, 2018
123275e
cleaning up
joelgrondman Aug 31, 2018
a6f1bd1
cognate pairs + documentation
joelgrondman Sep 6, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions demo.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from wordstats import Word


print Word.stats("maman", "fr")
print Word.stats("Mutter", "de")
print Word.stats("Mother", "en")
print Word.stats("mama", "ro")
print(str(Word.stats("maman", "fr")))
print(str(Word.stats("Mutter", "de")))
print(str(Word.stats("Mother", "en")))
print(str(Word.stats("mama", "ro")))

print Word.stats("maman", "fr")
print Word.stats("Mutter", "de")
print Word.stats("Mother", "en")
print Word.stats("mamamama", "ro").importance
print(str(Word.stats("maman", "fr")))
print(str(Word.stats("Mutter", "de")))
print(str(Word.stats("Mother", "en")))
print(str(Word.stats("mamamama", "ro").importance))
File renamed without changes.
120 changes: 120 additions & 0 deletions portability/getchunix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import sys

class _Getch:
    """Callable that reads a single character from standard input.

    The Windows reader is tried first; if constructing it raises
    ImportError, the Unix reader is used instead.
    """

    def __init__(self):
        try:
            reader = _GetchWindows()
        except ImportError:
            reader = _GetchUnix()
        # The platform-specific callable that does the actual read.
        self.impl = reader

    def __call__(self):
        """Delegate to the selected platform reader and return its result."""
        return self.impl()


class _GetchUnix:
    """Single-character reader built on the ``tty`` and ``termios`` modules."""

    def __init__(self):
        # The import is not used here; presumably it exists so that
        # construction fails with ImportError where ``tty`` is unavailable.
        import tty

    def __call__(self):
        """Put stdin in raw mode, read one character, restore the terminal."""
        import termios
        import tty

        stdin_fd = sys.stdin.fileno()
        saved_attrs = termios.tcgetattr(stdin_fd)
        try:
            tty.setraw(sys.stdin.fileno())
            char = sys.stdin.read(1)
        finally:
            # Always put the saved terminal attributes back, even if the
            # read raised.
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)
        return char


class _GetchWindows:
    """Single-character reader built on the Windows-only ``msvcrt`` module."""

    def __init__(self):
        # Importing here makes construction raise ImportError on platforms
        # without msvcrt, which is what _Getch relies on to fall back.
        import msvcrt

    def __call__(self):
        """Read one keypress from the console and return it as a ``str``.

        ``msvcrt.getch()`` returns ``bytes`` on Python 3, which the callers
        in this module cannot write to ``sys.stdout`` or ``"".join``; the
        wide-character variant returns text instead.
        """
        import msvcrt
        return msvcrt.getwch()


def parsenum(num):
    """Return ``num`` unchanged, or ``sys.maxsize`` when ``num`` is negative."""
    if num < 0:
        return sys.maxsize
    return num


def read_single_keypress():
    """Read one character through ``_Getch`` and react to control codes.

    Code points 27 and 127 cause backspace, space, backspace to be written
    to stdout, which blanks the previous character on screen. Code point 3
    raises KeyboardInterrupt and code point 4 raises EOFError. Otherwise
    (and also after the erase sequence) the character read is returned.
    """
    key = _Getch()()
    code = ord(key)
    if code in (27, 127):
        # Step back, overwrite with a blank, step back again.
        for erase_code in (8, 32, 8):
            sys.stdout.write(chr(erase_code))
    elif code == 3:
        raise KeyboardInterrupt
    elif code == 4:
        raise EOFError
    return key


def nbsp(x, y):
    """Apply keypress ``x`` to the character buffer ``y`` and return ``y``.

    A character with code point 27 or 127 removes the last buffered item
    (an empty buffer is left untouched); any other character is appended.
    The buffer is modified in place.
    """
    if ord(x) not in (27, 127):
        y.append(x)
        return y
    try:
        y.pop()
    except IndexError:
        # Nothing to erase from an empty buffer.
        pass
    return y


def thismany(count=-1) -> str:
    """Read keypresses from stdin until exactly ``count`` characters are kept.

    Each keypress is echoed to stdout. Erase keys (as interpreted by
    ``nbsp``) remove the last kept character instead of adding one, so more
    than ``count`` keys may be read in total.

    :param count: number of characters to collect; a negative value is
        mapped to ``sys.maxsize`` by ``parsenum``, i.e. effectively no limit.
    :return: the collected characters joined into one string.
    :raises KeyboardInterrupt, EOFError: propagated from
        ``read_single_keypress``.
    """
    y = []
    count = parsenum(count)
    # Strictly less-than: stop as soon as ``count`` characters are buffered.
    while len(y) < count:
        i = read_single_keypress()
        sys.stdout.write(i)
        sys.stdout.flush()
        y = nbsp(i, y)
    return "".join(y)


def until(chars, count=-1) -> str:
    """Read keypresses until one of ``chars`` is typed or ``count`` are kept.

    Each keypress, including the terminating one, is echoed to stdout. The
    terminating character is not part of the result.

    :param chars: iterable of characters that end the read.
    :param count: maximum number of characters to collect; a negative value
        is mapped to ``sys.maxsize`` by ``parsenum``, i.e. effectively no
        limit.
    :return: the collected characters joined into one string.
    :raises KeyboardInterrupt, EOFError: propagated from
        ``read_single_keypress``.
    """
    y = []
    chars = list(chars)
    count = parsenum(count)
    # Strictly less-than: stop as soon as ``count`` characters are buffered.
    while len(y) < count:
        i = read_single_keypress()
        sys.stdout.write(i)
        sys.stdout.flush()
        if i in chars:
            break
        y = nbsp(i, y)
    return "".join(y)


def until_not(chars, count=-1) -> str:
    """Read keypresses while they are in ``chars``, up to ``count`` kept.

    Each keypress, including the first one outside ``chars``, is echoed to
    stdout. That terminating character is not part of the result.

    :param chars: iterable of characters that are accepted.
    :param count: maximum number of characters to collect; a negative value
        is mapped to ``sys.maxsize`` by ``parsenum``, i.e. effectively no
        limit.
    :return: the collected characters joined into one string.
    :raises KeyboardInterrupt, EOFError: propagated from
        ``read_single_keypress``.
    """
    y = []
    chars = list(chars)
    count = parsenum(count)
    # Strictly less-than: stop as soon as ``count`` characters are buffered.
    while len(y) < count:
        i = read_single_keypress()
        sys.stdout.write(i)
        sys.stdout.flush()
        if i not in chars:
            break
        y = nbsp(i, y)
    return "".join(y)


def pretty_press() -> str:
    """Read one keypress from stdin, echo it, and return it as a string.

    The keypress is passed through ``nbsp`` with a fresh empty buffer, so a
    key that ``nbsp`` treats as an erase key yields an empty string.

    :return: the character read, or ``""`` for an erase key.
    :raises KeyboardInterrupt, EOFError: propagated from
        ``read_single_keypress``.
    """
    i = read_single_keypress()
    sys.stdout.write(i)
    sys.stdout.flush()
    # The original referenced an undefined buffer here; use a local one and
    # join it so the declared ``str`` return type holds.
    return "".join(nbsp(i, []))
Empty file modified run_tests.sh
100755 → 100644
Empty file.
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def package_files(directory):
description="Python Class for Word Statistics ",
keywords="second language acquisition api",
package_data={'': extra_files},
dependency_links=[
"https://github.com/zeeguu-ecosystem/Python-Translators/tarball/master#egg=python_translators"],
install_requires=("configobj",
"sqlalchemy")
"sqlalchemy",
'python_translators')
)
4 changes: 4 additions & 0 deletions wordstats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
# them when it does the reflection thing to create the DB tables
from .word_info import WordInfo
from .language_info import LanguageInfo
from .edit_distance_function_factory import WordDistance
from .translate_db import TranslationDatabase
from .cognate_db import CognateDatabase


# Create all tables in the engine. equivalent to "Create Table" in SQL
Base.metadata.create_all(BaseService.engine)
Expand Down
67 changes: 67 additions & 0 deletions wordstats/cognate_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from sys import stdout

import sqlalchemy.orm
import sqlalchemy
from sqlalchemy import Column, Integer, String, UniqueConstraint, Boolean

from .base_service import Base, SimplifiedQuery

# structure for reviewed cognates
class CognateDatabase(SimplifiedQuery, Base):
    """ORM row in the ``cognate_database`` table: one reviewed word pair.

    Each row stores a word pair, the ``primary``/``secondary`` identifiers
    it belongs to (presumably language codes -- confirm against callers),
    the author of the review, and a boolean ``whitelist`` flag.
    """
    __tablename__ = 'cognate_database'
    __table_args__ = {'mysql_collate': 'utf8_bin'}

    id = Column(Integer, primary_key=True)

    word_primary = Column(String(255), nullable=False, index=True)
    word_secondary = Column(String(255), nullable=False, index=True)
    primary = Column(String(20), nullable=False, index=True)
    secondary = Column(String(20), nullable=False, index=True)
    author = Column(String(255), nullable=False, index=True)
    whitelist = Column(Boolean)

    UniqueConstraint(word_primary, word_secondary, primary, secondary, author)

    def __init__(self, word_primary, word_secondary, primary, secondary, whitelist, author: str = ""):
        """Populate every column except the auto-assigned ``id``."""
        self.word_primary = word_primary
        self.word_secondary = word_secondary
        self.primary = primary
        self.secondary = secondary
        self.whitelist = whitelist
        self.author = author

    def __str__(self):
        """Return a one-line, human-readable summary of the row.

        The text is returned as ``str``; encoding it to bytes here would make
        ``str(obj)`` raise TypeError on Python 3.
        """
        return "info: {2} ({0} {1}, whitelist: {3}, author: {4})".format(
            self.word_primary,
            self.word_secondary,
            self.primary + self.secondary,
            self.whitelist,
            self.author)

    @classmethod
    def find(cls, word, primary, secondary, author: str = ""):
        """Return the single row whose ``word_primary`` matches ``word``.

        ``word`` is lower-cased before the lookup. Rows are also filtered by
        ``primary``, ``secondary`` and ``author``.

        :return: the matching row, or ``None`` when no row matches.

        NOTE(review): ``Query.one()`` also raises when more than one row
        matches, and that case is not handled here -- confirm it cannot occur.
        """
        word = word.lower()
        try:
            return (cls.query()
                    .filter(cls.word_primary == word)
                    .filter(cls.primary == primary)
                    .filter(cls.secondary == secondary)
                    .filter(cls.author == author)
                    .one())
        except sqlalchemy.orm.exc.NoResultFound:
            return None

    @classmethod
    def find_all(cls, primary, secondary, author: str = ""):
        """Return all rows matching ``primary``, ``secondary`` and ``author``."""
        return (cls.query()
                .filter(cls.primary == primary)
                .filter(cls.secondary == secondary)
                .filter(cls.author == author)
                .all())

    @classmethod
    def clear_entries(cls, primary, secondary, author: str = ""):
        """Delete all rows matching ``primary``, ``secondary`` and ``author``.

        The delete is issued with ``synchronize_session=False``.
        """
        (cls.query()
         .filter(cls.primary == primary)
         .filter(cls.secondary == secondary)
         .filter(cls.author == author)
         .delete(synchronize_session=False))
Loading