From 94d6deb440a95c6b717ab8bfdb5e865b405a2291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sat, 12 Apr 2025 14:45:44 +0200 Subject: [PATCH 1/4] Add punctuation in comments --- grader/applications_.py | 2 +- grader/person.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/grader/applications_.py b/grader/applications_.py index 590b7d2..919c9b7 100644 --- a/grader/applications_.py +++ b/grader/applications_.py @@ -492,7 +492,7 @@ def __len__(self): return len(self.people) def filter(self, **kwargs): - """Return a sequence of the applications which match certain criteria: + """Return a sequence of applications which match certain criteria: The returned object is a vector, i.e it can be used to extract list of Person attributes, like: diff --git a/grader/person.py b/grader/person.py index 2050825..7990330 100644 --- a/grader/person.py +++ b/grader/person.py @@ -116,12 +116,12 @@ class Person: # editions underrep: str = '' # underrepresentaiton travel_grant: str = '' # if poor then too bad! - # internal attribute signaling relaxed checking - # needed to relax value checks for old application files [should not be - # necessary for new application files + # Internal attribute signaling relaxed checking. + # Needed to relax value checks for old application files (should not be + # necessary for new application files). _relaxed: bool = dataclasses.field(default=False, repr=False) - # internal attribute keeping a reference to the application.ini file + # Internal attribute keeping a reference to the application.ini file _ini: applications.ApplicationsIni = \ dataclasses.field(default=None, repr=False) @@ -154,7 +154,7 @@ def get_rating(self, name): if not val and not ratings: return math.nan - # the values of these attributes need to converted to their numerical + # The values of these attributes need to converted to their numerical # value as found in the INI file. For example from # Person.open_source -> "Minor Contributions (bug reports, mailing lists, ...)" # we extract "minor contributions" and look for it in the INI file ratings: @@ -163,7 +163,7 @@ def get_rating(self, name): # minor contributions = 0.5 # ... # The rule is to match anything until the first "/" or "(" or "," - # and removing trialing whitespace if any + # and removing trailing whitespace if any. key = re.match(r'(.+?)\s*(?:[(/,]|$)', val).group(1).lower() if key not in ratings: From 315134207d40506bb84cab8be62bc7316b0f43cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sat, 12 Apr 2025 14:54:51 +0200 Subject: [PATCH 2/4] tests: fix tests to actually test --- grader/test_applications_.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/grader/test_applications_.py b/grader/test_applications_.py index edc8386..1deb82e 100644 --- a/grader/test_applications_.py +++ b/grader/test_applications_.py @@ -189,46 +189,46 @@ def test_applications_object(tmp_path): vegans = app.filter(label = ['VEGAN']) assert len(vegans) == 1 - vegans.name == ['Jędrzej Marcin'] + assert vegans.name == ['Jędrzej Marcin'] vegans = app.filter(label = 'VEGAN') assert len(vegans) == 1 - vegans.name == ['Jędrzej Marcin'] + assert vegans.name == ['Jędrzej Marcin'] vegans = app.filter(label = ['VEGAN', 'UTF-8']) assert len(vegans) == 1 - vegans.name == ['Jędrzej Marcin'] + assert vegans.name == ['Jędrzej Marcin'] byname = app.filter(name = 'Person') assert len(byname) == 2 - byname.name == ['Person', 'Person'] - byname.lastname = ['One', 'Two'] + assert byname.name == ['Person', 'Person'] + assert byname.lastname == ['One', 'Two'] byname_and_l = app.filter(name = 'Person', label=['-','PALEO']) assert len(byname_and_l) == 1 - byname_and_l.name == ['Person'] - byname_and_l.lastname = ['Two'] + assert byname_and_l.name == ['Person'] + assert byname_and_l.lastname == ['Two'] byname = app.filter(name = 'Person', affiliation='Paleolithic 1') assert len(byname) == 1 - byname.fullname == ['Person One'] + assert byname.fullname == ['Person One'] byname = app.filter(name = 'Person', affiliation=r'Paleolithic') assert len(byname) == 1 - byname.fullname == ['Person One'] + assert byname.fullname == ['Person One'] byname = app.filter(name = 'Person', affiliation=r'paleo[a-z]ithic') assert len(byname) == 1 - byname.fullname == ['Person One'] + assert byname.fullname == ['Person One'] byname = app.filter(name = 'Person', affiliation=r'^aleolithic') assert len(byname) == 0 - byname.fullname == [] + assert byname.fullname == [] # also check with utf-8 in the pattern and label/non-label matching byname = app.filter(label = ['VEGAN', 'UTF-8'], name = 'Jędrzej') assert len(byname) == 1 - byname.name == ['Jędrzej Marcin'] + assert byname.name == ['Jędrzej Marcin'] with pytest.raises(AttributeError): app.filter(unknown_attr = '11') @@ -236,7 +236,10 @@ def test_applications_object(tmp_path): # non-string match byyear = app.filter(born=1980) assert len(byyear) == 1 - byyear.name == ['Jędrzej Marcin'] + assert byyear.name == ['Jędrzej Marcin'] + + # TODO: this should fail: + # byname.lastname = ['One', 'Two'] def test_applications_getitem(tmp_path): csv = get_applications_csv(tmp_path) From ee7b616a63c710647c70738c46398865787cc6b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sat, 12 Apr 2025 15:49:19 +0200 Subject: [PATCH 3/4] tests: port the remaining tests for old applications code --- grader/applications_.py | 22 ++-- grader/test_applications.py | 222 ----------------------------------- grader/test_applications_.py | 78 ++++++++++-- 3 files changed, 80 insertions(+), 242 deletions(-) delete mode 100644 grader/test_applications.py diff --git a/grader/applications_.py b/grader/applications_.py index 919c9b7..6520023 100644 --- a/grader/applications_.py +++ b/grader/applications_.py @@ -353,19 +353,20 @@ def get_ratings(self, field): return section return None - def save(self, file=None): + def save(self, filename=None): + filename = filename or self.filename + with open(filename, 'wt') as file: + self.save_to_file(file) + + def save_to_file(self, file): # save our data to the INI file cp = configparser.ConfigParser(comment_prefixes='#', inline_comment_prefixes='#') cp.read_dict(self.data) + cp.write(file) - file = file or self.filename - if not hasattr(file, 'write'): - file = open(file, 'w') - - with file as fh: - cp.write(fh) + name = getattr(file, 'name', '(tmp)') + printff(f'Saved changes to {name}') - printff(f'Saved changes to {fh.name!r}') def __setitem__(self, key, value): # allow to set items in the section of the INI using a dotted form, for ex: @@ -477,6 +478,11 @@ def all_nationalities(self): def all_affiliations(self): return set(p.affiliation for p in self.people) + def all_labels(self): + return set(l + for p in self.people + for l in p.labels) + def __getitem__(self, key): """Get people by numerical index or by fullname""" # we want to be able to do applications[0] and application["mario rossi"] diff --git a/grader/test_applications.py b/grader/test_applications.py deleted file mode 100644 index 561fe7a..0000000 --- a/grader/test_applications.py +++ /dev/null @@ -1,222 +0,0 @@ -from io import StringIO -from textwrap import dedent - -from pytest import raises - -from .configfile import ConfigFile -from .applications import Applications, build_person_factory -from .util import list_of_str - - -def _tmp_application_files(tmpdir, config_string, csv_string): - config_tmpfile = tmpdir.join('test_grader.conf') - config_tmpfile.write(config_string) - csv_tmpfile = tmpdir.join('test_applications.csv') - csv_tmpfile.write(csv_string) - return config_tmpfile, csv_tmpfile - - -def test_applications_from_paths(tmpdir): - config_string = dedent(""" - [labels] - john doe = VEGAN - """) - csv_string = dedent(""" - "First name","Last name","Email address" - "John","Doe","john.dow@nowhere.com" - "Mary Jane","Smith","mary82@something.org" - """).strip() - config_tmpfile, csv_tmpfile = _tmp_application_files( - tmpdir, config_string, csv_string) - - fields_to_col_names_section = { - 'name': ['First name'], - 'lastname': ['Last name'], - 'email': ['Email address'], - } - - applications = Applications.from_paths( - config_tmpfile.strpath, - csv_tmpfile.strpath, - fields_to_col_names_section - ) - - assert len(applications.applicants) == 2 - assert applications.applicants[0].name == 'John' - assert applications.applicants[1].lastname == 'Smith' - assert applications.applicants[0].labels == ['VEGAN'] - - -def test_applications_init(): - config_string = dedent(""" - [labels] - john doe = VEGAN, VIP - """) - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname']) - applicants = [person_factory('John', 'Doe')] - - applications = Applications(applicants, config) - - assert len(applications.applicants) == 1 - assert applications.applicants[0].labels == ['VEGAN', 'VIP'] - - -def test_applications_find_applicant_by_fullname(): - config_string = dedent(""" - [labels] - john doe = VEGAN - """) - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname']) - applicants = [person_factory('John', 'Doe')] - - applications = Applications(applicants, config) - john_doe = applications.find_applicant_by_fullname('john doe') - assert applications.applicants[0] is john_doe - - with raises(ValueError): - applications.find_applicant_by_fullname('johnny mnemonic') - - -def test_applications_add_labels(): - config_string = dedent(""" - [labels] - john doe = VEGAN - """) - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname']) - john_doe = person_factory('John', 'Doe') - ben_johnson = person_factory('Ben', 'Johnson') - applicants = [john_doe, ben_johnson] - - applications = Applications(applicants, config) - applications.add_labels('john doe', ['VIP', 'VIRULENT']) - applications.add_labels('ben johnson', ['VIPER']) - - assert john_doe.labels == ['VEGAN', 'VIP', 'VIRULENT'] - assert config.sections['labels']['john doe'] \ - == ['VEGAN', 'VIP', 'VIRULENT'] - - assert ben_johnson.labels == ['VIPER'] - assert config.sections['labels']['ben johnson'] == ['VIPER'] - - -def test_applications_clear_labels(): - config_string = dedent(""" - [labels] - john doe = VEGAN, VIP - """) - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname']) - john_doe = person_factory('John', 'Doe') - applicants = [john_doe] - - applications = Applications(applicants, config) - - assert john_doe.labels == ['VEGAN', 'VIP'] - assert 'john doe' in config.sections['labels'].keys() - applications.clear_labels('john doe') - assert john_doe.labels == [] - assert 'john doe' not in config.sections['labels'].keys() - - -def test_applications_get_labels(): - config_string = dedent(""" - [labels] - john doe = VEGAN, VIP - """) - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname']) - john_doe = person_factory('John', 'Doe') - ben_johnson = person_factory('Ben', 'Johnson') - applicants = [john_doe, ben_johnson] - - applications = Applications(applicants, config) - assert applications.get_labels('john doe') == ['VEGAN', 'VIP'] - assert applications.get_labels('ben johnson') == [] - - -def test_applications_get_all_labels(): - config_string = dedent(""" - [labels] - john doe = VEGAN, VIP - ben johnson = VIPER - """) - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname']) - john_doe = person_factory('John', 'Doe') - ben_johnson = person_factory('Ben', 'Johnson') - applicants = [john_doe, ben_johnson] - - applications = Applications(applicants, config) - assert applications.get_all_labels() == {'VEGAN', 'VIP', 'VIPER'} - - -def test_applications_filter_attributes(): - config_string = dedent(""" - [labels] - """) - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname', 'nationality', 'gender']) - mario_rossi = person_factory('Mario', 'Rossi', 'Italy', 'Male') - lucia_bianchi = person_factory('Lucia', 'Bianchi', 'Italy', 'Female') - fritz_lang = person_factory('Fritz', 'Lang', 'Germany', 'Male') - applicants = [mario_rossi, fritz_lang, lucia_bianchi] - - applications = Applications(applicants, config) - assert applications.filter(nationality='Italy') == [mario_rossi, lucia_bianchi] - assert applications.filter(nationality='Italy', nonmale=True) == [lucia_bianchi] - assert applications.filter(nationality='Germany') == [fritz_lang] - assert applications.filter(nationality='NoCountryForOldMen') == [] - with raises(AttributeError): - applications.filter(dummy='Error') - - -def test_applications_filter_labels(): - config_string = dedent(""" - [labels] - mario rossi = ALFA, DELTA, MIKE - fritz lang = ZULU, DELTA, MIKE, ECHO - """) - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname']) - mario_rossi = person_factory('Mario', 'Rossi') - fritz_lang = person_factory('Fritz', 'Lang') - applicants = [mario_rossi, fritz_lang] - - applications = Applications(applicants, config) - assert applications.filter(label='ALFA') == [mario_rossi] - assert applications.filter(label='ZULU') == [fritz_lang] - assert applications.filter(label=('ALFA', 'MIKE')) == [mario_rossi] - assert applications.filter(label=('DELTA','MIKE')) == [mario_rossi, fritz_lang] - assert applications.filter(label=('DELTA', 'MIKE', '-', 'ECHO')) == [mario_rossi] - assert applications.filter(label=('DELTA', 'MIKE', '-', 'ECHO', 'ALFA')) == [] - assert applications.filter(label='NOLABEL') == [] - - -def test_applications_iteration(): - config_string = "" - config = ConfigFile(StringIO(config_string), labels=list_of_str) - - person_factory = build_person_factory(['name', 'lastname']) - mario_rossi = person_factory('Mario', 'Rossi') - fritz_lang = person_factory('Fritz', 'Lang') - applicants = [mario_rossi, fritz_lang] - - applications = Applications(applicants, config) - result = [] - for app in applications: - result.append(app) - assert result == applications.applicants - # test that we can call len - assert len(applications) == len(applications.applicants) - assert result == list(applications) diff --git a/grader/test_applications_.py b/grader/test_applications_.py index 1deb82e..7a74eee 100644 --- a/grader/test_applications_.py +++ b/grader/test_applications_.py @@ -1,3 +1,4 @@ +import io import pathlib import os import time @@ -88,6 +89,12 @@ def get_applications_csv(tmp_path): return input +@pytest.fixture +def app(tmp_path): + csv = get_applications_csv(tmp_path) + ini = get_ini(tmp_path).filename + return Applications(csv, ini) + def test_applications_ini_read(tmp_path): ini = get_ini(tmp_path) @@ -177,15 +184,9 @@ def test_applications_ini_save(tmp_path): ini2 = ApplicationsIni(out) assert ini2['cooking_rating.some_long_key'] == 7.0 -def test_applications_object(tmp_path): - csv = get_applications_csv(tmp_path) - ini = get_ini(tmp_path).filename - - app = Applications(csv, ini) - +def test_applications_object(app): assert len(app) == 3 assert len(app.people) == 3 - assert app.ini.filename == ini vegans = app.filter(label = ['VEGAN']) assert len(vegans) == 1 @@ -241,12 +242,11 @@ def test_applications_object(tmp_path): # TODO: this should fail: # byname.lastname = ['One', 'Two'] -def test_applications_getitem(tmp_path): - csv = get_applications_csv(tmp_path) - ini = get_ini(tmp_path).filename - - app = Applications(csv, ini) + fullname = app.filter(fullname = 'Person One') + assert len(fullname) == 1 + assert fullname.fullname == ['Person One'] +def test_applications_getitem(app): assert len(app) == 3 assert app['Person One'].fullname == 'Person One' assert app['person one'].fullname == 'Person One' @@ -255,3 +255,57 @@ def test_applications_getitem(tmp_path): app[3.0] with pytest.raises(IndexError): app['Unkown Person'] + +def test_applications_labels(app): + assert app['Person One'].add_label('VIP') is True + assert app['Person One'].labels == ['PALEO', 'VIP'] + + assert app['Person One'].add_label('VIP') is False + assert app['Person One'].labels == ['PALEO', 'VIP'] + + assert app['Person One'].add_label('VEGAN') is True + assert app['Person One'].labels == ['PALEO', 'VEGAN', 'VIP'] + + assert app['Person One'].add_label('VIRULENT') is True + assert app['Person One'].labels == ['PALEO', 'VEGAN', 'VIP', 'VIRULENT'] + + assert app['Person One'].add_label('VIRULENT') is False + assert app['Person One'].labels == ['PALEO', 'VEGAN', 'VIP', 'VIRULENT'] + + assert app['Person Two'].labels == [] + + assert app['Person One'].remove_label('VIP') is True + assert app['Person One'].labels == ['PALEO', 'VEGAN', 'VIRULENT'] + + assert app['Person One'].remove_label('VIP') is False + assert app['Person One'].labels == ['PALEO', 'VEGAN', 'VIRULENT'] + + assert app['Person One'].remove_label('VEGAN') is True + assert app['Person One'].labels == ['PALEO', 'VIRULENT'] + + assert app['Person One'].remove_label('VIRULENT') is True + assert app['Person One'].labels == ['PALEO'] + + assert app['Person One'].remove_label('VIRULENT') is False + assert app['Person One'].labels == ['PALEO'] + + assert app['Person One'].remove_label('PALEO') is True + assert app['Person One'].labels == [] + + out = io.StringIO() + app.ini.save_to_file(file=out) + + assert 'john doe = VEGAN, VIP' in out.getvalue() + assert 'jędrzej marcin mirosławski piołun = UTF-8, VEGAN' in out.getvalue() + +def test_applications_all_labels(app): + assert app.all_labels() == {'PALEO', 'UTF-8', 'VEGAN'} + assert app['Person One'].add_label('VIRULENT') is True + assert app.all_labels() == {'PALEO', 'UTF-8', 'VEGAN', 'VIRULENT'} + +def test_applications_item_access(app): + assert len(app) == 3 + assert app['Person One'].fullname == 'Person One' + assert app[1].fullname == 'Person One' + with pytest.raises(TypeError): + assert app[1.0] From c363610a2a45dc9bb78fc9ed72fa3ed75f872c4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sat, 12 Apr 2025 15:51:53 +0200 Subject: [PATCH 4/4] Rename applications_ to applications The old code was not used anywhere. --- grader/applications.py | 600 ++++++++++++------ grader/applications_.py | 561 ---------------- grader/grader.py | 2 +- grader/person.py | 2 +- ..._applications_.py => test_applications.py} | 2 +- grader/test_person.py | 4 +- 6 files changed, 428 insertions(+), 743 deletions(-) delete mode 100644 grader/applications_.py rename grader/{test_applications_.py => test_applications.py} (99%) diff --git a/grader/applications.py b/grader/applications.py index 913fb63..6520023 100644 --- a/grader/applications.py +++ b/grader/applications.py @@ -1,61 +1,66 @@ -import collections +import configparser import csv +from fnmatch import fnmatch import itertools -import os +import functools import pprint import re +import os -from . import vector -from .util import ( - list_of_str, - list_of_equivs, - printf, - our_configfile, -) - - -PERSON_FACTORY = None - -def build_person_factory(fields): - class Person(collections.namedtuple('Person', fields)): - def __init__(self, *args, **kwargs): - # tuple fields are already set in __new__ - self.score = None - self.rank = None - self.highlander = None - self.samelab = False - self.labels = list_of_str() - - # Manually set applied and napplied attributes, - # in case this is the first time we run the school - # and there are no old applications laying around - try: - self.napplied = 0 - self.applied = 'N' - except AttributeError: - # we get an "AttributeError: can't set attribute" - # if the attributes are set already - pass - - try: - self.travel_grant = '' - except AttributeError: - pass - - @property - def fullname(self): - return '{p.name} {p.lastname}'.format(p=self) - - @property - def nonmale(self): - "Return true if gender is 'female' or 'other'" - return self.gender.lower() != 'male' - - return Person +from . import (person, vector, util) +from .util import printff DEBUG_MAPPINGS = False -def col_name_to_field(description, fields_to_col_names): +# CSV-file: +# List of field names and their aliases, i.e. the way those fields were called +# in some past editions +# This mapping is used to match the columns in the header of the CSV files +# Start here if you want to add a new field +KNOWN_FIELDS = { + # 'field-name' : ('alias1', 'alias2', …) + 'email' : ('email address',), + 'institute' : ('aff-uni', + 'institution', + 'affiliation[uni]', + 'University/Institute/Company'), + 'group' : ('aff-group', + 'affiliation[grp]', + 'Group/Division/Department'), + 'nationality' : ('nat',), + 'international' : ('international',), + 'name' : ('first name',), + 'affiliation' : ('country of affiliation', + 'aff-state', + 'instit loc'), + 'applied' : ('did you already apply', 'prev-application'), + 'programming' : ('estimate your programming skills',), + 'programming_description' : ('programming experience',), + 'python' : ('python skills',), + 'open_source' : ('exposure to open-source', 'opensource',), + 'open_source_description' : ('description of your contrib',), + 'motivation' : ('appropriate course for your skill profile',), + 'cv' : ('curriculum vitae',), + 'lastname' : ('last name', 'surname',), + 'born' : ('year of birth',), + 'vcs' : ('habitually use a version control system',), + 'travel_grant' : ('travel grants', 'grants'), +} + +# INI-file: +# the type of the values for the items in the sections of the applications.ini file +# These types will be enforced by ApplicationsIni.read_config_file +SECTION_TYPES = { + 'labels' : util.list_of_str, + '*_rating' : float, # all sections ending with _rating are going to be floats + 'groups_parameters' : int, + 'fields' : util.list_of_equivs, + 'motivation_score-*' : int, + } + +# This function does the real hard-work of parsing the CSV header to map columns +# to known fields +def col_name_to_field(description, overrides): """Return the name of a field for this description. Must be defined. The double dance is because we want to map: @@ -63,6 +68,10 @@ def col_name_to_field(description, fields_to_col_names): - [other] position <=> position_other, - curriculum vitae <=> Please type in a short curriculum vitae... """ + # normalize to lowercase and get rid of extraneous whitespace + description = ' '.join(description.lower().split()) + + # remove double quotes from around the string if description[0] == description[-1] == '"': # why this doesn't get stripped automatically is beyond me description = description[1:-1] @@ -72,213 +81,438 @@ def col_name_to_field(description, fields_to_col_names): # Recent versions of limesurvey set the descriptions as "KEY. Blah # blah" or "KEY[other]. Blah blah". Let's match the first part only. - desc, _, _ = description.partition('.') - desc = desc.lower() + # The format is like this when you export fro limesurvey the code as well + # as the text of the question + desc = description.split('.', maxsplit=1)[0] - m = re.match('(.*)\s*\[other\]', desc) + # match based on the different ways limesurvey implemented the 'other' value + # in specific fields. Ex: 'Position [Other]', '[Other] Position' + m = re.match(r'(.+?)\s*\[other\] | \[other\]\s*(.+)', desc, re.VERBOSE) if m: - desc = m.group(1) + # use only the non empty group + desc = m.group(1) or m.group(2) + # use the same field name with the suffix '_other', ex: position_other other = '_other' else: + # if we did not match, use the field name without the suffix, ex: position other = '' if DEBUG_MAPPINGS: print(f'looking for {desc!r}') + # look over all the column names and find fuzzy matches to decide if one is a + # clear fit for one of the known fields candidates = {} - for key, values in fields_to_col_names.items(): + for key, aliases in overrides.items(): + assert isinstance(aliases, tuple) + # normalize the name of the field + key = key.lower() if desc == key: + # we have an exact match, we can stop here if DEBUG_MAPPINGS: print('mapped exact key:', key) return key + other - for spelling in values: - if spelling == '': - continue - if desc == spelling.lower(): + for alias in aliases: + # we did not find a match for the name of the field, loop through + # all possible aliases + # normalize the alias for the field + alias = alias.lower() + if desc == alias: + # we have a match if DEBUG_MAPPINGS: - print('mapped spelling:', spelling) + print('mapped alias:', alias) return key + other - if spelling.lower() in description.lower(): - candidates[key] = len(spelling) - break # don't try other spellings for the same key + if alias in description: + # we found a fuzzy match, keep track of it for the moment + candidates[key] = len(alias) + break # don't try other aliases for the same key if not candidates: + # we do not know this name, just normalize the column name and return it if DEBUG_MAPPINGS: - print(f'NO CANDIDATE for {description}') - raise KeyError(description) + print(f'NO CANDIDATE for {desc!r}, using default name') + return desc.lower().replace(' ', '_') + other if len(candidates) == 1: + # we have found only a fuzzy match, assume it is the right one if DEBUG_MAPPINGS: - print('one spelling:', candidates) + print('one alias:', candidates) return list(candidates)[0] + other + # we have found several fuzzy matches, pick the one that matches the longest + # portion of the column name and is 10 characters longer than the second best best = sorted(candidates, key=lambda k: -candidates[k]) if candidates[best[0]] > candidates[best[1]] + 10: if DEBUG_MAPPINGS: - print('best spelling:', candidates) + print('best alias:', candidates) return best[0] + other - print(f'NO CLEARLY BEST CANDIDATE for {description}: {candidates}') + # if we land here, we can't distinguish among the fuzzy matches, bail out + print(f'NO CLEARLY BEST CANDIDATE for {description!r}: {candidates}') raise KeyError(description) + +# create the mapping from the columns of the CSV header to the known fields +# uses col_name_to_field to do the hard work @vector.vectorize -def csv_header_to_fields(header, fields_to_col_names_section): +def csv_header_to_fields(header, overrides): if DEBUG_MAPPINGS: - pprint.pprint(list(fields_to_col_names_section.items())) + print('field name overides:') + pprint.pprint(overrides) failed = None seen = {} for name in header: try: - conv = col_name_to_field(name, fields_to_col_names_section) + # convert the current column + conv = col_name_to_field(name, overrides) if DEBUG_MAPPINGS: - print(f'MAPPING: {name} → {conv}\n') + print(f'MAPPING: {name!r} → {conv!r}\n') if conv in seen: - raise ValueError(f'Both "{name}" and "{seen[conv]}" map to "{conv}".') + # we don't want to convert two different columns to the same field + raise ValueError(f'Both {name!r} and {seen[conv]!r} map to {conv!r}.') seen[conv] = name yield conv except KeyError as e: - printf(f"unknown field: '{name}'") + print(f"Unknown field: {name!r}") failed = e if failed: raise failed -def _drop_whitespace(s): - return ' '.join(s.split()) +# vectorize consumes the generator and returns a special list, which allows +# vectorized attribute access to the list elements, for example +# applications = load_applications_csv(file) +# applications.name -> ['Marcus', 'Lukas', 'Giovanni', ...] @vector.vectorize -def parse_applications_csv_file(file, fields_to_col_names_section): - printf("loading '{}'", file.name) +def load_applications_csv(file, field_name_overrides={}, relaxed=False, ini=None): + # support both file objects and path-strings + if not hasattr(file, 'read'): + file = open(file, encoding='utf-8-sig') ### support for CSV file with BOM + + print(f"loading '{file.name}'") # let's try to detect the separator csv_dialect = csv.Sniffer().sniff(file.read(32768)) # manually set doublequote (the sniffer doesn't get it automatically) csv_dialect.doublequote = True # rewind file.seek(0) - # now the CSV reader should be setup + # now the CSV reader should be set up reader = csv.reader(file, dialect=csv_dialect) csv_header = next(reader) - fields = csv_header_to_fields(csv_header, fields_to_col_names_section) + # map the columns of the header to fields + fields = csv_header_to_fields(csv_header, KNOWN_FIELDS | field_name_overrides) + assert len(fields) == len(csv_header) # sanity check assert len(set(fields)) == len(csv_header) # two columns map to the same field - person_factory = build_person_factory(fields) - global PERSON_FACTORY - if PERSON_FACTORY is None: - PERSON_FACTORY = person_factory - assert len(csv_header) == len(person_factory._fields) count = 0 - while True: - try: - entry = next(reader) - except StopIteration: - return - if not entry: - # skip empty line + for entry in reader: + if (not entry) or len(set(entry)) <= 1: + # first match: empty line at the beginning or at the end of the file + # second match: empty line in the middle of the file continue count += 1 - # strip extraneous whitespace from around and within the name - # This should be moved the the factory initializer, but it's hard with namedtuples - entry[fields.index('name')] = _drop_whitespace(entry[fields.index('name')]) - entry[fields.index('lastname')] = _drop_whitespace(entry[fields.index('lastname')]) - try: - yield person_factory(*entry) + yield person.Person.from_row(fields, entry, relaxed=relaxed, ini=ini) except Exception as exp: - print("Exception raised on entry %d:"%count, exp) - print('Detected fields:', fields) - import pdb; pdb.set_trace() - -class Applications: + print(f'Exception raised on entry {count}:', entry) + print('Detected fields:\n', fields) + raise + + +# This object allow access to the INI file, which contains Person's specific data +# which is generated by us, like labels and motivation scores, together with other +# parameters which are relevant for the interpretation of data from the CSV, like +# the muercial ratings assigned to certain skill levels, the formula to calculate +# the score and the ranking of the applicants. +class ApplicationsIni: + def __init__(self, file): + if hasattr(file, 'read'): + # we got passed some form of file object (we may be running in a test) + # we should now artificially hamper our performance so that controlling + # authorities don't get mad at us [ (C) Volkswagen ] + # file is already open + self.filename = file.name + # we don't know the modification time + self.mtime = None + else: + # we just got a file name (hopefully a pathlib.Path object) + self.filename = file - def __init__(self, applicants, config): - self.applicants = applicants - self.config = config - - if config is not None: - # Add overrides from config - for section in config.sections: - if section.endswith('_overrides'): - field = section[0:-len('_overrides')] - for fullname, value in config[section].items(): - for idx, person in enumerate(applicants): - if person.fullname.lower() == fullname: - item = {field : value} - new_fields = person._replace(**item) - new_person = PERSON_FACTORY(**new_fields._asdict()) - applicants[idx] = new_person - - # Add applicant labels from config file to applicant object - for applicant in applicants: - labels = config['labels'].get(applicant.fullname, - list_of_str()) - applicant.labels = labels + # open the file for reading, if it exists + try: + file = open(file) + print(f"loading '{self.filename}'") + except FileNotFoundError as e: + # if the file doesn't exist yet, we'll create it when writing + #print(f'warning: {e}') + file = None + # set the modification time to the beginning of the Epoch, so that + # any change will trigger our reload rule + self.mtime = 0 + else: + # store the modification time (in ns) of the file + self.mtime = self.filename.stat().st_mtime_ns + + # Track modifications to the global state, i.e. the parameters + # that apply to all people. Some per-person modifications are tracked + # without changing the global generation number. + self.generation = 0 + self.modifications_without_generation = False + + # use config parser to give us a mapping: + # { section_names : {keys : values} } + # where the values are already converted to the proper types + self.data = self.read_config_file(file) + + @vector.dictify + def read_config_file(self, file): + self.config_file_generation = self.generation + self.modifications_without_generation = False + + cp = configparser.ConfigParser(comment_prefixes='#', inline_comment_prefixes='#') + + if file is not None: + cp.read_file(file) + + # this keeps all the data from the INI file, ex: + # 'motivation_score-0' : {'firstname lastname' : -1} + # while converting the values of the keys to the types + # declared in SECTION_TYPES + for section_name, section in cp.items(): + # find the type of this particular section + typ = self._find_typ(section_name) + yield (section_name, {key:typ(value) for key, value in section.items()}) + + def has_modifications(self): + return (self.modifications_without_generation + or + self.generation > self.config_file_generation) + + def _find_typ(self, section_name): + # find the appropriate converter for a given section + for pattern, typ in SECTION_TYPES.items(): + # find the proper type for the section naming matching pattern + if fnmatch(section_name, pattern): + return typ + # just return as-is if we don't know the type for this section + return lambda x: x + + def reload_if_modified(self): + # this function reloads the INI file if its modified time is newer than + # the last one the function was called. It is not called automatically + # here. It is meant to be used in some form of command/event-loop from + # Grader itself + + if self.mtime is None: + # we won't reload something we can't get the modification time of + return False + + # guard against accidentally removing the file under our feet + try: + current = self.filename.stat().st_mtime_ns + except FileNotFoundError: + print(f'WARNING: {self.filename!r} was removed') + return False + + # don't need to reload + if current == self.mtime: + return False + + # if we are here, we have to reload the file + self.mtime = current + + # unconditionally update our generation counter, because anything may + # have been modified in the file + self.generation += 1 + + self.data = self.read_config_file(self.filename.open()) + + return True + + @functools.cache + @vector.vectorize + def identities(self): + """Return a vector of all identities used in the ini file""" + for section_name, section in self.data.items(): + match section_name.split('-', maxsplit=1): + case ('motivation_score', identity): + yield identity + + #@functools.cache + def get_ratings(self, field): + """Return a mapping: {value → rating}. we expect the field without + the suffix _rating""" + for section_name, section in self.data.items(): + match section_name.rsplit('_', maxsplit=1): + case (name, 'rating'): + if field == name: + # section is already a dictionary + return section + return None + + def save(self, filename=None): + filename = filename or self.filename + with open(filename, 'wt') as file: + self.save_to_file(file) + + def save_to_file(self, file): + # save our data to the INI file + cp = configparser.ConfigParser(comment_prefixes='#', inline_comment_prefixes='#') + cp.read_dict(self.data) + cp.write(file) + + name = getattr(file, 'name', '(tmp)') + printff(f'Saved changes to {name}') + + + def __setitem__(self, key, value): + # allow to set items in the section of the INI using a dotted form, for ex: + # to set [python_rating] -> competent = 1 you can do + # ApplicationsIni['python_rating.competent'] = 1 + section_name, key_name = key.split('.') + + if section_name not in self.data: + # create a new section if we don't find one in the INI + self.data[section_name] = {} + + # enforce types for sections we know the type of + typ = self._find_typ(section_name) + self.data[section_name][key_name] = typ(value) + + # We increase the generation number, for modifications of the state, + # but not for the per-person settings in [motivation-*] and [labels], + # because those modifications increase the generation number in Person, + # and if we increased the generation here, we would trigger recalculation + # of scores of all people whenever one person's score or labels were modified. + if section_name.startswith(('motivation_score-', 'labels')): + self.modifications_without_generation = True + else: + self.generation += 1 def __getitem__(self, key): - """Support basic iteration""" - return self.applicants[key] + # same as in __setattr__, allows access to section keys via a dotted notation + # The key is split into two parts: section and key name. + # The key names are allowed to contain dots (this is what maxsplit is for). + if '.' in key: + section_name, item = key.split('.', maxsplit=1) + section = self.data.get(section_name) + if section is None: + ans = None + else: + ans = section.get(item) + else: + ans = self.data.get(key) + # print(f'Query {key} -> {ans}') + return ans + + @vector.vectorize + def get_motivation_scores(self, fullname): + # get all motivation scores of a Person + for identity in self.identities(): + yield self.get_motivation_score(fullname, identity) + + def get_motivation_score(self, fullname, identity): + # get the motivation score of a Person as assigned to them by identity + section_name = f'motivation_score-{identity}' + key = fullname.lower() + return self[f'{section_name}.{key}'] + + def set_motivation_score(self, fullname, value, identity): + section_name = f'motivation_score-{identity}' + key = fullname.lower() + self[f'{section_name}.{key}'] = value - def __len__(self): - return len(self.applicants) + def get_labels(self, fullname): + key = fullname.lower() + return self[f'labels.{key}'] or [] - @classmethod - def from_paths(cls, config_path, csv_path, fields_to_col_names_section): - if os.path.exists(config_path): - config = our_configfile(config_path) + def set_labels(self, fullname, labels): + key = fullname.lower() + + if not labels: + self.data['labels'].pop(key) else: - config = None - printf('Warning: no configuration file {}', config_path) + self[f'labels.{key}'] = labels - with open(csv_path, newline='', encoding='utf-8-sig') as f: - applicants = parse_applications_csv_file( - f, fields_to_col_names_section) + @property + def formula(self): + return self['formula.formula'] or 'nan' - applications = cls(applicants, config) - return applications + @formula.setter + def formula(self, formula): + self['formula.formula'] = formula - def find_applicant_by_fullname(self, fullname): - for applicant in self.applicants: - if applicant.fullname.lower() == fullname.lower(): - return applicant - else: - raise ValueError('Applicant "{}" not found'.format(fullname)) - - def add_labels(self, fullname, labels): - # update applicant - applicant = self.find_applicant_by_fullname(fullname) - applicant.labels.extend(labels) - # update config file - section = self.config['labels'] - saved = section.get(fullname, list_of_str()) - saved.extend(labels) - section[fullname] = saved - - def clear_labels(self, fullname): - # update applicant - applicant = self.find_applicant_by_fullname(fullname) - applicant.labels = [] - # update config file - self.config['labels'].clear(fullname) + @property + def location(self): + return self['formula.location'] - def get_labels(self, fullname): - applicant = self.find_applicant_by_fullname(fullname) - return applicant.labels - def get_all_labels(self): - labels = set() - for applicant in self.applicants: - labels.update(applicant.labels) - return labels +# This class is a collection of applications for an edition of the school +# It can be iterated over and it can return a subset of applications matching +# certain criteria (see "filter" method) +# It keeps a reference to the INI file (if any) corresponding to the CSV file +# where applications are stored +class Applications: + def __init__(self, csv_file, ini_file=None, relaxed=False): + if ini_file is None: + # if the name of the INI file is not passed explicitly, just assume + # it is the same name as the CSV file + ini_file = csv_file.with_suffix('.ini') + self.ini = ApplicationsIni(ini_file) + + # load the applications from the CSV file and adjusting the overrides, + # the labels and the motivation scores as found in the INI file + # self.people is a list of Person objects + self.people = load_applications_csv(csv_file, + ini=self.ini, + relaxed=relaxed) + + @functools.cache + def all_nationalities(self): + return set(p.nationality for p in self.people) + + @functools.cache + def all_affiliations(self): + return set(p.affiliation for p in self.people) + + def all_labels(self): + return set(l + for p in self.people + for l in p.labels) + + def __getitem__(self, key): + """Get people by numerical index or by fullname""" + # we want to be able to do applications[0] and application["mario rossi"] + match key: + case int(key): + return self.people[key] + case str(key): + return self.filter(fullname=f'^{key.lower()}$')[0] + case _: + raise TypeError + + def __len__(self): + return len(self.people) def filter(self, **kwargs): - """Return an iterator over the applications which match certain criteria: + """Return a sequence of applications which match certain criteria: + + The returned object is a vector, i.e it can be used to extract list of + Person attributes, like: + + names_of_italians = applications.filter(nationality='Italy').fullname Examples: applications.filter(nationality='Italy') --> applicants where person.nationality=='Italy' + applications.filter(applied=True) --> + people who declared that they applied already + applications.filter(label='XXX') --> applicants labeled XXX @@ -292,24 +526,36 @@ def filter(self, **kwargs): applicants labeled XXX and YYY but nor ZZZ neither WWW + Labels are checked exactly, and other attributes are interpreted + as a case-insensitive regexp. + Note: returns all applications when called without arguments """ # first match labels + # The following code is some sort of magic that nt even Zbyszek could + # remember how it worked. It still seems to work, but super-human skills + # may be required to modify it. labels = kwargs.pop('label', None) if labels is not None: matching = [] labels = iter((labels, )) if type(labels) == str else iter(labels) accept = frozenset(itertools.takewhile(lambda x: x!='-', labels)) deny = frozenset(labels) - for p in self.applicants: + for p in self.people: labels = set(p.labels) if not (accept - labels) and not (labels & deny): matching.append(p) else: - matching = self.applicants[:] + matching = self.people[:] # now filter through attributes for attr, value in kwargs.items(): - matching = [p for p in matching if getattr(p, attr) == value] - - return matching + if isinstance(value, str): + matching = [p for p in matching + if re.search(value, getattr(p, attr), + re.IGNORECASE)] + else: + matching = [p for p in matching + if value == getattr(p, attr)] + + return vector.vector(matching) diff --git a/grader/applications_.py b/grader/applications_.py deleted file mode 100644 index 6520023..0000000 --- a/grader/applications_.py +++ /dev/null @@ -1,561 +0,0 @@ -import configparser -import csv -from fnmatch import fnmatch -import itertools -import functools -import pprint -import re -import os - -from . import (person, vector, util) -from .util import printff - -DEBUG_MAPPINGS = False - -# CSV-file: -# List of field names and their aliases, i.e. the way those fields were called -# in some past editions -# This mapping is used to match the columns in the header of the CSV files -# Start here if you want to add a new field -KNOWN_FIELDS = { - # 'field-name' : ('alias1', 'alias2', …) - 'email' : ('email address',), - 'institute' : ('aff-uni', - 'institution', - 'affiliation[uni]', - 'University/Institute/Company'), - 'group' : ('aff-group', - 'affiliation[grp]', - 'Group/Division/Department'), - 'nationality' : ('nat',), - 'international' : ('international',), - 'name' : ('first name',), - 'affiliation' : ('country of affiliation', - 'aff-state', - 'instit loc'), - 'applied' : ('did you already apply', 'prev-application'), - 'programming' : ('estimate your programming skills',), - 'programming_description' : ('programming experience',), - 'python' : ('python skills',), - 'open_source' : ('exposure to open-source', 'opensource',), - 'open_source_description' : ('description of your contrib',), - 'motivation' : ('appropriate course for your skill profile',), - 'cv' : ('curriculum vitae',), - 'lastname' : ('last name', 'surname',), - 'born' : ('year of birth',), - 'vcs' : ('habitually use a version control system',), - 'travel_grant' : ('travel grants', 'grants'), -} - -# INI-file: -# the type of the values for the items in the sections of the applications.ini file -# These types will be enforced by ApplicationsIni.read_config_file -SECTION_TYPES = { - 'labels' : util.list_of_str, - '*_rating' : float, # all sections ending with _rating are going to be floats - 'groups_parameters' : int, - 'fields' : util.list_of_equivs, - 'motivation_score-*' : int, - } - -# This function does the real hard-work of parsing the CSV header to map columns -# to known fields -def col_name_to_field(description, overrides): - """Return the name of a field for this description. Must be defined. - - The double dance is because we want to map: - - position <=> position, - - [other] position <=> position_other, - - curriculum vitae <=> Please type in a short curriculum vitae... - """ - # normalize to lowercase and get rid of extraneous whitespace - description = ' '.join(description.lower().split()) - - # remove double quotes from around the string - if description[0] == description[-1] == '"': - # why this doesn't get stripped automatically is beyond me - description = description[1:-1] - - # E.g. "Country of Affiliation:" or "Position: [Other]" - description = description.replace(':', '') - - # Recent versions of limesurvey set the descriptions as "KEY. Blah - # blah" or "KEY[other]. Blah blah". Let's match the first part only. - # The format is like this when you export fro limesurvey the code as well - # as the text of the question - desc = description.split('.', maxsplit=1)[0] - - # match based on the different ways limesurvey implemented the 'other' value - # in specific fields. Ex: 'Position [Other]', '[Other] Position' - m = re.match(r'(.+?)\s*\[other\] | \[other\]\s*(.+)', desc, re.VERBOSE) - if m: - # use only the non empty group - desc = m.group(1) or m.group(2) - # use the same field name with the suffix '_other', ex: position_other - other = '_other' - else: - # if we did not match, use the field name without the suffix, ex: position - other = '' - - if DEBUG_MAPPINGS: - print(f'looking for {desc!r}') - - # look over all the column names and find fuzzy matches to decide if one is a - # clear fit for one of the known fields - candidates = {} - for key, aliases in overrides.items(): - assert isinstance(aliases, tuple) - # normalize the name of the field - key = key.lower() - if desc == key: - # we have an exact match, we can stop here - if DEBUG_MAPPINGS: - print('mapped exact key:', key) - return key + other - for alias in aliases: - # we did not find a match for the name of the field, loop through - # all possible aliases - # normalize the alias for the field - alias = alias.lower() - if desc == alias: - # we have a match - if DEBUG_MAPPINGS: - print('mapped alias:', alias) - return key + other - if alias in description: - # we found a fuzzy match, keep track of it for the moment - candidates[key] = len(alias) - break # don't try other aliases for the same key - - if not candidates: - # we do not know this name, just normalize the column name and return it - if DEBUG_MAPPINGS: - print(f'NO CANDIDATE for {desc!r}, using default name') - return desc.lower().replace(' ', '_') + other - - if len(candidates) == 1: - # we have found only a fuzzy match, assume it is the right one - if DEBUG_MAPPINGS: - print('one alias:', candidates) - return list(candidates)[0] + other - - # we have found several fuzzy matches, pick the one that matches the longest - # portion of the column name and is 10 characters longer than the second best - best = sorted(candidates, key=lambda k: -candidates[k]) - if candidates[best[0]] > candidates[best[1]] + 10: - if DEBUG_MAPPINGS: - print('best alias:', candidates) - return best[0] + other - - # if we land here, we can't distinguish among the fuzzy matches, bail out - print(f'NO CLEARLY BEST CANDIDATE for {description!r}: {candidates}') - raise KeyError(description) - - -# create the mapping from the columns of the CSV header to the known fields -# uses col_name_to_field to do the hard work -@vector.vectorize -def csv_header_to_fields(header, overrides): - if DEBUG_MAPPINGS: - print('field name overides:') - pprint.pprint(overrides) - - failed = None - seen = {} - for name in header: - try: - # convert the current column - conv = col_name_to_field(name, overrides) - if DEBUG_MAPPINGS: - print(f'MAPPING: {name!r} → {conv!r}\n') - if conv in seen: - # we don't want to convert two different columns to the same field - raise ValueError(f'Both {name!r} and {seen[conv]!r} map to {conv!r}.') - seen[conv] = name - yield conv - except KeyError as e: - print(f"Unknown field: {name!r}") - failed = e - if failed: - raise failed - - -# vectorize consumes the generator and returns a special list, which allows -# vectorized attribute access to the list elements, for example -# applications = load_applications_csv(file) -# applications.name -> ['Marcus', 'Lukas', 'Giovanni', ...] -@vector.vectorize -def load_applications_csv(file, field_name_overrides={}, relaxed=False, ini=None): - # support both file objects and path-strings - if not hasattr(file, 'read'): - file = open(file, encoding='utf-8-sig') ### support for CSV file with BOM - - print(f"loading '{file.name}'") - # let's try to detect the separator - csv_dialect = csv.Sniffer().sniff(file.read(32768)) - # manually set doublequote (the sniffer doesn't get it automatically) - csv_dialect.doublequote = True - # rewind - file.seek(0) - # now the CSV reader should be set up - reader = csv.reader(file, dialect=csv_dialect) - csv_header = next(reader) - # map the columns of the header to fields - fields = csv_header_to_fields(csv_header, KNOWN_FIELDS | field_name_overrides) - - assert len(fields) == len(csv_header) # sanity check - assert len(set(fields)) == len(csv_header) # two columns map to the same field - - count = 0 - for entry in reader: - if (not entry) or len(set(entry)) <= 1: - # first match: empty line at the beginning or at the end of the file - # second match: empty line in the middle of the file - continue - count += 1 - - try: - yield person.Person.from_row(fields, entry, relaxed=relaxed, ini=ini) - except Exception as exp: - print(f'Exception raised on entry {count}:', entry) - print('Detected fields:\n', fields) - raise - - -# This object allow access to the INI file, which contains Person's specific data -# which is generated by us, like labels and motivation scores, together with other -# parameters which are relevant for the interpretation of data from the CSV, like -# the muercial ratings assigned to certain skill levels, the formula to calculate -# the score and the ranking of the applicants. -class ApplicationsIni: - def __init__(self, file): - if hasattr(file, 'read'): - # we got passed some form of file object (we may be running in a test) - # we should now artificially hamper our performance so that controlling - # authorities don't get mad at us [ (C) Volkswagen ] - # file is already open - self.filename = file.name - # we don't know the modification time - self.mtime = None - else: - # we just got a file name (hopefully a pathlib.Path object) - self.filename = file - - # open the file for reading, if it exists - try: - file = open(file) - print(f"loading '{self.filename}'") - except FileNotFoundError as e: - # if the file doesn't exist yet, we'll create it when writing - #print(f'warning: {e}') - file = None - # set the modification time to the beginning of the Epoch, so that - # any change will trigger our reload rule - self.mtime = 0 - else: - # store the modification time (in ns) of the file - self.mtime = self.filename.stat().st_mtime_ns - - # Track modifications to the global state, i.e. the parameters - # that apply to all people. Some per-person modifications are tracked - # without changing the global generation number. - self.generation = 0 - self.modifications_without_generation = False - - # use config parser to give us a mapping: - # { section_names : {keys : values} } - # where the values are already converted to the proper types - self.data = self.read_config_file(file) - - @vector.dictify - def read_config_file(self, file): - self.config_file_generation = self.generation - self.modifications_without_generation = False - - cp = configparser.ConfigParser(comment_prefixes='#', inline_comment_prefixes='#') - - if file is not None: - cp.read_file(file) - - # this keeps all the data from the INI file, ex: - # 'motivation_score-0' : {'firstname lastname' : -1} - # while converting the values of the keys to the types - # declared in SECTION_TYPES - for section_name, section in cp.items(): - # find the type of this particular section - typ = self._find_typ(section_name) - yield (section_name, {key:typ(value) for key, value in section.items()}) - - def has_modifications(self): - return (self.modifications_without_generation - or - self.generation > self.config_file_generation) - - def _find_typ(self, section_name): - # find the appropriate converter for a given section - for pattern, typ in SECTION_TYPES.items(): - # find the proper type for the section naming matching pattern - if fnmatch(section_name, pattern): - return typ - # just return as-is if we don't know the type for this section - return lambda x: x - - def reload_if_modified(self): - # this function reloads the INI file if its modified time is newer than - # the last one the function was called. It is not called automatically - # here. It is meant to be used in some form of command/event-loop from - # Grader itself - - if self.mtime is None: - # we won't reload something we can't get the modification time of - return False - - # guard against accidentally removing the file under our feet - try: - current = self.filename.stat().st_mtime_ns - except FileNotFoundError: - print(f'WARNING: {self.filename!r} was removed') - return False - - # don't need to reload - if current == self.mtime: - return False - - # if we are here, we have to reload the file - self.mtime = current - - # unconditionally update our generation counter, because anything may - # have been modified in the file - self.generation += 1 - - self.data = self.read_config_file(self.filename.open()) - - return True - - @functools.cache - @vector.vectorize - def identities(self): - """Return a vector of all identities used in the ini file""" - for section_name, section in self.data.items(): - match section_name.split('-', maxsplit=1): - case ('motivation_score', identity): - yield identity - - #@functools.cache - def get_ratings(self, field): - """Return a mapping: {value → rating}. we expect the field without - the suffix _rating""" - for section_name, section in self.data.items(): - match section_name.rsplit('_', maxsplit=1): - case (name, 'rating'): - if field == name: - # section is already a dictionary - return section - return None - - def save(self, filename=None): - filename = filename or self.filename - with open(filename, 'wt') as file: - self.save_to_file(file) - - def save_to_file(self, file): - # save our data to the INI file - cp = configparser.ConfigParser(comment_prefixes='#', inline_comment_prefixes='#') - cp.read_dict(self.data) - cp.write(file) - - name = getattr(file, 'name', '(tmp)') - printff(f'Saved changes to {name}') - - - def __setitem__(self, key, value): - # allow to set items in the section of the INI using a dotted form, for ex: - # to set [python_rating] -> competent = 1 you can do - # ApplicationsIni['python_rating.competent'] = 1 - section_name, key_name = key.split('.') - - if section_name not in self.data: - # create a new section if we don't find one in the INI - self.data[section_name] = {} - - # enforce types for sections we know the type of - typ = self._find_typ(section_name) - self.data[section_name][key_name] = typ(value) - - # We increase the generation number, for modifications of the state, - # but not for the per-person settings in [motivation-*] and [labels], - # because those modifications increase the generation number in Person, - # and if we increased the generation here, we would trigger recalculation - # of scores of all people whenever one person's score or labels were modified. - if section_name.startswith(('motivation_score-', 'labels')): - self.modifications_without_generation = True - else: - self.generation += 1 - - def __getitem__(self, key): - # same as in __setattr__, allows access to section keys via a dotted notation - # The key is split into two parts: section and key name. - # The key names are allowed to contain dots (this is what maxsplit is for). - if '.' in key: - section_name, item = key.split('.', maxsplit=1) - section = self.data.get(section_name) - if section is None: - ans = None - else: - ans = section.get(item) - else: - ans = self.data.get(key) - # print(f'Query {key} -> {ans}') - return ans - - @vector.vectorize - def get_motivation_scores(self, fullname): - # get all motivation scores of a Person - for identity in self.identities(): - yield self.get_motivation_score(fullname, identity) - - def get_motivation_score(self, fullname, identity): - # get the motivation score of a Person as assigned to them by identity - section_name = f'motivation_score-{identity}' - key = fullname.lower() - return self[f'{section_name}.{key}'] - - def set_motivation_score(self, fullname, value, identity): - section_name = f'motivation_score-{identity}' - key = fullname.lower() - self[f'{section_name}.{key}'] = value - - def get_labels(self, fullname): - key = fullname.lower() - return self[f'labels.{key}'] or [] - - def set_labels(self, fullname, labels): - key = fullname.lower() - - if not labels: - self.data['labels'].pop(key) - else: - self[f'labels.{key}'] = labels - - @property - def formula(self): - return self['formula.formula'] or 'nan' - - @formula.setter - def formula(self, formula): - self['formula.formula'] = formula - - @property - def location(self): - return self['formula.location'] - - -# This class is a collection of applications for an edition of the school -# It can be iterated over and it can return a subset of applications matching -# certain criteria (see "filter" method) -# It keeps a reference to the INI file (if any) corresponding to the CSV file -# where applications are stored -class Applications: - def __init__(self, csv_file, ini_file=None, relaxed=False): - if ini_file is None: - # if the name of the INI file is not passed explicitly, just assume - # it is the same name as the CSV file - ini_file = csv_file.with_suffix('.ini') - self.ini = ApplicationsIni(ini_file) - - # load the applications from the CSV file and adjusting the overrides, - # the labels and the motivation scores as found in the INI file - # self.people is a list of Person objects - self.people = load_applications_csv(csv_file, - ini=self.ini, - relaxed=relaxed) - - @functools.cache - def all_nationalities(self): - return set(p.nationality for p in self.people) - - @functools.cache - def all_affiliations(self): - return set(p.affiliation for p in self.people) - - def all_labels(self): - return set(l - for p in self.people - for l in p.labels) - - def __getitem__(self, key): - """Get people by numerical index or by fullname""" - # we want to be able to do applications[0] and application["mario rossi"] - match key: - case int(key): - return self.people[key] - case str(key): - return self.filter(fullname=f'^{key.lower()}$')[0] - case _: - raise TypeError - - def __len__(self): - return len(self.people) - - def filter(self, **kwargs): - """Return a sequence of applications which match certain criteria: - - The returned object is a vector, i.e it can be used to extract list of - Person attributes, like: - - names_of_italians = applications.filter(nationality='Italy').fullname - - Examples: - - applications.filter(nationality='Italy') --> - applicants where person.nationality=='Italy' - - applications.filter(applied=True) --> - people who declared that they applied already - - applications.filter(label='XXX') --> - applicants labeled XXX - - applications.filter(label=('XXX', 'YYY')) --> - applicants labeled XXX and YYY - - applications.filter(label=('XXX', 'YYY', '-', 'ZZZ')) --> - applicants labeled XXX and YYY but not ZZZ - - applications.filter(label=('XXX', 'YYY', '-', 'ZZZ', 'WWW')) --> - applicants labeled XXX and YYY - but nor ZZZ neither WWW - - Labels are checked exactly, and other attributes are interpreted - as a case-insensitive regexp. - - Note: returns all applications when called without arguments - """ - # first match labels - # The following code is some sort of magic that nt even Zbyszek could - # remember how it worked. It still seems to work, but super-human skills - # may be required to modify it. - labels = kwargs.pop('label', None) - if labels is not None: - matching = [] - labels = iter((labels, )) if type(labels) == str else iter(labels) - accept = frozenset(itertools.takewhile(lambda x: x!='-', labels)) - deny = frozenset(labels) - for p in self.people: - labels = set(p.labels) - if not (accept - labels) and not (labels & deny): - matching.append(p) - else: - matching = self.people[:] - - # now filter through attributes - for attr, value in kwargs.items(): - if isinstance(value, str): - matching = [p for p in matching - if re.search(value, getattr(p, attr), - re.IGNORECASE)] - else: - matching = [p for p in matching - if value == getattr(p, attr)] - - return vector.vector(matching) diff --git a/grader/grader.py b/grader/grader.py index efc10dd..b80614f 100755 --- a/grader/grader.py +++ b/grader/grader.py @@ -30,7 +30,7 @@ from .flags import flags as FLAGS from . import vector -from .applications_ import Applications +from .applications import Applications from .util import ( list_of_equivs, diff --git a/grader/person.py b/grader/person.py index 7990330..62d9b26 100644 --- a/grader/person.py +++ b/grader/person.py @@ -7,7 +7,7 @@ import math import numpy as np -from . import applications_ as applications +from . import applications # List of valid values for fields in the Person object # The values need to match with what is used in the application form diff --git a/grader/test_applications_.py b/grader/test_applications.py similarity index 99% rename from grader/test_applications_.py rename to grader/test_applications.py index 7a74eee..14e8289 100644 --- a/grader/test_applications_.py +++ b/grader/test_applications.py @@ -3,7 +3,7 @@ import os import time -from grader.applications_ import ( +from grader.applications import ( load_applications_csv, ApplicationsIni, Applications) diff --git a/grader/test_person.py b/grader/test_person.py index 011f341..caa9a40 100644 --- a/grader/test_person.py +++ b/grader/test_person.py @@ -1,12 +1,12 @@ import time from grader.person import (convert_bool, Person, FormulaProxy) -from grader.applications_ import ApplicationsIni +from grader.applications import ApplicationsIni import pytest import numpy as np -from .test_applications_ import get_ini +from .test_applications import get_ini MARCIN = dict( name = ' Jędrzej\t\t\tMarcin ',