From e812af81d2cf0c5830b72cb7dcc2f6be9ad7c116 Mon Sep 17 00:00:00 2001 From: Tiziano Zito Date: Wed, 22 May 2024 13:48:54 +0200 Subject: [PATCH 1/5] add new command dumpcsv to dump csv files with arbitrary attributes from applications This commit was cherry-picked from the heraklion branch. --- grader/grader.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/grader/grader.py b/grader/grader.py index 683b923..3dd0f42 100755 --- a/grader/grader.py +++ b/grader/grader.py @@ -925,7 +925,7 @@ def do_rank(self, args): help="don't use labels in ranking") .add_argument('-L', '--highlanders', action='store_true', help='display statistics only for highlanders') - .add_argument('-l', '--labels', +.add_argument('-l', '--labels', help='display statistics only for people with label(s).'+ 'Multiple labels: INVITE,CONFIRMED or INVITE,-,DECLINED') .add_argument('--edition', default='current', @@ -1161,6 +1161,34 @@ def do_save(self, args): opts = self.save_options.parse_args(args.split()) self.applications.ini.save(opts.filename) + dumpcsv_options = ( + cmd_completer.PagedArgumentParser('dumpcsv') + .add_argument('-l', '--labels', + help='only dump people with label(s).'+ + 'Multiple labels: INVITE,CONFIRMED or INVITE,-,DECLINED') + .add_argument('-a', '--attributes', help='comma-separated list of attributes to dump') + ) + + def do_dumpcsv(self, args): + opts = self.dumpcsv_options.parse_args(args.split()) + pool = list(self.applications) + if opts.labels: + # create label filter tuple + labels = opts.labels.split(',') + pool = self.applications.filter(label=labels) + attributes = opts.attributes.split(',') + # HEADER + header = ';'.join([f'${attr.upper()}$' for attr in attributes]) + lines = [header] + for p in pool: + line = ';'.join([f'{getattr(p, attr)}' for attr in attributes]) + lines.append(line) + with open('/tmp/grader.csv', 'wt') as fl: + fl.write('\n'.join(lines)) + fl.write('\n') + print('Dump written to 
/tmp/grader.csv') + + def do_write(self, args): """Write lists of mailing recipients From 79fe6d6c3c8a32852c25e0da5459fb9aeb34f981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 14 Apr 2025 09:37:53 +0200 Subject: [PATCH 2/5] test_applications: verify that we get all applications with an empty filter This will be used to make the code simpler later on. --- grader/test_applications.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/grader/test_applications.py b/grader/test_applications.py index 7831112..f3cbb89 100644 --- a/grader/test_applications.py +++ b/grader/test_applications.py @@ -188,6 +188,9 @@ def test_applications_object(app): assert len(app) == 3 assert len(app.people) == 3 + alls = app.filter() + assert len(alls) == 3 + vegans = app.filter(label = ['VEGAN']) assert len(vegans) == 1 assert vegans.name == ['Jędrzej Marcin'] From 45368884fec3876a5d4360d1fe387326fe649a71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 14 Apr 2025 09:47:02 +0200 Subject: [PATCH 3/5] grader: clean up code a bit --attributes is required, without that the verb doesn't make sense. --- grader/grader.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/grader/grader.py b/grader/grader.py index 3dd0f42..85a9b39 100755 --- a/grader/grader.py +++ b/grader/grader.py @@ -925,12 +925,12 @@ def do_rank(self, args): help="don't use labels in ranking") .add_argument('-L', '--highlanders', action='store_true', help='display statistics only for highlanders') -.add_argument('-l', '--labels', - help='display statistics only for people with label(s).'+ + .add_argument('-l', '--labels', + help='display statistics only for people with LABELS. ' 'Multiple labels: INVITE,CONFIRMED or INVITE,-,DECLINED') .add_argument('--edition', default='current', help="edition for which we want the stats, e.g. '2010-trento'. 
" - "'all' means all editions 'current' (default) means the" + "'all' means all editions. 'current' (default) means the " "latest one") ) @@ -1164,24 +1164,25 @@ def do_save(self, args): dumpcsv_options = ( cmd_completer.PagedArgumentParser('dumpcsv') .add_argument('-l', '--labels', - help='only dump people with label(s).'+ + help='only dump people with LABELS. ' 'Multiple labels: INVITE,CONFIRMED or INVITE,-,DECLINED') - .add_argument('-a', '--attributes', help='comma-separated list of attributes to dump') + .add_argument('-a', '--attributes', + required=True, + help='comma-separated list of attributes to dump') ) def do_dumpcsv(self, args): opts = self.dumpcsv_options.parse_args(args.split()) - pool = list(self.applications) - if opts.labels: - # create label filter tuple - labels = opts.labels.split(',') - pool = self.applications.filter(label=labels) + + labels = opts.labels.split(',') if opts.labels else [] + pool = self.applications.filter(label=labels) + attributes = opts.attributes.split(',') # HEADER - header = ';'.join([f'${attr.upper()}$' for attr in attributes]) + header = ';'.join(f'${attr.upper()}$' for attr in attributes) lines = [header] for p in pool: - line = ';'.join([f'{getattr(p, attr)}' for attr in attributes]) + line = ';'.join(f'{getattr(p, attr)}' for attr in attributes) lines.append(line) with open('/tmp/grader.csv', 'wt') as fl: fl.write('\n'.join(lines)) From 65b5e27cbcd84de4de25d274f3892c402e2cb250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 14 Apr 2025 10:02:07 +0200 Subject: [PATCH 4/5] grader: split out the function to write output csv --- grader/applications.py | 18 +++++++++++++++++- grader/grader.py | 16 ++++------------ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/grader/applications.py b/grader/applications.py index 95ae1ec..79b37f9 100644 --- a/grader/applications.py +++ b/grader/applications.py @@ -13,7 +13,7 @@ import tokenize from . 
import (person, vector, util) -from .util import printff +from .util import (printf, printff) DEBUG_MAPPINGS = False @@ -724,3 +724,19 @@ def find_min_max(self): min_ = min(scores) items[item] = (max_-min_) / (maxsc-minsc)*100 return minsc, maxsc, items + + def write_to_file(self, labels, attributes, filename): + pool = self.filter(label=labels) + + # HEADER + header = ';'.join(f'${attr.upper()}$' for attr in attributes) + lines = [header] + for p in pool: + line = ';'.join(f'{getattr(p, attr)}' for attr in attributes) + lines += [line] + + with open(filename, 'wt') as fl: + fl.write('\n'.join(lines)) + fl.write('\n') + + printf(f'{filename!r} written with header + {len(lines)} rows') diff --git a/grader/grader.py b/grader/grader.py index 85a9b39..49a4b0f 100755 --- a/grader/grader.py +++ b/grader/grader.py @@ -1171,23 +1171,15 @@ def do_save(self, args): help='comma-separated list of attributes to dump') ) + def do_dumpcsv(self, args): opts = self.dumpcsv_options.parse_args(args.split()) labels = opts.labels.split(',') if opts.labels else [] - pool = self.applications.filter(label=labels) - attributes = opts.attributes.split(',') - # HEADER - header = ';'.join(f'${attr.upper()}$' for attr in attributes) - lines = [header] - for p in pool: - line = ';'.join(f'{getattr(p, attr)}' for attr in attributes) - lines.append(line) - with open('/tmp/grader.csv', 'wt') as fl: - fl.write('\n'.join(lines)) - fl.write('\n') - print('Dump written to /tmp/grader.csv') + + # TODO: make the output name configurable? 
+ self.applications.write_to_file(labels, attributes, '/tmp/grader.csv') def do_write(self, args): From b34b4dbfcd1085d3077915bf2b17e6b8ade679e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 14 Apr 2025 18:34:43 +0200 Subject: [PATCH 5/5] Rework all write functions to use the single shared implementation --- grader/applications.py | 22 +++------ grader/grader.py | 108 +++++++++++++++++------------------------ grader/util.py | 15 ++++++ 3 files changed, 66 insertions(+), 79 deletions(-) diff --git a/grader/applications.py b/grader/applications.py index 79b37f9..81e7f42 100644 --- a/grader/applications.py +++ b/grader/applications.py @@ -13,7 +13,7 @@ import tokenize from . import (person, vector, util) -from .util import (printf, printff) +from .util import (printff, write_csv_file) DEBUG_MAPPINGS = False @@ -725,18 +725,10 @@ def find_min_max(self): items[item] = (max_-min_) / (maxsc-minsc)*100 return minsc, maxsc, items - def write_to_file(self, labels, attributes, filename): + def write_filtered_csv(self, filename, labels, attributes=('name', 'lastname', 'email')): + # Pick out the people to save to file pool = self.filter(label=labels) - - # HEADER - header = ';'.join(f'${attr.upper()}$' for attr in attributes) - lines = [header] - for p in pool: - line = ';'.join(f'{getattr(p, attr)}' for attr in attributes) - lines += [line] - - with open(filename, 'wt') as fl: - fl.write('\n'.join(lines)) - fl.write('\n') - - printf(f'{filename!r} written with header + {len(lines)} rows') + write_csv_file(filename, + attributes, + [[getattr(p, attr) for attr in attributes] + for p in pool]) diff --git a/grader/grader.py b/grader/grader.py index 49a4b0f..e5d9f84 100755 --- a/grader/grader.py +++ b/grader/grader.py @@ -2,10 +2,10 @@ import argparse import collections import enum +import itertools import logging import numbers import operator -import os import pathlib import random import re @@ -29,6 +29,7 @@ list_of_float, printf, 
printff, + write_csv_file, ) def ellipsize(s, width): @@ -1179,7 +1180,7 @@ def do_dumpcsv(self, args): attributes = opts.attributes.split(',') # TODO: make the output name configurable? - self.applications.write_to_file(labels, attributes, '/tmp/grader.csv') + self.applications.write_to_file('/tmp/grader.csv', labels, attributes) def do_write(self, args): @@ -1196,71 +1197,50 @@ def do_write(self, args): """ if args != '': raise ValueError('no args please') - applications = self.applications - _write_file('list_confirmed.csv', - applications.filter(label=('CONFIRMED', '-', 'DECLINED', 'NEXT-YEAR'))) - - _write_file('list_invite.csv', - applications.filter(label=('INVITE', '-', 'DECLINED', 'CONFIRMED', 'NEXT-YEAR'))) - _write_file('list_invite_reminder.csv', - applications.filter(label=('INVITE', '-', 'DECLINED', 'CONFIRMED', 'NEXT-YEAR'))) - _write_file('list_overqualified.csv', - applications.filter(label=('OVERQUALIFIED', '-', 'CUSTOM-ANSWER'))) - _write_file('list_custom_answer.csv', - applications.filter(label=('CUSTOM-ANSWER'))) + self.applications.write_filtered_csv('list_confirmed.csv', + ('CONFIRMED', '-', 'DECLINED', 'NEXT-YEAR')) + + self.applications.write_filtered_csv('list_invite.csv', + ('INVITE', '-', 'DECLINED', 'CONFIRMED', 'NEXT-YEAR')) + + self.applications.write_filtered_csv('list_invite_reminder.csv', + ('INVITE', '-', 'DECLINED', 'CONFIRMED', 'NEXT-YEAR')) + + self.applications.write_filtered_csv('list_overqualified.csv', + ('OVERQUALIFIED', '-', 'CUSTOM-ANSWER')) + + self.applications.write_filtered_csv('list_custom_answer.csv', + ('CUSTOM-ANSWER',)) + # get all INVITESL? 
labels - all_labels = self.applications.all_labels() - invitesl = [label for label in all_labels + invitesl = [label for label in self.applications.all_labels() if label.startswith('INVITESL')] - for i, sl_label in enumerate(invitesl): - _write_file_samelab( - 'list_same_lab%d.csv'%(i+1), - applications.filter(label=(sl_label,'-', 'CONFIRMED', 'DECLINED', 'NEXT-YEAR'))) - _write_file('list_shortlist.csv', - applications.filter(label=('SHORTLIST', '-', 'DECLINED', 'NEXT-YEAR', 'CONFIRMED', 'INVITE', *invitesl))) - _write_file('list_rejected.csv', - applications.filter( - label=('-', 'DECLINED', 'NEXT-YEAR', 'CONFIRMED', 'INVITE', 'SHORTLIST', - 'OVERQUALIFIED', 'CUSTOM-ANSWER', *invitesl))) - _write_file('list_invite_nextyear.csv', - applications.filter(label=('NEXT-YEAR'))) - _write_file('list_declined.csv', - applications.filter(label=('DECLINED', '-', 'NEXT-YEAR'))) - -def _write_file(filename, persons): - header = '$NAME$;$SURNAME$;$EMAIL$' - if os.path.exists(filename): - printf("'{}' already exists. We cannot overwrite it!", filename) - return - with open(filename, 'w') as f: - f.write(header + '\n') - i = -1 - for i, person in enumerate(persons): - row = ';'.join((person.name, person.lastname, person.email)) - f.write(row + '\n') - printf("'{}' written with header + {} rows", filename, i + 1) - -def _write_file_samelab(filename, persons): - persons = list(persons) - if len(persons) == 0: - printf("No matching persons for '{}'. Check labels!", filename) - if os.path.exists(filename): - printf("'{}' already exists. 
We cannot overwrite it!", filename) - return - header = ';'.join('$%dNAME$;$%dSURNAME$'%(d+1,d+1) for d in range(len(persons))) + ';$EMAIL$' - with open(filename, 'w') as f: - f.write(header + '\n') - names = [] - emails = [] - i = -1 - for i, person in enumerate(persons): - names.extend([person.name, person.lastname]) - emails.append(person.email) - names = ';'.join(names) - emails = ','.join(emails) - f.write(names+';'+emails+'\n') - printf("'{}' written with header + {} entries", filename, i + 1) + + self.applications.write_filtered_csv('list_shortlist.csv', + ('SHORTLIST', '-', 'DECLINED', 'NEXT-YEAR', 'CONFIRMED', 'INVITE', *invitesl)) + + for i, sl_label in enumerate(invitesl, start=1): + persons = applications.filter(labels=(sl_label, '-', 'CONFIRMED', 'DECLINED', 'NEXT-YEAR')) + + fields = [*itertools.chain.from_iterable(('{d}name', '{d}lastname') for d in range(1, len(persons) + 1)), + 'email'] + + row = [*itertools.chain.from_iterable((p.name, p.lastname) for p in persons), + ','.join(p.email for p in persons)] + + write_csv_file('list_same_lab{i}.csv', fields, [row]) + + self.applications.write_filtered_csv('list_invite_nextyear.csv', + ('NEXT-YEAR',)) + + self.applications.write_filtered_csv('list_rejected.csv', + ('-', 'DECLINED', 'NEXT-YEAR', 'CONFIRMED', 'INVITE', 'SHORTLIST', + 'OVERQUALIFIED', 'CUSTOM-ANSWER', *invitesl)) + + self.applications.write_filtered_csv('list_declined.csv', + ('DECLINED', '-', 'NEXT-YEAR')) + class MissingRating(KeyError): def __str__(self, *args): diff --git a/grader/util.py b/grader/util.py index e2a9725..d2c6bd8 100644 --- a/grader/util.py +++ b/grader/util.py @@ -46,3 +46,18 @@ def __init__(self, arg=None): def __str__(self): return ', '.join(self) + + +def write_csv_file(filename, fields, rows): + header = ';'.join(f'${field.upper()}$' for field in fields) + lines = [header] + + for row in rows: + assert len(row) == len(fields) + lines += [';'.join(str(item) for item in row)] + + with open(filename, 'wt') as fl: + 
fl.write('\n'.join(lines)) + fl.write('\n') + + printf(f'{filename!r} written with header + {len(rows)} rows')