From e812af81d2cf0c5830b72cb7dcc2f6be9ad7c116 Mon Sep 17 00:00:00 2001
From: Tiziano Zito <opossumnano@gmail.com>
Date: Wed, 22 May 2024 13:48:54 +0200
Subject: [PATCH 1/5] add new command dumpcsv to dump csv files with arbitrary
 attributes from applications

This commit was cherry-picked from the heraklion branch.
---
 grader/grader.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/grader/grader.py b/grader/grader.py
index 683b923..3dd0f42 100755
--- a/grader/grader.py
+++ b/grader/grader.py
@@ -925,7 +925,7 @@ def do_rank(self, args):
                           help="don't use labels in ranking")
             .add_argument('-L', '--highlanders', action='store_true',
                           help='display statistics only for highlanders')
-            .add_argument('-l', '--labels',
+.add_argument('-l', '--labels',
                           help='display statistics only for people with label(s).'+
                                'Multiple labels: INVITE,CONFIRMED or INVITE,-,DECLINED')
             .add_argument('--edition', default='current',
@@ -1161,6 +1161,34 @@ def do_save(self, args):
         opts = self.save_options.parse_args(args.split())
         self.applications.ini.save(opts.filename)
 
+    dumpcsv_options = (
+            cmd_completer.PagedArgumentParser('dumpcsv')
+                .add_argument('-l', '--labels',
+                              help='only dump people with label(s).'+
+                                   'Multiple labels: INVITE,CONFIRMED or INVITE,-,DECLINED')
+                .add_argument('-a', '--attributes', help='comma-separated list of attributes to dump')
+    )
+
+    def do_dumpcsv(self, args):
+        opts = self.dumpcsv_options.parse_args(args.split())
+        pool = list(self.applications)
+        if opts.labels:
+            # create label filter tuple
+            labels = opts.labels.split(',')
+            pool = self.applications.filter(label=labels)
+        attributes = opts.attributes.split(',')
+        # HEADER
+        header = ';'.join([f'${attr.upper()}$' for attr in attributes])
+        lines = [header]
+        for p in pool:
+            line = ';'.join([f'{getattr(p, attr)}' for attr in attributes])
+            lines.append(line)
+        with open('/tmp/grader.csv', 'wt') as fl:
+            fl.write('\n'.join(lines))
+            fl.write('\n')
+        print('Dump written to /tmp/grader.csv')
+
+
     def do_write(self, args):
         """Write lists of mailing recipients
 

From 79fe6d6c3c8a32852c25e0da5459fb9aeb34f981 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Mon, 14 Apr 2025 09:37:53 +0200
Subject: [PATCH 2/5] test_applications: verify that we get all applications
 with an empty filter

This will be used to make the code simpler later on.
---
 grader/test_applications.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/grader/test_applications.py b/grader/test_applications.py
index 7831112..f3cbb89 100644
--- a/grader/test_applications.py
+++ b/grader/test_applications.py
@@ -188,6 +188,9 @@ def test_applications_object(app):
     assert len(app) == 3
     assert len(app.people) == 3
 
+    alls = app.filter()
+    assert len(alls) == 3
+
     vegans = app.filter(label = ['VEGAN'])
     assert len(vegans) == 1
     assert vegans.name == ['Jędrzej Marcin']

From 45368884fec3876a5d4360d1fe387326fe649a71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Mon, 14 Apr 2025 09:47:02 +0200
Subject: [PATCH 3/5] grader: clean up code a bit

--attributes is required, without that the verb doesn't make sense.
---
 grader/grader.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/grader/grader.py b/grader/grader.py
index 3dd0f42..85a9b39 100755
--- a/grader/grader.py
+++ b/grader/grader.py
@@ -925,12 +925,12 @@ def do_rank(self, args):
                           help="don't use labels in ranking")
             .add_argument('-L', '--highlanders', action='store_true',
                           help='display statistics only for highlanders')
-.add_argument('-l', '--labels',
-                          help='display statistics only for people with label(s).'+
+            .add_argument('-l', '--labels',
+                          help='display statistics only for people with LABELS. '
                                'Multiple labels: INVITE,CONFIRMED or INVITE,-,DECLINED')
             .add_argument('--edition', default='current',
                           help="edition for which we want the stats, e.g. '2010-trento'. "
-                               "'all' means all editions 'current' (default) means the"
+                               "'all' means all editions. 'current' (default) means the "
                                "latest one")
     )
 
@@ -1164,24 +1164,25 @@ def do_save(self, args):
     dumpcsv_options = (
             cmd_completer.PagedArgumentParser('dumpcsv')
                 .add_argument('-l', '--labels',
-                              help='only dump people with label(s).'+
+                              help='only dump people with LABELS. '
                                    'Multiple labels: INVITE,CONFIRMED or INVITE,-,DECLINED')
-                .add_argument('-a', '--attributes', help='comma-separated list of attributes to dump')
+                .add_argument('-a', '--attributes',
+                              required=True,
+                              help='comma-separated list of attributes to dump')
     )
 
     def do_dumpcsv(self, args):
         opts = self.dumpcsv_options.parse_args(args.split())
-        pool = list(self.applications)
-        if opts.labels:
-            # create label filter tuple
-            labels = opts.labels.split(',')
-            pool = self.applications.filter(label=labels)
+
+        labels = opts.labels.split(',') if opts.labels else []
+        pool = self.applications.filter(label=labels)
+
         attributes = opts.attributes.split(',')
         # HEADER
-        header = ';'.join([f'${attr.upper()}$' for attr in attributes])
+        header = ';'.join(f'${attr.upper()}$' for attr in attributes)
         lines = [header]
         for p in pool:
-            line = ';'.join([f'{getattr(p, attr)}' for attr in attributes])
+            line = ';'.join(f'{getattr(p, attr)}' for attr in attributes)
             lines.append(line)
         with open('/tmp/grader.csv', 'wt') as fl:
             fl.write('\n'.join(lines))

From 65b5e27cbcd84de4de25d274f3892c402e2cb250 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Mon, 14 Apr 2025 10:02:07 +0200
Subject: [PATCH 4/5] grader: split out the function to write output csv

---
 grader/applications.py | 18 +++++++++++++++++-
 grader/grader.py       | 16 ++++------------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/grader/applications.py b/grader/applications.py
index 95ae1ec..79b37f9 100644
--- a/grader/applications.py
+++ b/grader/applications.py
@@ -13,7 +13,7 @@
 import tokenize
 
 from . import (person, vector, util)
-from .util import printff
+from .util import (printf, printff)
 
 DEBUG_MAPPINGS = False
 
@@ -724,3 +724,19 @@ def find_min_max(self):
             min_ = min(scores)
             items[item] = (max_-min_) / (maxsc-minsc)*100
         return minsc, maxsc, items
+
+    def write_to_file(self, labels, attributes, filename):
+        pool = self.filter(label=labels)
+
+        # HEADER
+        header = ';'.join(f'${attr.upper()}$' for attr in attributes)
+        lines = [header]
+        for p in pool:
+            line = ';'.join(f'{getattr(p, attr)}' for attr in attributes)
+            lines += [line]
+
+        with open(filename, 'wt') as fl:
+            fl.write('\n'.join(lines))
+            fl.write('\n')
+
+        printf(f'{filename!r} written with header + {len(lines)} rows')
diff --git a/grader/grader.py b/grader/grader.py
index 85a9b39..49a4b0f 100755
--- a/grader/grader.py
+++ b/grader/grader.py
@@ -1171,23 +1171,15 @@ def do_save(self, args):
                               help='comma-separated list of attributes to dump')
     )
 
+
     def do_dumpcsv(self, args):
         opts = self.dumpcsv_options.parse_args(args.split())
 
         labels = opts.labels.split(',') if opts.labels else []
-        pool = self.applications.filter(label=labels)
-
         attributes = opts.attributes.split(',')
-        # HEADER
-        header = ';'.join(f'${attr.upper()}$' for attr in attributes)
-        lines = [header]
-        for p in pool:
-            line = ';'.join(f'{getattr(p, attr)}' for attr in attributes)
-            lines.append(line)
-        with open('/tmp/grader.csv', 'wt') as fl:
-            fl.write('\n'.join(lines))
-            fl.write('\n')
-        print('Dump written to /tmp/grader.csv')
+
+        # TODO: make the output name configurable?
+        self.applications.write_to_file(labels, attributes, '/tmp/grader.csv')
 
 
     def do_write(self, args):

From b34b4dbfcd1085d3077915bf2b17e6b8ade679e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Mon, 14 Apr 2025 18:34:43 +0200
Subject: [PATCH 5/5] Rework all write functions to use the single shared
 implementation

---
 grader/applications.py |  22 +++------
 grader/grader.py       | 108 +++++++++++++++++------------------------
 grader/util.py         |  15 ++++++
 3 files changed, 66 insertions(+), 79 deletions(-)

diff --git a/grader/applications.py b/grader/applications.py
index 79b37f9..81e7f42 100644
--- a/grader/applications.py
+++ b/grader/applications.py
@@ -13,7 +13,7 @@
 import tokenize
 
 from . import (person, vector, util)
-from .util import (printf, printff)
+from .util import (printff, write_csv_file)
 
 DEBUG_MAPPINGS = False
 
@@ -725,18 +725,10 @@ def find_min_max(self):
             items[item] = (max_-min_) / (maxsc-minsc)*100
         return minsc, maxsc, items
 
-    def write_to_file(self, labels, attributes, filename):
+    def write_filtered_csv(self, filename, labels, attributes=('name', 'lastname', 'email')):
+        # Pick out the people to save to file
         pool = self.filter(label=labels)
-
-        # HEADER
-        header = ';'.join(f'${attr.upper()}$' for attr in attributes)
-        lines = [header]
-        for p in pool:
-            line = ';'.join(f'{getattr(p, attr)}' for attr in attributes)
-            lines += [line]
-
-        with open(filename, 'wt') as fl:
-            fl.write('\n'.join(lines))
-            fl.write('\n')
-
-        printf(f'{filename!r} written with header + {len(lines)} rows')
+        write_csv_file(filename,
+                       attributes,
+                       [[getattr(p, attr) for attr in attributes]
+                        for p in pool])
diff --git a/grader/grader.py b/grader/grader.py
index 49a4b0f..e5d9f84 100755
--- a/grader/grader.py
+++ b/grader/grader.py
@@ -2,10 +2,10 @@
 import argparse
 import collections
 import enum
+import itertools
 import logging
 import numbers
 import operator
-import os
 import pathlib
 import random
 import re
@@ -29,6 +29,7 @@
     list_of_float,
     printf,
     printff,
+    write_csv_file,
 )
 
 def ellipsize(s, width):
@@ -1179,7 +1180,7 @@ def do_dumpcsv(self, args):
         attributes = opts.attributes.split(',')
 
         # TODO: make the output name configurable?
-        self.applications.write_to_file(labels, attributes, '/tmp/grader.csv')
+        self.applications.write_filtered_csv('/tmp/grader.csv', labels, attributes)
 
 
     def do_write(self, args):
@@ -1196,71 +1197,50 @@ def do_write(self, args):
         """
         if args != '':
             raise ValueError('no args please')
-        applications = self.applications
 
-        _write_file('list_confirmed.csv',
-                    applications.filter(label=('CONFIRMED', '-', 'DECLINED', 'NEXT-YEAR')))
-
-        _write_file('list_invite.csv',
-                    applications.filter(label=('INVITE', '-', 'DECLINED', 'CONFIRMED', 'NEXT-YEAR')))
-        _write_file('list_invite_reminder.csv',
-                    applications.filter(label=('INVITE', '-', 'DECLINED', 'CONFIRMED', 'NEXT-YEAR')))
-        _write_file('list_overqualified.csv',
-                    applications.filter(label=('OVERQUALIFIED', '-', 'CUSTOM-ANSWER')))
-        _write_file('list_custom_answer.csv',
-                    applications.filter(label=('CUSTOM-ANSWER')))
+        self.applications.write_filtered_csv('list_confirmed.csv',
+                                             ('CONFIRMED', '-', 'DECLINED', 'NEXT-YEAR'))
+
+        self.applications.write_filtered_csv('list_invite.csv',
+                                             ('INVITE', '-', 'DECLINED', 'CONFIRMED', 'NEXT-YEAR'))
+
+        self.applications.write_filtered_csv('list_invite_reminder.csv',
+                                             ('INVITE', '-', 'DECLINED', 'CONFIRMED', 'NEXT-YEAR'))
+
+        self.applications.write_filtered_csv('list_overqualified.csv',
+                                             ('OVERQUALIFIED', '-', 'CUSTOM-ANSWER'))
+
+        self.applications.write_filtered_csv('list_custom_answer.csv',
+                                             ('CUSTOM-ANSWER',))
+
         # get all INVITESL? labels
-        all_labels = self.applications.all_labels()
-        invitesl = [label for label in all_labels
+        invitesl = [label for label in self.applications.all_labels()
                     if label.startswith('INVITESL')]
-        for i, sl_label in enumerate(invitesl):
-            _write_file_samelab(
-                'list_same_lab%d.csv'%(i+1),
-                applications.filter(label=(sl_label,'-', 'CONFIRMED', 'DECLINED', 'NEXT-YEAR')))
-        _write_file('list_shortlist.csv',
-                    applications.filter(label=('SHORTLIST', '-', 'DECLINED', 'NEXT-YEAR', 'CONFIRMED', 'INVITE', *invitesl)))
-        _write_file('list_rejected.csv',
-                    applications.filter(
-                        label=('-', 'DECLINED', 'NEXT-YEAR', 'CONFIRMED', 'INVITE', 'SHORTLIST',
-                               'OVERQUALIFIED', 'CUSTOM-ANSWER', *invitesl)))
-        _write_file('list_invite_nextyear.csv',
-                    applications.filter(label=('NEXT-YEAR')))
-        _write_file('list_declined.csv',
-                    applications.filter(label=('DECLINED', '-', 'NEXT-YEAR')))
-
-def _write_file(filename, persons):
-    header = '$NAME$;$SURNAME$;$EMAIL$'
-    if os.path.exists(filename):
-        printf("'{}' already exists. We cannot overwrite it!", filename)
-        return
-    with open(filename, 'w') as f:
-        f.write(header + '\n')
-        i = -1
-        for i, person in enumerate(persons):
-            row = ';'.join((person.name, person.lastname, person.email))
-            f.write(row + '\n')
-    printf("'{}' written with header + {} rows", filename, i + 1)
-
-def _write_file_samelab(filename, persons):
-    persons = list(persons)
-    if len(persons) == 0:
-        printf("No matching persons for '{}'. Check labels!", filename)
-    if os.path.exists(filename):
-        printf("'{}' already exists. We cannot overwrite it!", filename)
-        return
-    header = ';'.join('$%dNAME$;$%dSURNAME$'%(d+1,d+1) for d in range(len(persons))) + ';$EMAIL$'
-    with open(filename, 'w') as f:
-        f.write(header + '\n')
-        names = []
-        emails = []
-        i = -1
-        for i, person in enumerate(persons):
-            names.extend([person.name, person.lastname])
-            emails.append(person.email)
-        names = ';'.join(names)
-        emails = ','.join(emails)
-        f.write(names+';'+emails+'\n')
-    printf("'{}' written with header + {} entries", filename, i + 1)
+
+        self.applications.write_filtered_csv('list_shortlist.csv',
+                                             ('SHORTLIST', '-', 'DECLINED', 'NEXT-YEAR', 'CONFIRMED', 'INVITE', *invitesl))
+
+        for i, sl_label in enumerate(invitesl, start=1):
+            persons = self.applications.filter(label=(sl_label, '-', 'CONFIRMED', 'DECLINED', 'NEXT-YEAR'))
+            # One row per lab: numbered name/lastname columns for each person, then all emails joined by ','
+            fields = [*itertools.chain.from_iterable((f'{d}name', f'{d}lastname') for d in range(1, len(persons) + 1)),
+                      'email']
+
+            row = [*itertools.chain.from_iterable((p.name, p.lastname) for p in persons),
+                   ','.join(p.email for p in persons)]
+
+            write_csv_file(f'list_same_lab{i}.csv', fields, [row])
+
+        self.applications.write_filtered_csv('list_invite_nextyear.csv',
+                                             ('NEXT-YEAR',))
+
+        self.applications.write_filtered_csv('list_rejected.csv',
+                                             ('-', 'DECLINED', 'NEXT-YEAR', 'CONFIRMED', 'INVITE', 'SHORTLIST',
+                                              'OVERQUALIFIED', 'CUSTOM-ANSWER', *invitesl))
+
+        self.applications.write_filtered_csv('list_declined.csv',
+                                             ('DECLINED', '-', 'NEXT-YEAR'))
+
 
 class MissingRating(KeyError):
     def __str__(self, *args):
diff --git a/grader/util.py b/grader/util.py
index e2a9725..d2c6bd8 100644
--- a/grader/util.py
+++ b/grader/util.py
@@ -46,3 +46,18 @@ def __init__(self, arg=None):
 
     def __str__(self):
         return ', '.join(self)
+
+
+def write_csv_file(filename, fields, rows):
+    """Write rows to filename as ';'-separated text under a $FIELD$ header line."""
+    rows = list(rows)  # materialize once: any iterable is accepted and len() is needed below
+    lines = [';'.join(f'${field.upper()}$' for field in fields)]
+    for row in rows:
+        if len(row) != len(fields):  # explicit raise: an assert would vanish under -O
+            raise ValueError(f'row {row!r} has {len(row)} items, expected {len(fields)}')
+        lines += [';'.join(str(item) for item in row)]
+
+    with open(filename, 'wt') as fl:
+        fl.write('\n'.join(lines))
+        fl.write('\n')
+    printf(f'{filename!r} written with header + {len(rows)} rows')