# Benchmark-v700/cpp/plotting/exp_table_builder.py at master · ITMO-PTDC-Team/Benchmark-v700 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import re
from pathlib import Path
from dataclasses import dataclass
import argparse

# Regex template used to pull one value out of a stat file. ``{key}`` is filled
# in with ``str.format`` (see get_table_map); the single capture group is the
# run of non-whitespace characters that follows ``avg_val=``. ``.`` does not
# cross newlines here, so ``key=...`` and ``avg_val=...`` must share a line.
EXTRACT_PATTERN = r"key={key}\s.*\savg_val=(\S*)"
# Separator placed between the cells of one table row.
COLUMN_SEP = " & "
# Separator inserted between consecutive lines of the generated table
# (build_table joins all of its lines with it).
ROW_SEP = "\n\\hline\n"

# Data-structure identifier (the directory name matched against ``-ds``)
# -> label printed in the first column of that data structure's row.
DS_NAME_MAP = {
    # NOTE(review): the commented-out entries below are presumably labels for a
    # different set of data structures — confirm before re-enabling. build_table
    # indexes this map directly, so every ds passed via -ds needs an active entry.
    # "splay_tree": "Splay Tree",
    # "sast": "SA2T",
    # "btree": "B-Tree",
    # "sabt": "SABT",
    # "ist": "IST",
    # "sait": "SAIT",
    # "salt": "SALT"
    "redis_zset" : "ZSET",
    "redis_sait": "SAIT",
    "redis_sabt": "SABT",
    "redis_sabpt": "SABPT",
    "redis_salt": "SALT"
}

# Workload identifier (workload directory name) -> column header text.
# Workloads without an entry are shown under their raw name
# (see get_workload_name).
WORKLOAD_NAME_MAP = {
    "70-30": "70/30",
    "80-20": "80/20",
    "90-10": "90/10",
    "95-05": "95/05",
    "99-01": "99/01"
}


def get_workload_name(workload):
    """Return the column header for ``workload``.

    Workloads that have no entry in ``WORKLOAD_NAME_MAP`` are returned
    unchanged.
    """
    return WORKLOAD_NAME_MAP.get(workload, workload)


def format_main_ops(ops, round):
    r"""Format ``ops`` as LaTeX scientific notation.

    The number is first rendered with ``%g`` using ``round`` significant
    digits. If that rendering carries an exponent, the result has the form
    ``<mantissa> \cdot {10}^{<exponent>}``; e.g. ``format_main_ops(1234567, 2)``
    gives ``1.2 \cdot {10}^{6}``. Exponents of any width and sign are handled.
    If ``%g`` picks plain notation (no exponent), that plain rendering is
    returned as-is.

    Args:
        ops: numeric value to format.
        round: number of significant digits passed to ``%g``. The name
            shadows the ``round`` builtin inside this function; it is kept
            for interface compatibility.

    Returns:
        The LaTeX math-mode fragment (without surrounding ``$``).
    """
    sops = f'%.{round}g' % ops
    mantissa, marker, exponent = sops.partition("e")
    if not marker:
        # %g chose fixed notation, so there is no power of ten to typeset.
        return sops
    # int() drops the sign of positive exponents and any zero padding
    # ("+06" -> 6, "+10" -> 10, "-05" -> -5).
    return mantissa + " \\cdot {10}^{" + str(int(exponent)) + "}"


def get_table_map(args) -> dict[str, dict[str, float]]:
    """Collect one value per (data structure, workload) from plotter output.

    The directories are expected to be laid out as
    ``<plotter_output_dir>/<workload>/<operations>/<ds>/<stat file>``.
    A file is treated as a stat file when ``args.stat`` is a substring of its
    name. The value is the first ``avg_val`` that ``EXTRACT_PATTERN`` finds
    for ``args.key`` in that file. When several files (or several output
    directories) provide a value for the same cell, the one visited last wins.

    Args:
        args: parsed command-line arguments; reads ``ds``, ``workload``,
            ``operations``, ``stat``, ``key`` and ``plotter_output_dir``.

    Returns:
        ``ds -> workload -> value``. Every requested ds has an entry, which
        stays empty when nothing was found for it.

    Raises:
        IndexError: a matching stat file contains no value for ``args.key``.
            The exception type is the same as before; the message now names
            the offending file.
        ValueError: the captured text is not a valid float.
    """
    table_map = {ds: {} for ds in args.ds}
    # The pattern depends only on args.key, so build it once, not per file.
    pattern = re.compile(EXTRACT_PATTERN.format(key=args.key))

    for benchmark_dir in args.plotter_output_dir:
        for workload_dir in benchmark_dir.iterdir():
            workload = workload_dir.name
            if workload not in args.workload or not workload_dir.is_dir():
                continue
            for ops_dir in workload_dir.iterdir():
                if ops_dir.name != args.operations or not ops_dir.is_dir():
                    continue
                for ds_dir in ops_dir.iterdir():
                    ds = ds_dir.name
                    if not ds_dir.is_dir() or ds not in args.ds:
                        continue
                    for stat_file in ds_dir.iterdir():
                        if args.stat not in stat_file.name or not stat_file.is_file():
                            continue
                        found = pattern.search(stat_file.read_text())
                        if found is None:
                            raise IndexError(
                                f"no avg_val for key={args.key} in {stat_file}"
                            )
                        table_map[ds][workload] = float(found.group(1))
    return table_map


@dataclass
class TableMapData:
    """Values prepared for table rendering.

    Produced by ``preprocess_table_map`` and consumed by ``build_table``.
    """
    # ds -> values ordered by workloads (same order as ``args.workload``)
    values_by_ds: dict[str, list[float]]
    # workload -> max value among the data structures listed in ``args.base_ds``
    base_best: dict[str, float]


def preprocess_table_map(table_map: dict[str, dict[str, float]], args) -> TableMapData:
    """Reshape ``table_map`` into the form needed for rendering.

    For every data structure in ``args.ds`` the values are laid out as a list
    in ``args.workload`` order. For every workload, the largest value among
    the data structures that also appear in ``args.base_ds`` is recorded.

    Raises:
        KeyError: a requested ds or workload is missing from ``table_map``.
    """
    per_ds = {}
    for ds in args.ds:
        measurements = table_map[ds]
        per_ds[ds] = [measurements[workload] for workload in args.workload]

    # Second pass: running maximum per workload over the base data structures.
    best_of_base = {}
    for ds, series in per_ds.items():
        if ds not in args.base_ds:
            continue
        for workload, measured in zip(args.workload, series):
            best_of_base[workload] = max(best_of_base.get(workload, measured), measured)

    return TableMapData(per_ds, best_of_base)


def build_table(table_map: dict[str, dict[str, float]], table_map_data: TableMapData, args) -> str:
    """Render the collected values as a LaTeX ``tabular`` environment.

    The first data structure in ``args.ds`` is the pivot: its row shows
    absolute values in scientific notation. Every other row shows the ratio
    of its value to the pivot's value for the same workload, rounded to
    ``args.round`` digits. A ratio cell is highlighted with ``args.color``
    when its data structure is not a base one and its value reaches at least
    ``args.percent`` times the best base value for that workload.

    Data structures without an entry in ``DS_NAME_MAP`` are labelled with
    their raw name, with underscores escaped for LaTeX, instead of failing.
    Workloads with no base value (none of ``args.base_ds`` is among
    ``args.ds``) are never highlighted.

    Args:
        table_map: ``ds -> workload -> value``.
        table_map_data: output of ``preprocess_table_map`` for ``table_map``.
        args: parsed command-line arguments; reads ``ds``, ``workload``,
            ``base_ds``, ``round``, ``percent`` and ``color``.

    Returns:
        The table source, with lines joined by ``ROW_SEP``.

    Raises:
        ZeroDivisionError: the pivot value of some workload is zero.
    """
    values_by_ds, base_best = table_map_data.values_by_ds, table_map_data.base_best

    # One centred column per workload plus the leading label column.
    column_spec = " | ".join(["c"] * (len(args.workload) + 1))
    table = [
        "\\begin{tabular}{|" + column_spec + "|}",
        COLUMN_SEP + COLUMN_SEP.join(map(get_workload_name, args.workload)) + "\\\\",
    ]

    pivot_dict = None
    for i, ds in enumerate(args.ds):
        ds_dict = table_map[ds]
        values = values_by_ds[ds]
        if i == 0:
            # Pivot row: absolute values; later rows are relative to it.
            pivot_dict = ds_dict
            row = [f"${format_main_ops(ds_dict[workload], args.round)}$" for workload in args.workload]
        else:
            row = []
            for workload, value in zip(args.workload, values):
                percent = round(ds_dict[workload] / pivot_dict[workload], args.round)
                cell = f"x${percent}$"
                beats_base = (
                    ds not in args.base_ds
                    and workload in base_best
                    and value >= args.percent * base_best[workload]
                )
                if beats_base:
                    cell = "\\cellcolor{" + args.color + "!20}" + cell
                row.append(cell)
        # Fall back to the raw name; a bare "_" is an error in LaTeX text mode.
        label = DS_NAME_MAP.get(ds, ds.replace("_", "\\_"))
        table.append(label + COLUMN_SEP + COLUMN_SEP.join(row) + " \\\\")

    table.append("\\end{tabular}")

    return ROW_SEP.join(table)


# Command-line entry point: parse options, gather values from the plotter
# output, and print the resulting LaTeX table to stdout.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="script for building latex table based on results produced by plotter.py")
    parser.add_argument("-pod", "--plotter-output-dir", nargs="+", type=Path, required=True, help="directories with results generated by plotter.py")
    parser.add_argument("-ops", "--operations", type=str, required=True, help="(insert / delete / rq) proportions")
    parser.add_argument("-s", "--stat", type=str, required=True, help="stat for which table with result would be built (ex: total_throughput; total_updates)")
    parser.add_argument("-ds", nargs="+", type=str, required=True, help="data structures to include into table. Their order matches order of rows in the table. The first data structure acts as a 'baseline' - results of the rest data structures will be measured relative to the first one")
    parser.add_argument("-w", "--workload", nargs="+", type=str, required=True, help="workloads to include into table (ex: uniform, 70-30)")
    parser.add_argument("-k", "--key", type=int, required=True, help="number of keys for which table would be built")
    parser.add_argument("-b", "--base-ds", type=str, nargs="+", required=True, help="data structures that we want to beat. For example, let us have 3 trees: BTree, IST, SABT. If we want to highlight SABT cells which overcomes both BTree and IST then we pass BTree and IST as '-b' option argument")
    # -r is used both as the %g precision of the first (absolute) row and as
    # the number of digits passed to round() for the ratio cells.
    parser.add_argument("-r", "--round", type=int, default=2, help="rounding measure for all table cells")
    parser.add_argument("-p", "--percent", type=float, default=0.95, help="highlighting bound for good results. Let 'x' be the max value of the base data structures and 'y' be the value of the other data structure. If y >= PERCENT * x then the table cell with y would be colored (since it beats competitors)")
    parser.add_argument("-c", "--color", type=str, default="blue", help="color for highlighting table cells")

    args = parser.parse_args()
    # base_ds is only used for membership tests, so store it as a set.
    args.base_ds = set(args.base_ds)

    # ds -> workload -> value read from the stat files.
    table_map = get_table_map(args)

    # Per-ds value lists plus the best base value per workload.
    table_map_data = preprocess_table_map(table_map, args)

    table = build_table(table_map, table_map_data, args)
    print(table)