-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathexp_table_builder.py
More file actions
144 lines (116 loc) · 6.11 KB
/
exp_table_builder.py
File metadata and controls
144 lines (116 loc) · 6.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import re
from pathlib import Path
from dataclasses import dataclass
import argparse
# Regex template used to pull a value out of a stat file. The "{key}"
# placeholder is filled in with str.format before the pattern is matched;
# the single capture group is the text following "avg_val=".
EXTRACT_PATTERN = r"key={key}\s.*\savg_val=(\S*)"

# LaTeX glue: "&" between the cells of a row, "\hline" on its own line
# between consecutive rows.
COLUMN_SEP = " & "
ROW_SEP = "\n\\hline\n"

# Data structure identifier -> label printed in the first column of its row.
DS_NAME_MAP = {
    # Currently disabled entries:
    # "splay_tree": "Splay Tree",
    # "sast": "SA2T",
    # "btree": "B-Tree",
    # "sabt": "SABT",
    # "ist": "IST",
    # "sait": "SAIT",
    # "salt": "SALT"
    "redis_zset": "ZSET",
    "redis_sait": "SAIT",
    "redis_sabt": "SABT",
    "redis_sabpt": "SABPT",
    "redis_salt": "SALT",
}

# Workload identifier -> label printed in the table header.
WORKLOAD_NAME_MAP = {
    "70-30": "70/30",
    "80-20": "80/20",
    "90-10": "90/10",
    "95-05": "95/05",
    "99-01": "99/01",
}
def get_workload_name(workload):
    """Return the header label for *workload*.

    A workload that has no entry in ``WORKLOAD_NAME_MAP`` is returned
    unchanged.
    """
    return WORKLOAD_NAME_MAP.get(workload, workload)
def format_main_ops(ops, round):
    r"""Format *ops* as LaTeX scientific notation, e.g. ``1.2 \cdot {10}^{7}``.

    Args:
        ops: Number to format.
        round: Precision handed to the ``%g`` conversion, i.e. the number of
            significant digits kept in the mantissa. (The name shadows the
            builtin inside this function; it is kept for the callers.)

    Returns:
        The string ``"<mantissa> \cdot {10}^{<exponent>}"``. The exponent is
        written as a plain integer: no ``+`` sign and no leading zeros, a
        ``-`` sign for negative exponents.

    Raises:
        RuntimeError: If ``%g`` rendered *ops* without an exponent part
            (for instance a small value such as ``50`` with ``round=2``).
    """
    sops = f'%.{round}g' % ops
    # "%g" emits "<mantissa>e<sign><digits>" when it chooses exponent form.
    # Parsing the whole exponent (rather than a single digit after "e+0")
    # keeps values such as 1e+10 or 1e+100 working.
    mantissa, marker, exponent = sops.partition("e")
    if not marker:
        raise RuntimeError("can not find exponent in " + sops)
    return mantissa + " \\cdot {10}^{" + str(int(exponent)) + "}"
def get_table_map(args) -> dict[str, dict[str, float]]:
    """Collect one value per (data structure, workload) pair from stat files.

    For every directory in ``args.plotter_output_dir`` the tree is walked as
    ``<workload>/<operations>/<data structure>/<stat file>``. Only workloads
    listed in ``args.workload``, the operations directory named
    ``args.operations``, data structures listed in ``args.ds`` and files
    whose name contains ``args.stat`` are considered. From each such file the
    first match of ``EXTRACT_PATTERN`` (formatted with ``args.key``) is taken
    and its captured group is converted to ``float``.

    Args:
        args: Parsed command-line options; the attributes read here are
            ``plotter_output_dir``, ``workload``, ``operations``, ``ds``,
            ``stat`` and ``key``.

    Returns:
        Mapping ``ds -> workload -> value``. Every name in ``args.ds`` is
        present as a key; a data structure without results maps to an empty
        dict. When several files match the same pair, the last one visited
        wins.

    Raises:
        RuntimeError: If a matching stat file contains no entry for
            ``args.key``.
        ValueError: If the captured text is not a valid float.
    """
    table_map: dict[str, dict[str, float]] = {ds: {} for ds in args.ds}
    # The pattern depends only on args.key, so build it once up front
    # instead of once per stat file.
    pattern = re.compile(EXTRACT_PATTERN.format(key=args.key))

    for benchmark_dir in args.plotter_output_dir:
        for workload_dir in benchmark_dir.iterdir():
            workload = workload_dir.name
            # Skip unrequested workloads and stray files at this level.
            if workload not in args.workload or not workload_dir.is_dir():
                continue
            for ops_dir in workload_dir.iterdir():
                if ops_dir.name != args.operations or not ops_dir.is_dir():
                    continue
                for ds_dir in ops_dir.iterdir():
                    ds = ds_dir.name
                    if not ds_dir.is_dir() or ds not in table_map:
                        continue
                    for stat_file in ds_dir.iterdir():
                        if args.stat not in stat_file.name:
                            continue
                        match = pattern.search(stat_file.read_text())
                        if match is None:
                            # Name the file: a bare IndexError gives no hint
                            # about which result is missing.
                            raise RuntimeError(
                                f"no avg_val entry for key={args.key} in {stat_file}"
                            )
                        table_map[ds][workload] = float(match.group(1))
    return table_map
@dataclass
class TableMapData:
    """Values derived from the raw table map, ready for rendering.

    Attributes:
        values_by_ds: data structure -> its values, ordered by workloads.
        base_best: workload -> max value.
    """

    values_by_ds: dict[str, list[float]]
    base_best: dict[str, float]
def preprocess_table_map(table_map: dict[str, dict[str, float]], args) -> TableMapData:
    """Flatten *table_map* into per-row value lists and per-workload maxima.

    Args:
        table_map: Mapping ``ds -> workload -> value``; it must hold a value
            for every combination of ``args.ds`` and ``args.workload``.
        args: Parsed options; ``ds``, ``workload`` and ``base_ds`` are read.

    Returns:
        A ``TableMapData`` whose ``values_by_ds`` lists each data structure's
        values in ``args.workload`` order and whose ``base_best`` holds, per
        workload, the maximum value among the data structures that are in
        ``args.base_ds``.

    Raises:
        KeyError: If *table_map* lacks a data structure or workload entry.
    """
    per_ds_values = {}
    best_of_base = {}
    for ds in args.ds:
        by_workload = table_map[ds]
        ordered = [by_workload[workload] for workload in args.workload]
        if ds in args.base_ds:
            # Only base data structures contribute to the per-workload maximum.
            for workload, value in zip(args.workload, ordered):
                if workload in best_of_base:
                    value = max(best_of_base[workload], value)
                best_of_base[workload] = value
        per_ds_values[ds] = ordered
    return TableMapData(per_ds_values, best_of_base)
def build_table(table_map: dict[str, dict[str, float]], table_map_data: TableMapData, args) -> str:
    r"""Render the collected results as a LaTeX ``tabular`` environment.

    The first data structure in ``args.ds`` is the pivot: its row shows the
    values themselves, formatted by ``format_main_ops``. Every other row
    shows each value divided by the pivot's value for the same workload,
    rounded to ``args.round`` digits and written as ``x$<ratio>$``. A cell of
    a data structure that is not in ``args.base_ds`` is prefixed with
    ``\cellcolor{<args.color>!20}`` when its value is at least
    ``args.percent`` times the best base value for that workload.

    Args:
        table_map: Mapping ``ds -> workload -> value``.
        table_map_data: Per-row value lists and per-workload base maxima.
        args: Parsed options; ``ds``, ``workload``, ``base_ds``, ``round``,
            ``percent`` and ``color`` are read.

    Returns:
        The table source, with ``ROW_SEP`` between consecutive lines.

    Raises:
        KeyError: If a value or a base maximum is missing for a workload.
        ZeroDivisionError: If a pivot value is zero.
    """
    values_by_ds, base_best = table_map_data.values_by_ds, table_map_data.base_best
    # One centred column per workload plus one for the row label.
    column_spec = " | ".join("c" for _ in range(len(args.workload) + 1))
    table = [
        "\\begin{tabular}{|" + column_spec + "|}",
        COLUMN_SEP + COLUMN_SEP.join(map(get_workload_name, args.workload)) + "\\\\",
    ]
    for i, ds in enumerate(args.ds):
        ds_dict = table_map[ds]
        values = values_by_ds[ds]
        if i == 0:
            # Remember the pivot row; later rows are expressed relative to it.
            pivot_dict = ds_dict
            row = [f"${format_main_ops(ds_dict[workload], args.round)}$" for workload in args.workload]
        else:
            row = []
            for workload, value in zip(args.workload, values):
                ratio = round(ds_dict[workload] / pivot_dict[workload], args.round)
                cell = f"x${ratio}$"
                if ds not in args.base_ds and value >= args.percent * base_best[workload]:
                    cell = "\\cellcolor{" + args.color + "!20}" + cell
                row.append(cell)
        # Fall back to the raw identifier when no label is configured, the
        # same way workload names are handled, instead of failing with KeyError.
        label = DS_NAME_MAP.get(ds, ds)
        table.append(label + COLUMN_SEP + COLUMN_SEP.join(row) + " \\\\")
    table.append("\\end{tabular}")
    return ROW_SEP.join(table)
if __name__ == "__main__":
    # Command-line entry point: parse the options, gather the values from the
    # plotter output directories and print the LaTeX table to stdout.
    parser = argparse.ArgumentParser(
        description="script for building latex table based on results produced by plotter.py",
    )
    parser.add_argument(
        "-pod",
        "--plotter-output-dir",
        nargs="+",
        type=Path,
        required=True,
        help="directories with results generated by plotter.py",
    )
    parser.add_argument(
        "-ops",
        "--operations",
        type=str,
        required=True,
        help="(insert / delete / rq) proportions",
    )
    parser.add_argument(
        "-s",
        "--stat",
        type=str,
        required=True,
        help="stat for which table with result would be built (ex: total_throughput; total_updates)",
    )
    parser.add_argument(
        "-ds",
        nargs="+",
        type=str,
        required=True,
        help="data structures to include into table. Their order matches order of rows in the table. The first data structure acts as a 'baseline' - results of the rest data structures will be measured relative to the first one",
    )
    parser.add_argument(
        "-w",
        "--workload",
        nargs="+",
        type=str,
        required=True,
        help="workloads to include into table (ex: uniform, 70-30)",
    )
    parser.add_argument(
        "-k",
        "--key",
        type=int,
        required=True,
        help="number of keys for which table would be built",
    )
    parser.add_argument(
        "-b",
        "--base-ds",
        type=str,
        nargs="+",
        required=True,
        help="data structures that we want to beat. For example, let us have 3 trees: BTree, IST, SABT. If we want to highlight SABT cells which overcomes both BTree and IST then we pass BTree and IST as '-b' option argument",
    )
    parser.add_argument(
        "-r",
        "--round",
        type=int,
        default=2,
        help="rounding measure for all table cells",
    )
    parser.add_argument(
        "-p",
        "--percent",
        type=float,
        default=0.95,
        help="highlighting bound for good results. Let 'x' be the max value of the base data structures and 'y' be the value of the other data structure. If y >= PERCENT * x then the table cell with y would be colored (since it beats competitors)",
    )
    parser.add_argument(
        "-c",
        "--color",
        type=str,
        default="blue",
        help="color for highlighting table cells",
    )

    args = parser.parse_args()
    # Membership in base_ds is tested for every cell, so keep it as a set.
    args.base_ds = set(args.base_ds)

    table_map = get_table_map(args)
    table_map_data = preprocess_table_map(table_map, args)
    print(build_table(table_map, table_map_data, args))