From d97b10a893a61a59ca2715a0b2173400ccf50dbf Mon Sep 17 00:00:00 2001
From: Jonathan Hadida
Date: Sun, 4 Dec 2022 22:15:09 -0800
Subject: [PATCH 1/2] Fix issue with firstrow headers in the case of list of
 dict data, and enhance headers by allowing list or dict inputs as well.

---
 tabulate.py | 45 +++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/tabulate.py b/tabulate.py
index 6e918b9..2a8b227 100644
--- a/tabulate.py
+++ b/tabulate.py
@@ -17,6 +17,7 @@
     _float_type = float
     _text_type = unicode
     _binary_type = str
+    _string_type = basestring
 else:
     from itertools import zip_longest as izip_longest
     from functools import reduce, partial
@@ -25,6 +26,7 @@
     _float_type = float
     _text_type = str
     _binary_type = bytes
+    _string_type = str
 
 
 __all__ = ["tabulate", "tabulate_formats", "simple_separated_format"]
@@ -527,24 +529,31 @@ def _normalize_tabular_data(tabular_data, headers):
     elif (len(rows) > 0 and isinstance(rows[0], dict)):
         # dict or OrderedDict
-        uniq_keys = set() # implements hashed lookup
-        keys = [] # storage for set
+
         if headers == "firstrow":
-            firstdict = rows[0] if len(rows) > 0 else {}
-            keys.extend(firstdict.keys())
-            uniq_keys.update(keys)
+            headers = rows[0] if len(rows) > 0 else {}
             rows = rows[1:]
-        for row in rows:
-            for k in row.keys():
-                #Save unique items in input order
-                if k not in uniq_keys:
-                    keys.append(k)
-                    uniq_keys.add(k)
+
+        if isinstance(headers, _string_type):
+            # list unique keys in input order
+            uniq_keys = set() # implements hashed lookup
+            keys = [] # storage for set
+            for row in rows:
+                for k in row.keys():
+                    if k not in uniq_keys:
+                        keys.append(k)
+                        uniq_keys.add(k)
+        elif hasattr(headers, 'keys') and hasattr(headers, 'values'):
+            # dict-like { key => header name }
+            keys = list(headers.keys())
+            headers = list(headers.values())
+        else:
+            # list-like [key1, key2, ...]
+            keys = list(headers)
+
         if headers == 'keys':
             headers = keys
-        elif headers == "firstrow" and len(rows) > 0:
-            headers = [firstdict.get(k, k) for k in keys]
-        headers = list(map(_text_type, headers))
+
         rows = [[row.get(k) for k in keys] for row in rows]
 
     elif headers == "keys" and len(rows) > 0:
         # keys are column indices
@@ -560,10 +569,10 @@ def _normalize_tabular_data(tabular_data, headers):
 
     # pad with empty headers for initial columns if necessary
     if headers and len(rows) > 0:
-        nhs = len(headers)
+        nhead = len(headers)
         ncols = len(rows[0])
-        if nhs < ncols:
-            headers = [""]*(ncols - nhs) + headers
+        if nhead != ncols:
+            raise RuntimeError('Number of headers and columns do not match.')
 
     return rows, headers
 
@@ -878,4 +887,4 @@ def _format_table(fmt, headers, rows, colwidths, colaligns):
     if fmt.linebelow and "linebelow" not in hidden:
         lines.append(_build_line(padded_widths, colaligns, fmt.linebelow))
 
-    return "\n".join(lines)
+    return "\n".join(lines)
\ No newline at end of file

From 1cf1cb4b29e7f96dd40d12b01507265c98abd42d Mon Sep 17 00:00:00 2001
From: Jonathan Hadida
Date: Sun, 4 Dec 2022 22:32:02 -0800
Subject: [PATCH 2/2] Update README to reflect latest changes to headers option

---
 README.rst | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/README.rst b/README.rst
index bfd36ab..3b11b3a 100644
--- a/README.rst
+++ b/README.rst
@@ -87,6 +87,31 @@ If ``headers="firstrow"``, then the first row of data is used::
     Alice      24
     Bob        19
 
+When the data is a list of dicts, ``headers="firstrow"`` can also be
+used to assign a different column name to each corresponding dict key::
+
+    >>> print tabulate([{"name": "Name", "age": "Age"},
+    ...                 {"name": "Alice", "age": 24},
+    ...                 {"name": "Bob", "age": 19}],
+    ...                headers="firstrow")
+    Name      Age
+    ------  -----
+    Alice      24
+    Bob        19
+
+Furthermore, with a list of dicts you can also specify ``headers`` as
+a dict (similar to the example above) or as a list of keys. Either way,
+the specified keys may be a subset of all the keys present across the
+dataset::
+
+    >>> print tabulate([{"foo": 1, "bar": 2},
+    ...                 {"foo": 3, "bar": 4, "baz": 5}],
+    ...                headers=["bar", "foo"])
+      bar    foo
+    -----  -----
+        2      1
+        4      3
+
 If ``headers="keys"``, then the keys of a dictionary/dataframe, or
 column indices are used. It also works for NumPy record arrays and
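For reference, the key-selection logic introduced by PATCH 1/2 can be exercised on its own. The sketch below is not part of the patches above: it assumes Python 3 (plain ``str`` standing in for ``_string_type``), and the helper name ``normalize_dict_rows`` is illustrative only::

    def normalize_dict_rows(rows, headers):
        """Mimic the patched dict branch of _normalize_tabular_data."""
        if headers == "firstrow":
            # First dict supplies the column names; remaining dicts are data.
            headers = rows[0] if len(rows) > 0 else {}
            rows = rows[1:]

        if isinstance(headers, str):
            # "keys" (or any other string): collect unique keys in input order.
            keys, seen = [], set()
            for row in rows:
                for k in row.keys():
                    if k not in seen:
                        keys.append(k)
                        seen.add(k)
        elif hasattr(headers, "keys") and hasattr(headers, "values"):
            # dict-like {key: header name}: keys pick columns, values label them.
            keys = list(headers.keys())
            headers = list(headers.values())
        else:
            # list-like [key1, key2, ...]: keys double as the header names.
            keys = list(headers)

        if headers == "keys":
            headers = keys

        rows = [[row.get(k) for k in keys] for row in rows]
        return rows, [str(h) for h in headers]


    data = [{"foo": 1, "bar": 2}, {"foo": 3, "bar": 4, "baz": 5}]
    print(normalize_dict_rows(data, ["bar", "foo"]))
    # ([[2, 1], [4, 3]], ['bar', 'foo'])
    print(normalize_dict_rows(data, "keys"))
    # ([[1, 2, None], [3, 4, 5]], ['foo', 'bar', 'baz'])

Running it shows why the README examples above render as they do: a list-like ``headers`` both selects and labels the columns, while ``"keys"`` falls back to collecting every key seen across the rows and fills missing values with ``None``.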