diff --git a/Readme.md b/Readme.md index 5ee2db9..7e9bb01 100644 --- a/Readme.md +++ b/Readme.md @@ -11,7 +11,7 @@ report quality tables with a simple API. ```python ( df - .pipe(PrettyPandas) + .summarize .as_currency('GBP', subset='A') .as_percent(subset='B') .total() diff --git a/docs/source/conf.py b/docs/source/conf.py index e0dc892..9e33655 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,9 +58,9 @@ # built documents. # # The short X.Y version. -version = '0.0.4' +version = '0.0.5' # The full version, including alpha/beta/rc tags. -release = '0.0.4' +release = '0.0.5' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/index.rst b/docs/source/index.rst index 70cce7c..39c5de6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,9 +12,11 @@ create report qualitiy tables with a simple API. .. code-block:: python + import prettypandas + ( df - .pipe(PrettyPandas) + .summarize .as_currency('GBP', subset='A') .as_percent(subset='B') .total() diff --git a/docs/source/prettypandas.summarizer.rst b/docs/source/prettypandas.summarizer.rst index a1daf69..eb7e7bf 100644 --- a/docs/source/prettypandas.summarizer.rst +++ b/docs/source/prettypandas.summarizer.rst @@ -1,4 +1,3 @@ - prettypandas.summarize module ============================= diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index f80f08b..ec986e8 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -23,23 +23,16 @@ simple: .. code-block:: python - PrettyPandas(df).total() + df.summarize.total() .. image:: _static/Images/total@2x.png :width: 311px -Or additionally if you want to use Pandas fluent API: - -.. code-block:: python - - df.pipe(PrettyPandas).total() - - PrettyPandas follows a fluent API so you can chain multiple summaries easily: .. code-block:: python - df.pipe(PrettyPandas).total().average() + df.summarize.total().average() .. image:: _static/Images/average@2x.png :width: 334px @@ -49,7 +42,7 @@ on --- 0 for columns, 1 for rows, and ``None`` for both. .. code-block:: python - PrettyPandas(df).total(axis=1) + df.summarize.total(axis=1) .. image:: _static/Images/alt_axis@2x.png :width: 349px @@ -59,7 +52,7 @@ You can even mix and match summaries applied to different axis. Creating a Custom Summary ^^^^^^^^^^^^^^^^^^^^^^^^^ -The :py:meth:`summary ` method creates a custom summary +The :py:meth:`using ` method creates a custom summary from a function which takes an array-like structure as a list. .. code-block:: python @@ -67,7 +60,7 @@ from a function which takes an array-like structure as a list. def count_greater_than_zero(column): return (column > 0).sum() - PrettyPandas(df).summary(count_greater_than_zero, title="> 0") + df.summarize.using(count_greater_than_zero, title="> 0") .. image:: _static/Images/custom_fn@2x.png :width: 287px @@ -89,7 +82,7 @@ it back to a Pandas native DataFrame. ( df - .pipe(PrettyPandas) + .summarize .total(axis=1) .to_frame() ) @@ -105,7 +98,8 @@ percentages, and apply a backgrouned gradient to a table: .. code-block:: python ( - df.pipe(PrettyPandas) + df + .summarize .as_percent(precision=0) .median() .style @@ -162,14 +156,14 @@ single column, or multiple columns. .. code-block:: python - PrettyPandas(df).as_percent(subset='A') # Format just column A + df.summarize.as_percent(subset='A') # Format just column A .. image:: _static/Images/format_a@2x.png :width: 301px .. code-block:: python - PrettyPandas(df).as_percent(subset=['A', 'B']) # Format columns A and B + df.summarize.as_percent(subset=['A', 'B']) # Format columns A and B .. image:: _static/Images/format_a_b@2x.png :width: 363px @@ -183,7 +177,7 @@ argument needs to take in a `pandas.Index` to specify the row. .. code-block:: python # Format the row with row-index 3 - PrettyPandas(df).as_percent(subset=pd.IndexSlice[3,:], precision=2) + df.summarize.as_percent(subset=pd.IndexSlice[3,:], precision=2) .. image:: _static/Images/format_row@2x.png :width: 294px @@ -199,7 +193,8 @@ The following example shows how to select rows in a multi-index: second_row_idx = pd.IndexSlice[1, :] ( - df.pipe(PrettyPandas) + df + .summarize .as_currency(subset=first_row_idx) .as_percent(subset=second_row_idx) .total(axis=1) diff --git a/prettypandas/formatters.py b/prettypandas/formatters.py index 759992f..f204771 100644 --- a/prettypandas/formatters.py +++ b/prettypandas/formatters.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + from numbers import Number, Integral from functools import partial, wraps import locale diff --git a/prettypandas/summarizer.py b/prettypandas/summarizer.py index 1b31c66..e15ade9 100644 --- a/prettypandas/summarizer.py +++ b/prettypandas/summarizer.py @@ -1,20 +1,23 @@ from __future__ import unicode_literals +from collections import OrderedDict +import functools +import itertools from operator import methodcaller import pandas as pd from .formatters import as_percent, as_currency, as_unit, LOCALE_OBJ def _axis_is_rows(axis): - return axis == 0 or axis == 'rows' + return axis in [0, 'rows', 'row', 'down'] def _axis_is_cols(axis): - return axis == 1 or axis == 'columns' or axis == 'index' + return axis in [1, 'columns', 'cols', 'col', 'index', 'across'] class Aggregate(object): - """Aggreagte + """Aggregate Wrapper to calculate aggregate row on datafame. @@ -45,7 +48,6 @@ def __init__( self.title = title self.subset = subset self.axis = axis - self.func = func self.args = args self.kwargs = kwargs @@ -59,9 +61,11 @@ def apply(self, df): if _axis_is_cols(self.axis): df = df.loc[self.subset] - result = df.agg(self.func, axis=self.axis, *self.args, **self.kwargs) - result.name = self.title - return result + return ( + df + .agg(self.func, axis=self.axis, *self.args, **self.kwargs) + .rename(self.title) + ) class Formatter(object): @@ -82,11 +86,26 @@ def __init__(self, formatter, args, kwargs): self.args = args self.kwargs = kwargs + @staticmethod + def _replace_errors_with_empty_string(fn): + """Attempt to format value and if failed use empty string""" + + @functools.wraps(fn) + def caller(*args, **kwargs): + try: + return fn(*args, **kwargs) + except Exception: + return '' + return caller + def apply(self, styler): """Apply Summary over Pandas Styler""" - return styler.format(self.formatter, *self.args, **self.kwargs) + formatter = self._replace_errors_with_empty_string(self.formatter) + return styler.format(formatter, *self.args, **self.kwargs) +@pd.api.extensions.register_series_accessor('summarize') +@pd.api.extensions.register_dataframe_accessor('summarize') class PrettyPandas(object): """PrettyPandas @@ -108,32 +127,31 @@ def __init__(self, formatters=None, *args, **kwargs): - - self.data = data - self.summary_rows = summary_rows or [] - self.summary_cols = summary_cols or [] - self.formatters = formatters or [] + self._data = data + self._summary_rows = summary_rows or [] + self._summary_cols = summary_cols or [] + self._formatters = formatters or [] def _copy(self): return self.__class__( - self.data, - summary_rows=self.summary_rows[:], - summary_cols=self.summary_cols[:], - formatters=self.formatters[:], + self._data, + summary_rows=self._summary_rows[:], + summary_cols=self._summary_cols[:], + formatters=self._formatters[:], ) def _add_formatter(self, formatter): new = self._copy() - new.formatters += [formatter] + new._formatters += [formatter] return new def _add_summary(self, agg): new = self._copy() if _axis_is_rows(agg.axis): - new.summary_rows += [agg] + new._summary_rows += [agg] elif _axis_is_cols(agg.axis): - new.summary_cols += [agg] + new._summary_cols += [agg] else: raise ValueError("Invalid axis supplied.") @@ -142,33 +160,32 @@ def _add_summary(self, agg): def _cleaned_aggregates(self, summaries): titles = set() for agg in summaries: - title = agg.title - i = 1 - while agg.title in titles: - agg.title = "{}_{}".format(title, i) - i += 1 + original_title = agg.title + + for i in itertools.count(2): + if agg.title in titles: + agg.title = "{} {}".format(original_title, i) + else: + break titles.add(agg.title) yield agg @property def _cleaned_summary_rows(self): - return list(self._cleaned_aggregates(self.summary_rows)) + return list(self._cleaned_aggregates(self._summary_rows)) @property def _cleaned_summary_cols(self): - return list(self._cleaned_aggregates(self.summary_cols)) + return list(self._cleaned_aggregates(self._summary_cols)) def _apply_summaries(self): """Add all summary rows and columns.""" def as_frame(r): - if isinstance(r, pd.Series): - return r.to_frame() - else: - return r + return r.to_frame() if isinstance(r, pd.Series) else r - df = self.data + df = as_frame(self._data).copy() if df.index.nlevels > 1: raise ValueError( @@ -176,16 +193,17 @@ def as_frame(r): "MultiIndex." ) - _df = df - if self.summary_rows: - rows = pd.concat([agg.apply(_df) - for agg in self._cleaned_summary_rows], axis=1).T - df = pd.concat([df, as_frame(rows)], axis=0) + unaltered_df = df + if self._summary_rows: + rows = pd.DataFrame(OrderedDict([ + (agg.title, agg.apply(unaltered_df)) + for agg in self._cleaned_summary_rows + ])).T + df = pd.concat([df, rows]) - if self.summary_cols: - cols = pd.concat([agg.apply(_df) - for agg in self._cleaned_summary_cols], axis=1) - df = pd.concat([df, as_frame(cols)], axis=1) + if self._summary_cols: + for agg in self._cleaned_summary_cols: + df[agg.title] = agg.apply(unaltered_df) return df @@ -220,7 +238,7 @@ def handle_na(df): .applymap(lambda r: 'font-weight: 900', subset=col_ix) ) - for formatter in self.formatters: + for formatter in self._formatters: styler = formatter.apply(styler) return styler @@ -237,6 +255,16 @@ def __str__(self): def __repr__(self): return str(self.frame) + def using(self, func, title, axis=0, subset=None, *args, **kwargs): + return self.summary( + func=func, + title=title, + axis=axis, + subset=subset, + *args, + **kwargs + ) + def summary(self, func=methodcaller('sum'), title='Total', @@ -244,12 +272,12 @@ def summary(self, subset=None, *args, **kwargs): - """Add multiple summary rows or columns to the dataframe. + """Add a summary row or column to the dataframe. Parameters ---------- :param func: function to be used for a summary. - :param titles: Title for this summary column. + :param title: Title for this summary column. :param axis: Same as numpy and pandas axis argument. A value of None will cause the summary to be applied to both rows and columns. diff --git a/setup.py b/setup.py index 95006a2..8433419 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name='prettypandas', - version='0.0.4', + version='0.0.5', description='Pandas Styler for Report Quality Tables.', long_description=long_description, diff --git a/test/test_pretty_pandas.py b/test/test_pretty_pandas.py index e2023ea..51066a9 100644 --- a/test/test_pretty_pandas.py +++ b/test/test_pretty_pandas.py @@ -1,22 +1,32 @@ +from __future__ import unicode_literals + import copy import pytest import numpy as np import pandas as pd +from six import string_types from prettypandas import PrettyPandas @pytest.fixture() def dataframe(): np.random.seed(24) - df = pd.DataFrame({'A': np.linspace(1, 10, 10)}) - df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), - columns=list('BCDE'))], - axis=1) + df = pd.DataFrame({ + 'A': np.linspace(1, 10, 10), + 'B': np.random.normal(10, 4), + 'C': np.random.normal(10, 4), + 'D': np.random.normal(10, 4), + }) return df +@pytest.fixture() +def series(dataframe): + return dataframe.A + + @pytest.fixture() def prettyframe(dataframe): return PrettyPandas(dataframe) @@ -31,14 +41,14 @@ def test_creation(dataframe): assert True p1 = PrettyPandas(dataframe) - assert p1.summary_rows == [] - assert p1.summary_cols == [] - assert p1.formatters == [] + assert p1._summary_rows == [] + assert p1._summary_cols == [] + assert p1._formatters == [] p2 = PrettyPandas(dataframe, summary_rows=['test']) - assert p2.summary_rows == ['test'] - assert p1.summary_cols == [] - assert p1.formatters == [] + assert p2._summary_rows == ['test'] + assert p1._summary_cols == [] + assert p1._formatters == [] def test_data_safety(dataframe): @@ -48,12 +58,12 @@ def test_data_safety(dataframe): df.total()._apply_summaries() assert all(dataframe == df1) - assert all(df.data == df1) + assert all(df._data == df1) def test_summary(dataframe): p1 = PrettyPandas(dataframe).total() - actual = list(p1.data.sum()) + actual = list(p1._data.sum()) r = p1._apply_summaries() row = r.iloc[-1] @@ -68,24 +78,24 @@ def test_summary_fns(dataframe): PrettyPandas(dataframe).min() out = PrettyPandas(dataframe).total() - assert len(out.summary_rows) == 1 - assert len(out.summary_cols) == 0 + assert len(out._summary_rows) == 1 + assert len(out._summary_cols) == 0 out = PrettyPandas(dataframe).total(axis=1) - assert len(out.summary_rows) == 0 - assert len(out.summary_cols) == 1 + assert len(out._summary_rows) == 0 + assert len(out._summary_cols) == 1 out = PrettyPandas(dataframe).total(axis=None) - assert len(out.summary_rows) == 1 - assert len(out.summary_cols) == 1 + assert len(out._summary_rows) == 1 + assert len(out._summary_cols) == 1 out = PrettyPandas(dataframe).min().max() - assert len(out.summary_rows) == 2 - assert len(out.summary_cols) == 0 + assert len(out._summary_rows) == 2 + assert len(out._summary_cols) == 0 out = PrettyPandas(dataframe).min().max(axis=1) - assert len(out.summary_rows) == 1 - assert len(out.summary_cols) == 1 + assert len(out._summary_rows) == 1 + assert len(out._summary_cols) == 1 def test_mulitindex(): @@ -95,4 +105,47 @@ def test_mulitindex(): 'C': [6, 7]}) with pytest.raises(ValueError): - output = PrettyPandas(df.set_index(['A', 'B'])).total(axis=1)._apply_summaries() + PrettyPandas(df.set_index(['A', 'B'])).total(axis=1)._apply_summaries() + + +def test_series_works(series): + PrettyPandas(series).total() + assert True + + +def test_summaries_are_applied_in_order(dataframe): + df = dataframe.summarize + N = 100 + + for i in range(N): + df = df.total() + df = df.to_frame() + + generated_columns = [ + c for c in df.index + if isinstance(c, string_types) and c.startswith('Total') + ] + + expected_columns = ( + ['Total'] + + ['Total {}'.format(i + 1) for i in range(1, N)] + ) + + assert generated_columns == expected_columns + + +def test_pandas_extension(dataframe, series): + ext_df = dataframe.summarize.total() + normal_df = PrettyPandas(dataframe).total() + + pd.testing.assert_frame_equal( + ext_df.to_frame(), + normal_df.to_frame() + ) + + ext_s = series.summarize.total() + normal_s = PrettyPandas(series).total() + pd.testing.assert_frame_equal( + ext_s.to_frame(), + normal_s.to_frame() + )