diff --git a/dp_wizard/shiny/panels/dataset_panel/__init__.py b/dp_wizard/shiny/panels/dataset_panel/__init__.py index fcc7b6e8..1654d2c8 100644 --- a/dp_wizard/shiny/panels/dataset_panel/__init__.py +++ b/dp_wizard/shiny/panels/dataset_panel/__init__.py @@ -336,7 +336,7 @@ def input_files_upload_ui(): @render.ui def csv_message_ui(): return data_source.csv_message_ui( - csv_column_mismatch_calc=csv_column_mismatch_calc, + csv_column_mismatch=csv_column_mismatch_calc(), csv_messages=csv_info().get_messages(), ) diff --git a/dp_wizard/shiny/panels/dataset_panel/data_source.py b/dp_wizard/shiny/panels/dataset_panel/data_source.py index 222369a9..7421c11f 100644 --- a/dp_wizard/shiny/panels/dataset_panel/data_source.py +++ b/dp_wizard/shiny/panels/dataset_panel/data_source.py @@ -37,13 +37,12 @@ def context_code_sample(): # pragma: no cover def csv_message_ui( - csv_column_mismatch_calc, + csv_column_mismatch, csv_messages: list[str], ): # pragma: no cover messages = [f"- {m}" for m in csv_messages] - mismatch = csv_column_mismatch_calc() - if mismatch: - just_public, just_private = mismatch + if csv_column_mismatch: + just_public, just_private = csv_column_mismatch if just_public: messages.append( "- Only the public data contains: " diff --git a/dp_wizard/types.py b/dp_wizard/types.py index 93360e07..11706222 100644 --- a/dp_wizard/types.py +++ b/dp_wizard/types.py @@ -114,6 +114,37 @@ def __init__(self, path: Path | None): # Only used as initial value return + if not path.exists(): + self._errors.append(f"No such file: {path}") + return + + try: + size = path.stat().st_size + except Exception: + self._errors.append(f"Could not stat file: {path}") + return + M = 1024 * 1024 + error_m = 100 + warn_m = 10 + assert error_m > warn_m + if size > error_m * M: + self._errors.append( + re.sub( + r"\s+", + " ", + f""" + DP Wizard is an interactive tool, and {size // M}M + would be too slow. DP Wizard is limited to {error_m}M, + although the OpenDP Library itself doesn't have such a limit. + """, + ).strip() + ) + return + if size > warn_m * M: + self._warnings.append( + f"Files larger than {warn_m}M may be slow to process." + ) + try: if path.suffix != ".csv": path = convert_to_csv(path)