diff --git a/docs/download_example.ipynb b/docs/download_example.ipynb index f5e41de..895f169 100644 --- a/docs/download_example.ipynb +++ b/docs/download_example.ipynb @@ -82,7 +82,9 @@ "id": "fdf5ec65-a98b-412b-b5c6-3b4bb23e8312", "metadata": {}, "source": [ - "## Getting available files and metadata in table form." + "## Getting available files and metadata in data frame form.\n", + "The output is a pandas data frame and supports all of the standard pandas DataFrame functionality. More details can be found on \n", + "the official docs page: https://pandas.pydata.org/docs/" ] }, { @@ -94,6 +96,36 @@ "source": [ "toolviper.utils.data.list_files()" ] + }, + { + "cell_type": "markdown", + "id": "2b47b5ed-9409-471c-ab54-6a793f4052c5", + "metadata": {}, + "source": [ + "#### Example of pre-selection on the file metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bebfac78-a3f5-46ca-a840-d222308a72b8", + "metadata": {}, + "outputs": [], + "source": [ + "files = toolviper.utils.data.list_files()\n", + "\n", + "files[files.telescope == \"VLA\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b04a312-043a-4957-a16d-39c6d73c1fa6", + "metadata": {}, + "outputs": [], + "source": [ + "files.filter(items=[\"file\", \"path\", \"telescope\"])" + ] } ], "metadata": { @@ -112,7 +144,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.12.12" } }, "nbformat": 4, diff --git a/docs/file-manifest-update.ipynb b/docs/file-manifest-update.ipynb index c983e0a..76579f2 100644 --- a/docs/file-manifest-update.ipynb +++ b/docs/file-manifest-update.ipynb @@ -202,7 +202,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.9" + "version": "3.12.12" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 7c1d677..2362d37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,10 @@ dependencies = [ 'numpy', 'psutil', 'rich', +
class ToolviperFiles:
    """Tabular view of a file manifest.

    Holds the manifest path together with a pandas data frame built from the
    manifest's ``"metadata"`` mapping, and chooses how to present the frame
    depending on whether ``is_notebook()`` reports a notebook session.
    """

    def __init__(self, manifest, dataframe=None):
        """Store the manifest path and frame and detect notebook mode.

        Args:
            manifest: Path of the manifest the frame was built from.
            dataframe: The pandas data frame to present (may be ``None``).
        """
        self.manifest = manifest
        self.dataframe = dataframe
        self.notebook_mode = False

        if is_notebook():
            # Imported lazily: only the notebook code path uses itables.
            import itables

            self.notebook_mode = True
            itables.init_notebook_mode()

    def __call__(self):
        """Return the frame in a notebook; otherwise print it and return None."""
        if self.notebook_mode:
            return self.dataframe

        print(self.dataframe)
        return None

    def print(self) -> Union[None, pd.DataFrame]:
        """Present the table.

        Returns:
            The data frame when running in a notebook (so the notebook can
            render it); ``None`` otherwise, after printing a plain-text table
            to stdout.
        """
        if self.notebook_mode:
            return self.dataframe

        import tabulate

        print(
            tabulate.tabulate(
                self.dataframe, showindex=False, headers=self.dataframe.columns
            )
        )
        return None

    @staticmethod
    def _columns_from_metadata(metadata: dict) -> dict:
        """Turn ``{file: {key: value}}`` into ``{column: [values...]}``.

        The ``"file"`` column is taken from the mapping's keys (a ``"file"``
        entry inside the per-file metadata is ignored). Every other column
        has exactly one value per file; a key missing for a given file
        yields ``None`` so that all columns have equal length.
        """
        columns = {"file": list(metadata)}

        # dict.fromkeys keeps first-seen order while removing duplicates.
        keys = dict.fromkeys(
            key for entry in metadata.values() for key in entry if key != "file"
        )
        for key in keys:
            columns[key] = [entry.get(key) for entry in metadata.values()]

        if "size" in columns:
            # The manifest stores values as strings; expose sizes as integers
            # when every entry parses as one. Best effort: if any entry is
            # missing or non-numeric the column is left untouched.
            try:
                columns["size"] = [int(value) for value in columns["size"]]
            except (TypeError, ValueError):
                pass

        return columns

    @classmethod
    def from_manifest(cls, manifest: str):
        """Build an instance from a JSON manifest file.

        Args:
            manifest: Path to a JSON file with a top-level ``"metadata"``
                mapping of file name to a mapping of metadata values.

        Returns:
            A new instance wrapping a data frame with one row per file.

        Raises:
            OSError: If the manifest cannot be opened.
            KeyError: If the manifest has no ``"metadata"`` entry.
        """
        # NOTE(review): the original carried a commented-out
        # `_verify_metadata_file()` call here; confirm whether the manifest
        # should be verified/refreshed before it is read.
        with open(pathlib.Path(manifest), encoding="utf-8") as json_file:
            file_meta_data = json.load(json_file)

        columns = cls._columns_from_metadata(file_meta_data["metadata"])

        return cls(manifest=manifest, dataframe=pd.DataFrame(columns))


def list_files(truncate=None) -> Union[None, pd.DataFrame]:
    """List the files described by the bundled download manifest.

    Args:
        truncate: Value passed to pandas' ``display.max_rows`` option.

    Returns:
        The data frame when running in a notebook; ``None`` otherwise, after
        the table has been printed to stdout.
    """
    # NOTE: these pandas display options are set globally, not just for this call.
    pd.set_option("display.max_rows", truncate)
    pd.set_option("display.colheader_justify", "left")

    meta_data_path = pathlib.Path(__file__).parent.joinpath(
        ".cloudflare/file.download.json"
    )

    table = ToolviperFiles.from_manifest(str(meta_data_path))

    return table.print()