Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions docs/download_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@
"id": "fdf5ec65-a98b-412b-b5c6-3b4bb23e8312",
"metadata": {},
"source": [
"## Getting available files and metadata in table form."
"## Getting available files and metadata in data frame form.\n",
"The output is a polars data frame and has all the functionality contained within. More details can be found on \n",
"the official docs page: https://docs.pola.rs/"
]
},
{
Expand All @@ -94,6 +96,36 @@
"source": [
"toolviper.utils.data.list_files()"
]
},
{
"cell_type": "markdown",
"id": "2b47b5ed-9409-471c-ab54-6a793f4052c5",
"metadata": {},
"source": [
"#### Example of pre-selection on the file metadata"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bebfac78-a3f5-46ca-a840-d222308a72b8",
"metadata": {},
"outputs": [],
"source": [
"files = toolviper.utils.data.list_files()\n",
"\n",
"files[files.telescope == \"VLA\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b04a312-043a-4957-a16d-39c6d73c1fa6",
"metadata": {},
"outputs": [],
"source": [
"files.filter(items=[\"file\", \"path\", \"telescope\"])"
]
}
],
"metadata": {
Expand All @@ -112,7 +144,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
"version": "3.12.12"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/file-manifest-update.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
"version": "3.12.12"
}
},
"nbformat": 4,
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ dependencies = [
'numpy',
'psutil',
'rich',
'pandas',
'itables',
'requests',
'tabulate',
'tqdm',
]

Expand Down
90 changes: 88 additions & 2 deletions src/toolviper/utils/data/cloudflare.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
import toolviper.utils.logger as logger
from toolviper.utils import parameter

from collections import defaultdict
from toolviper.utils.parameter import is_notebook
import pandas as pd

colorize = console.Colorize()

PROGRESS_MAX_CHARACTERS = 28
Expand Down Expand Up @@ -196,7 +200,89 @@ def worker(progress: Progress, task_id: TaskID, task: dict, decompress=True) ->
os.remove(fullname)


def list_files() -> None:
class ToolviperFiles:
    """Tabular view of the files described by a download manifest.

    An instance keeps the manifest path and a pandas data frame with one row
    per file. On construction ``is_notebook()`` is consulted: in a notebook
    the ``itables`` notebook mode is initialised and the data frame itself is
    returned to the caller; otherwise the table is written to stdout.
    """

    def __init__(self, manifest, dataframe=None):
        """Store the manifest/data frame and detect notebook mode.

        Parameters
        ----------
        manifest
            Path of the manifest file the data frame was built from.
        dataframe
            Data frame holding the per-file metadata, or ``None``.
        """
        self.manifest = manifest
        self.dataframe = dataframe
        self.notebook_mode = False

        if is_notebook():
            # Imported lazily so itables is only loaded when it is used.
            import itables

            self.notebook_mode = True

            itables.init_notebook_mode()

    def __call__(self):
        """Return the data frame in notebook mode, else print it (returns None)."""
        if not self.notebook_mode:
            return print(self.dataframe)

        else:
            return self.dataframe

    def print(self) -> Union[None, pd.DataFrame]:
        """Show the table.

        Outside a notebook the data frame is rendered with ``tabulate`` and
        written to stdout, and ``None`` is returned. In notebook mode nothing
        is printed and the data frame is returned instead.
        """
        if not self.notebook_mode:
            import tabulate

            print(
                tabulate.tabulate(
                    self.dataframe, showindex=False, headers=self.dataframe.columns
                )
            )
            return None

        return self.dataframe

    @staticmethod
    def _collect_columns(metadata):
        """Turn the manifest ``metadata`` mapping into column lists.

        ``metadata`` maps a file name to a dict of attributes. The result maps
        each column name to a list of values in file order; the ``file`` column
        is taken from the mapping keys, so a ``file`` attribute inside an entry
        is ignored.
        """
        columns = defaultdict(list)
        columns["file"] = list(metadata)

        for attributes in metadata.values():
            for key_, value_ in attributes.items():
                if key_ == "file":
                    continue

                # The manifest stores values as strings; expose the size as an
                # integer. The check must be on the key (the original compared
                # the value against "size", so the conversion never ran).
                if key_ == "size":
                    try:
                        value_ = int(value_)
                    except (TypeError, ValueError):
                        # Leave sizes that are not plain integers untouched
                        # rather than failing to list the files at all.
                        pass

                columns[key_].append(value_)

        return columns

    @classmethod
    def from_manifest(cls, manifest: str):
        """Build an instance from the JSON manifest at path ``manifest``.

        The file must contain a top-level ``"metadata"`` object mapping file
        names to their attributes.
        """
        meta_data_path = pathlib.Path(manifest)

        # Verify that the download metadata exist and update if not.
        # _verify_metadata_file()

        with open(meta_data_path, encoding="utf-8") as json_file:
            file_meta_data = json.load(json_file)

        data = cls._collect_columns(file_meta_data["metadata"])

        return cls(manifest=manifest, dataframe=pd.DataFrame(data))


def list_files(truncate=None) -> Union[None, pd.DataFrame]:
    """List the files described by the bundled download manifest.

    Parameters
    ----------
    truncate
        Value assigned to the pandas ``display.max_rows`` option; ``None``
        (the default) lets pandas show every row.

    Returns
    -------
    The result of ``ToolviperFiles.print()``: the data frame when running in
    a notebook, otherwise ``None`` after the table has been printed.

    Notes
    -----
    This sets the pandas display options ``display.max_rows`` and
    ``display.colheader_justify`` for the whole process.
    """
    pd.set_option("display.max_rows", truncate)
    pd.set_option("display.colheader_justify", "left")

    # The manifest ships next to this module.
    meta_data_path = pathlib.Path(__file__).parent.joinpath(
        ".cloudflare/file.download.json"
    )

    table = ToolviperFiles.from_manifest(str(meta_data_path))

    return table.print()


# This version of the function is now deprecated
def list_files_() -> None:
"""
List all files in cloudflare
"""
Expand All @@ -218,7 +304,7 @@ def list_files() -> None:
with open(meta_data_path) as json_file:
file_meta_data = json.load(json_file)

table.add_column("file", style="blue")
table.add_column("file", style="blue", no_wrap=False)
table.add_column("dtype", style="green")
table.add_column("telescope", style="green")
table.add_column("size", style="green")
Expand Down
Loading