diff --git a/mecsimcalc/__init__.py b/mecsimcalc/__init__.py index eee59c0..693afcc 100644 --- a/mecsimcalc/__init__.py +++ b/mecsimcalc/__init__.py @@ -1,6 +1,6 @@ # included in __all__: import using "from mecsimcalc import *" or "import mecsimcalc" -from .file_utils.general_utils import input_to_file +from .file_utils.general_utils import input_to_file, metadata_to_filetype from .file_utils.image_utils import input_to_PIL, file_to_PIL, print_image @@ -33,6 +33,7 @@ "input_to_dataframe", "file_to_dataframe", "input_to_file", + "metadata_to_filetype", # Deprecated "input_to_PIL", "table_to_dataframe", "print_dataframe", diff --git a/mecsimcalc/file_utils/general_utils.py b/mecsimcalc/file_utils/general_utils.py index 06c47a6..a70266e 100644 --- a/mecsimcalc/file_utils/general_utils.py +++ b/mecsimcalc/file_utils/general_utils.py @@ -3,8 +3,9 @@ import re from typing import Union, Tuple from mimetypes import guess_type, guess_extension +from warnings import warn -# This is only nessesary for python 3.6 +# This is only necessary for python 3.6 EXTENSION_MAP = { ".jpe": ".jpg", ".htm": ".html", @@ -13,7 +14,7 @@ } def input_to_file( - input_file: str, get_file_extension: bool = False + input_file: str, get_file_extension: bool = False, metadata: bool = False ) -> Union[io.BytesIO, Tuple[io.BytesIO, str]]: """ >>> input_to_file( @@ -68,6 +69,47 @@ def input_to_file( extension = guess_extension(guess_type(meta_data)[0]) extension = EXTENSION_MAP.get(extension, extension) # Only necessary for python 3.6 - + + # Deprecated + if metadata: + return (file_data, meta_data) + return (file_data, extension) if get_file_extension else file_data +# Deprecated +def metadata_to_filetype(metadata: str) -> str: + """ + # Deprecated + >>> metadata_to_filetype(metadata: str) -> str + + Extracts the file type from the metadata string. 
+ + Parameters + ---------- + metadata : str + A metadata string typically in the form `Data:<MIME type>;base64,` + + Returns + ------- + * `str` : + The extracted file type (e.g., 'csv'). For a Microsoft Excel file, it returns 'xlsx'. + + Examples + -------- + >>> input_file = inputs["input_file"] + >>> open_file, metadata = msc.input_to_file(input_file, metadata=True) + >>> file_type = msc.metadata_to_filetype(metadata) + >>> print(file_type) + 'jpeg' + """ + warn("metadata_to_filetype is deprecated. Use guess_extension instead.", DeprecationWarning) + + # Extract mime type from metadata + match = re.search(r"/(.+);base64,", metadata) + file_type = match[1] if match else "" + + # Convert the file type to a more common format + if file_type == "vnd.openxmlformats-officedocument.spreadsheetml.sheet": + file_type = "xlsx" + + return file_type diff --git a/mecsimcalc/file_utils/image_utils.py b/mecsimcalc/file_utils/image_utils.py index f052fdc..6c219a9 100644 --- a/mecsimcalc/file_utils/image_utils.py +++ b/mecsimcalc/file_utils/image_utils.py @@ -44,7 +44,7 @@ def file_to_PIL(file: io.BytesIO) -> Image.Image: def input_to_PIL( - input_file: str, get_file_extension: bool = False + input_file: str, get_file_extension: bool = False, get_file_type: bool = False ) -> Union[Image.Image, Tuple[Image.Image, str]]: """ >>> input_to_PIL( @@ -84,6 +84,9 @@ def input_to_PIL( (image is now ready to be used with Pillow functions) """ + # get_file_type is deprecated + get_file_extension = get_file_extension or get_file_type + # Get file extension from metadata file_data = input_to_file(input_file) image = file_to_PIL(file_data) @@ -103,6 +106,7 @@ def print_image( download: bool = False, download_text: str = "Download Image", download_file_name: str = "myimg", + download_file_type: str = None, ) -> Union[str, Tuple[str, str]]: """ >>> print_image( @@ -158,6 +162,8 @@ def print_image( } """ + # download_file_type is deprecated + # get metadata (file type) from image dummy_filename = 
f"dummy.{image.format.lower()}" mime_type, _ = guess_type(dummy_filename) diff --git a/mecsimcalc/file_utils/spreadsheet_utils.py b/mecsimcalc/file_utils/spreadsheet_utils.py index f04077c..e0ba8ca 100644 --- a/mecsimcalc/file_utils/spreadsheet_utils.py +++ b/mecsimcalc/file_utils/spreadsheet_utils.py @@ -51,7 +51,7 @@ def file_to_dataframe(file: io.BytesIO) -> pd.DataFrame: def input_to_dataframe( - input_file: str, get_file_extension: bool = False + input_file: str, get_file_extension: bool = False, get_file_type: bool = False ) -> Union[pd.DataFrame, Tuple[pd.DataFrame, str]]: """ >>> input_to_dataframe( @@ -83,6 +83,9 @@ def input_to_dataframe( 0 1 2 3 1 4 5 6 """ + # get_file_type is deprecated + get_file_extension = get_file_extension or get_file_type + # converts input file into a dataframe file_data, file_extension = input_to_file(input_file, get_file_extension=True)