Prepare AgML 0.7.4 #76

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

amogh7joshi merged 10 commits into main from dev

Apr 22, 2025

README.md

-Original file line number
+Diff line change
@@ Expand Up / @@ -203,7 +203,7 @@ to your desired specification. @@
     [cucumber_disease_classification](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/cucumber_disease_classification.md) | Image Classification | 7689 |
     [iNatAg](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/iNatAg.md) | Image Classification | 4720903 |
     [iNatAg-mini](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/iNatAg-mini.md) | Image Classification | 560844 |
+    [soybean_insect_classification](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/soybean_insect_classification.md) | Image Classification | 6410 |
     ## iNatAg and iNatAg-mini
@@ Expand All @@
     loader = agml.data.AgMLDataLoader.from_parent("iNatAg", filters={"common_name": "..."})
     ```
     ## Usage Information
     ### Using Public Agricultural Data
@@ Expand Down Expand Up @@
     See the [contributing guidelines](/CONTRIBUTING.md) for more information.
     ## Funding
-    This project is partly funded by the [National AI Institute for Food Systems](https://aifs.ucdavis.edu).
+    This project is partly funded by the [National AI Institute for Food Systems](https://aifs.ucdavis.edu).

agml/_assets/public_datasources.json

-Original file line number
+Diff line change
@@ Expand Up / @@ -1057,6 +1057,11 @@ @@
                 "continent": "worldwide",
                 "country": "worldwide"
             },
+            "sensor_modality": "rgb",
+            "real_synthetic": "real",
+            "platform": "handheld/ground",
+            "input_data_format": "jpg",
+            "annotation_format": "image",
             "n_images": "42",
             "docs_url": "https://data.nal.usda.gov/dataset/data-multi-species-fruit-flower-detection-using-refined-semantic-segmentation-network",
             "external_image_sources": [],
@@ Expand Down Expand Up / @@ -2462,5 +2467,40 @@ @@
             },
             "parent_dataset": "",
             "extra_metadata": {}
+        },
+        "soybean_insect_classification": {
+            "ml_task": "image_classification",
+            "ag_task": "pest_classification",
+            "location": {
+                "continent": "south_america",
+                "country": "brazil"
+            },
+            "sensor_modality": "rgb",
+            "real_synthetic": "real",
+            "platform": "uav",
+            "input_data_format": "jpg",
+            "annotation_format": "directory_names",
+            "n_images": "6410",
+            "docs_url": "https://data.mendeley.com/datasets/bycbh73438/1",
+            "classes": {
+                "0": "Caterpillar",
+                "1": "Diabrotica_speciosa",
+                "2": "Healthy"
+            },
+            "external_image_sources": [],
+            "parent_dataset": "",
+            "extra_metadata": {},
+            "stats": {
+                "mean": [
+                    0.4129256308078766,
+                    0.515846848487854,
+                    0.3347107470035553
+                ],
+                "std": [
+                    0.22007715702056885,
+                    0.22993944585323334,
+                    0.20302338898181915
+                ]
+            }
         }
     }

agml/_assets/shape_info.pickle

Binary file not shown.

agml/_assets/source_citations.json

-Original file line number
+Diff line change
@@ Expand Up / @@ -262,5 +262,9 @@ @@
         "cucumber_disease_classification": {
             "license": "CC BY 4.0",
             "citation": "Sultana, Nusrat; Shorif, Sumaita Binte ; Akter, Morium ; Uddin, Mohammad Shorif  (2022), “Cucumber Disease Recognition Dataset”, Mendeley Data, V1, doi: 10.17632/y6d3z6f8z9.1"
+        },
+        "soybean_insect_classification": {
+            "license": "CC BY 4.0",
+            "citation": "Mignoni, Maria Eloisa (2021), “Images of Soybean Leaves”, Mendeley Data, V1, doi: 10.17632/bycbh73438.1"
         }
     }

agml/_internal/preprocess.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -1585,6 +1585,9 @@ def sunflower_disease_classification(self, dataset_name): @@
         def cucumber_disease_classification(self, dataset_name):
             pass
+        def soybean_insect_classification(self, dataset_name):
+            pass
     if __name__ == '__main__':
         # Initialize program arguments.
         ap = argparse.ArgumentParser()
@@ Expand Down @@

agml/data/metadata.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -19,6 +19,9 @@ @@
     from typing import Iterable
     import yaml
+    from rich.console import Console
+    from rich.table import Table
     import agml.utils.logging as logging
     from agml.framework import AgMLSerializable
@@ Expand Down Expand Up / @@ -280,12 +283,12 @@ def summary(self): @@
             as `print(loader.info.summary())`, just `loader.info.summary()`.
             """
-            def _bold(msg):
-                return "\033[1m" + msg + "\033[0m"
+            console = Console()
-            def _bold_yaml(msg):  # noqa
-                return "<|>" + msg + "<|>"
+            table = Table(title="Dataset Summary")
+            table.add_column("Attribute", justify="right", style="bold cyan")
+            table.add_column("Value", justify="left", style="bold white")
             _SWITCH_NAMES = {
                 "ml_task": "Machine Learning Task",
                 "ag_task": "Agricultural Task",
@@ Expand All / @@ -294,7 +297,6 @@ def _bold_yaml(msg): # noqa @@
                 "docs_url": "Documentation",
             }
-            formatted_metadata = {}
             for key, value in self._metadata.items():
                 name = key.replace("_", " ").title()
                 if key in _SWITCH_NAMES.keys():
@@ Expand All / @@ -303,18 +305,10 @@ def _bold_yaml(msg): # noqa @@
                     value = {int(k): v for k, v in value.items()}
                 if name == "Number of Images":
                     value = int(value)
-                formatted_metadata[_bold_yaml(name)] = value
-            stream = io.StringIO()
-            yaml.dump(formatted_metadata, stream, sort_keys=False)
-            content = stream.getvalue()
-            content = re.sub("<\\|>(.*?)<\\|>", _bold(r"\1"), content)
-            header = "=" * 20 + " DATASET SUMMARY " + "=" * 20
-            print(header)
-            print(_bold("Name") + f": {self._name}")
-            print(content, end="")
-            print("=" * 57)
-            sys.stdout.flush()
+                table.add_row(name, f"{value}")
+            console.print(table)
         def citation_summary(self):
             """Prints out a summary of the citation information of the dataset.
@@ Expand Down @@

agml/data/public.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -14,6 +14,7 @@ @@
     import functools
+    from rich.console import Console
     import numpy as np
     from agml.backend.config import data_save_path
@@ Expand Down Expand Up / @@ -122,8 +123,30 @@ def _matches(self, meta, filters): @@
                         return False
             return True
-        def print_result(self):
-            return "[%s]" % ", ".join(self._current_filtered_source)
+        def __repr__(self):
+            """Prints a formatted table of the filtered datasets using rich."""
+            console = Console()
+            if not self._current_filtered_source:
+                console.print("[bold yellow]No datasets found matching the criteria.[/]")
+                return
+            table = Table(title="Filtered Datasets")
+            table.add_column("Dataset Name", style="cyan")
+            table.add_column("ML Task")
+            table.add_column("Location")
+            table.add_column("# Images")
+            for source_name in self._current_filtered_source:
+                meta = self._sources[source_name]
+                table.add_row(
+                    source_name,
+                    meta.get("ml_task", "N/A"),  # Handle missing 'ml_task'
+                    f"{meta['location']['continent']}, {meta['location']['country']}" if "location" in meta else "N/A",
+                    str(meta.get("n_images", "N/A")),  # prints N/A if number of images are not there.
+                )
+            console.print(table)
         def result(self):
             """Returns the filtered datasets as DatasetMetadata objects.
@@ Expand Down @@

agml/utils/data.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -19,6 +19,10 @@
  
    import shutil

    import sys

    from rich.console import Console

    from rich.panel import Panel

    from rich.text import Text

    @functools.lru_cache(maxsize=None)

    def load_public_sources() -> dict:

    @@ -98,67 +102,106 @@ def maybe_you_meant(name, msg, source=None) -> str:
  
    def copyright_print(name, location=None):

        """Prints out license/copyright info after a dataset download."""

        content = load_citation_sources()[name]

        license = content["license"]  # noqa

        license_info = content["license"]  # noqa

        citation = content["citation"]

        def _bold(msg):  # noqa

            return "\033[1m" + msg + "\033[0m"

        # Construct the title text

        title_text = Text(f"Dataset: {name}", style="bold cyan")

        if location is None:

            first_msg = "Citation information for " + _bold(name) + ".\n"

        if location is not None:

            location_text = Text.assemble(("You have just downloaded ", "bold cyan"), (name, "green"))

        else:

            first_msg = "You have just downloaded " + _bold(name) + ".\n"

            location_text = Text.assemble(("Citation information for", "bold cyan"), (name, "green"))

        _LICENSE_TO_URL = {

            'CC BY-SA 4.0': 'https://creativecommons.org/licenses/by-sa/4.0/',

            'CC BY-SA 3.0': 'https://creativecommons.org/licenses/by-sa/3.0/',

            'CC BY-NC 3.0': 'https://creativecommons.org/licenses/by-nc/3.0/',

            'CC BY-NC SA 3.0': 'https://creativecommons.org/licenses/by-nc/3.0/',

            'MIT': 'https://opensource.org/licenses/MIT',

            'GPL-3.0': 'https://opensource.org/licenses/GPL-3.0',

            'US Public Domain': 'https://www.usa.gov/government-works',

            'CC0: Public Domain': 'https://creativecommons.org/publicdomain/zero/1.0/',

            'Apache 2.0': 'https://www.apache.org/licenses/LICENSE-2.0',

            "CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",

            "CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",

            "CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",

            "CC BY-NC SA 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",

            "CC BY-NC-SA 4.0": "https://creativecommons.org/licenses/by-nc/4.0/",

            "MIT": "https://opensource.org/licenses/MIT",

            "GPL-3.0": "https://opensource.org/licenses/GPL-3.0",

            "US Public Domain": "https://www.usa.gov/government-works",

            "CC0: Public Domain": "https://creativecommons.org/publicdomain/zero/1.0/",

            "Apache 2.0": "https://www.apache.org/licenses/LICENSE-2.0",

            'CC BY-NC 4.0': 'https://creativecommons.org/licenses/by-nc/4.0/',

            'CC BY-NC-SA 4.0': 'https://creativecommons.org/licenses/by-nc-sa/4.0/',

            'CC BY 4.0': 'https://creativecommons.org/licenses/by/4.0/deed.en'}

        if license == '':

            license_msg = "This dataset has " \

                          + _bold("no license") + ".\n"

        else:

            license_msg = "This dataset is licensed under the " + _bold(license) + " license.\n"

            license_msg += "To learn more, visit: " + _LICENSE_TO_URL[license] + "\n"

            'CC BY 4.0': 'https://creativecommons.org/licenses/by/4.0/deed.en'

        }

        # Create license message

        if not license_info:  # Handle empty license

            license_msg = Text("License: None specified", style="yellow")

        else:

            license_msg = Text.assemble(("This dataset is licensed under ", "bold cyan"), (f"{license_info}", "green"))

            if license_info in _LICENSE_TO_URL:

                license_more_info = Text(" To learn more about this license, visit ", style="bold cyan")

                license_url = Text(f"{_LICENSE_TO_URL[license_info]}", justify="center", style="white")

            else:

                license_more_info = ""

                license_url = ""

        # Create citation message

        if citation == "":

            citation_msg = "This dataset has no associated citation."

            citation_msg = Text("This dataset has no associated citation.", style="yellow")

            citation_url = ""

        else:

            citation_msg = "When using this dataset, please cite the following:\n\n"

            citation_msg += citation

            citation_msg = (Text("When using this dataset, please cite the following: \n", style="bold cyan"),)

            citation_url = Text(citation, justify="center", style="white")

        # Dataset documentation message

        docs = load_public_sources()[name]["docs_url"]

        docs_msg = "\nYou can find additional information about " "this dataset at:\n" + docs

        columns = shutil.get_terminal_size((80, 24)).columns

        max_print_length = max(

            min(

                columns,

                max([len(i) for i in [*citation_msg.split("\n"), *license_msg.split("\n")]]),

        docs_msg = Text("You can find additional information about this dataset at: ", style="bold  cyan")

        docs_url = Text(docs, justify="center", style="white")

        combined_message = Text.assemble(

            title_text,

            "\n\n",

            location_text,

            "\n\n",

            license_msg,

            " ",

            license_more_info,

            license_url,

            "\n\n",

            citation_msg,

            citation_url,

            "\n\n",

            docs_msg,

            docs_url,

        )

        console = Console()

        # Create and print the rich Panel

        panel = Panel(

            combined_message,

            title="Copyright, Citation, and Documenation Information",

            subtitle=title_text,

            border_style="bright_yellow",

            highlight=True,

            expand=False,  # Prevent unnecessary whitespace

        )

        console.print(panel)

        # Instructions on how to reprint (using rich Text)

        instructions = Text.assemble(

            ("\nThis message will ",),

            (

                "not ",

                "bold",

            ),

            columns,

            ("be automatically shown again. To view this message again,  in an AgMLDataLoader run "),

            ("`loader.info.citation_summary()` "),

            (" Otherwise, you can use `agml.data.source(<dataset_name>).citation_summary()`.",),

        )

        print("\n" + "=" * max_print_length)

        print(first_msg)

        print(license_msg)

        print(citation_msg)

        print(docs_msg)

        print(

            "\nThis message will " + _bold("not") + " be automatically shown\n"

            "again. To view this message again, in an AgMLDataLoader\n"

            + "run `loader.info.citation_summary()`. Otherwise, you\n"

            + "can use `agml.data.source(<name>).citation_summary().`"

        warning_panel = Panel(

            instructions,

            title="Note",

            border_style="yellow",

            expand=False,

            highlight=True,

        )

        console.print(warning_panel)

        if location is not None:

            print(f"\nYou can find your dataset at {location}.")

        print("=" * max_print_length)

        sys.stdout.flush()

            console.print(f"\n [bold] You can find your dataset at {location}.")

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Prepare AgML 0.7.4 #76

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Uh oh!