diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 6beef2e..7744ca8 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -26,12 +26,12 @@ jobs: pip install --find-links dist esat sphinx-apidoc -o docs esat sphinx-apidoc -o docs eval - sphinx-build . _build + sphinx-build -b html . docs/_build/html - name: Deploy to GitHub Pages - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@v4 if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} with: publish_branch: gh-pages github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: _build + publish_dir: docs/_build/html force_orphan: true \ No newline at end of file diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cb59114..9cbacfa 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -10,40 +10,11 @@ env: CIBW_BUILD: "cp310-* cp311-* cp312-*" jobs: -# build_wheels: -# name: Build wheels on ${{ matrix.os }} -# runs-on: ${{ matrix.os }} -# strategy: -# matrix: -# os: [ ubuntu-latest, windows-latest, macos-13, macos-latest ] -# steps: -# - uses: actions/checkout@v4 -# - uses: actions/setup-python@v5 -# with: -# python-version: "3.12" -# - name: Install cibuildwheel -# run: python -m pip install cibuildwheel==2.23.3 -# - name: Install setuptools -# shell: bash -# run: | -# python -m pip install --upgrade pip -# curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain=stable -# echo "PATH=${PATH}:${HOME}/.cargo/bin" >> $GITHUB_ENV -# rustc --version -# python -m pip install build setuptools setuptools-rust -# - name: Build wheels -# run: python -m cibuildwheel --output-dir wheelhouse -# - uses: actions/upload-artifact@v4 -# with: -# name: esat-wheels-${{ matrix.os }}-py${{matrix.python_version}} -# path: ./wheelhouse/*.whl - build_wheels: name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: 
matrix: - # macos-13 is an intel runner, macos-14 is apple silicon os: [ubuntu-latest, macos-13, macos-14] python_version: ["3.10", "3.11", "3.12"] steps: @@ -52,23 +23,28 @@ jobs: with: python-version: ${{ matrix.python_version }} - name: Install setuptools - run: | - python -m pip install --upgrade pip - curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain=stable - echo "PATH=${PATH}:${HOME}/.cargo/bin" >> "$GITHUB_ENV" - rustc --version - python -m pip install build setuptools setuptools-rust - - name: Build wheels with cibuildwheel run: | python -m pip install --upgrade pip + curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain=stable + echo "PATH=${PATH}:${HOME}/.cargo/bin" >> "$GITHUB_ENV" + rustc --version + python -m pip install build setuptools setuptools-rust + - name: Build CPU wheels with cibuildwheel + run: | python -m pip install cibuildwheel==2.22.0 python -m cibuildwheel --output-dir wheelhouse -# - name: Build wheels -# run: -# python -m build --outdir wheelhouse - uses: actions/upload-artifact@v4 with: - name: esat-wheels-${{ matrix.os }}-py${{matrix.python_version}} + name: esat-wheels-cpu-${{ matrix.os }}-py${{ matrix.python_version }} + path: ./wheelhouse/*.whl + - name: Build GPU wheels with cibuildwheel + run: | + rm -rf wheelhouse + python -m pip install cibuildwheel==2.22.0 + python -m cibuildwheel --output-dir wheelhouse + - uses: actions/upload-artifact@v4 + with: + name: esat-wheels-gpu-${{ matrix.os }}-py${{ matrix.python_version }} path: ./wheelhouse/*.whl build_windows_wheel: @@ -89,14 +65,20 @@ jobs: rustup target add aarch64-pc-windows-msvc i686-pc-windows-msvc x86_64-pc-windows-msvc echo "${HOME}/.cargo/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append python -m pip install build setuptools setuptools-rust - - name: Build wheels with cibuildwheel + - name: Build CPU wheels with cibuildwheel run: | - python -m pip install --upgrade pip python -m pip install cibuildwheel==2.22.0 python -m 
cibuildwheel --output-dir wheelhouse -# - name: Build wheels -# run: python -m build --outdir wheelhouse - uses: actions/upload-artifact@v4 with: - name: esat-wheels-windows-py${{matrix.python_version}} + name: esat-wheels-cpu-windows-py${{ matrix.python_version }} path: ./wheelhouse/*.whl + - name: Build GPU wheels with cibuildwheel + run: | + Remove-Item -Recurse -Force wheelhouse + python -m pip install cibuildwheel==2.22.0 + python -m cibuildwheel --output-dir wheelhouse + - uses: actions/upload-artifact@v4 + with: + name: esat-wheels-gpu-windows-py${{ matrix.python_version }} + path: ./wheelhouse/*.whl \ No newline at end of file diff --git a/.gitignore b/.gitignore index da9c131..d165fb2 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ docs/_downloads/ docs/_sources/ docs/_static/ docs/html/ +build/ notebooks/.ipynb_checkpoints/ notebooks/old/.ipynb_checkpoints/ @@ -38,4 +39,4 @@ notebooks/old/.ipynb_checkpoints/ *.dat profile.html profile.json -data/test_output/ \ No newline at end of file +data/test_output/ diff --git a/Cargo.toml b/Cargo.toml index 0cd0dc5..1c3b133 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,13 +14,15 @@ path = "rust/lib.rs" crate-type = ["cdylib"] [dependencies] -numpy = "0.20.0" -nalgebra = {version="0.32.4", features = ["rayon"]} -ndarray = "0.15.6" -rayon = "1.9.0" -pyo3 = "0.20.3" -indicatif = "0.17" -console = "0.15.8" +numpy = "0.25.0" +nalgebra = "0.33.2" +ndarray = "0.16.1" +pyo3 = { version = "0.25.1", features = ["extension-module"] } +indicatif = "0.18.0" +console = "0.16.0" +# GPU support +candle-core = "0.9.1" +#candle-core = { version = "0.9.1", features = ["cuda"] } [features] extension-module = ["pyo3/extension-module", "pyo3/generate-import-lib"] @@ -33,3 +35,5 @@ debug = false [profile.release] opt-level = 3 debug = false + + diff --git a/doc-requirements.txt b/doc-requirements.txt index 7c21423..40bb9ac 100644 --- a/doc-requirements.txt +++ b/doc-requirements.txt @@ -7,6 +7,7 @@ plotly==6.0.1 psutil==7.0.0 
scipy==1.15.2 sphinx==8.2.3 +sphinx-book-theme==1.1.4 sphinx-click==6.0.0 sphinx-rtd-theme==3.0.2 tabulate==0.9.0 diff --git a/docs/doctrees/README.doctree b/docs/doctrees/README.doctree index eb80aa7..985ea60 100644 Binary files a/docs/doctrees/README.doctree and b/docs/doctrees/README.doctree differ diff --git a/docs/doctrees/docs/cli.doctree b/docs/doctrees/docs/cli.doctree index 48169cc..96a0a66 100644 Binary files a/docs/doctrees/docs/cli.doctree and b/docs/doctrees/docs/cli.doctree differ diff --git a/docs/doctrees/docs/esat.cli.doctree b/docs/doctrees/docs/esat.cli.doctree index ed280b5..8202d91 100644 Binary files a/docs/doctrees/docs/esat.cli.doctree and b/docs/doctrees/docs/esat.cli.doctree differ diff --git a/docs/doctrees/docs/esat.data.doctree b/docs/doctrees/docs/esat.data.doctree index eff8de3..23bfc1f 100644 Binary files a/docs/doctrees/docs/esat.data.doctree and b/docs/doctrees/docs/esat.data.doctree differ diff --git a/docs/doctrees/docs/esat.doctree b/docs/doctrees/docs/esat.doctree index 45bd5b7..e567ba7 100644 Binary files a/docs/doctrees/docs/esat.doctree and b/docs/doctrees/docs/esat.doctree differ diff --git a/docs/doctrees/docs/esat.error.doctree b/docs/doctrees/docs/esat.error.doctree index bc98b61..8194f40 100644 Binary files a/docs/doctrees/docs/esat.error.doctree and b/docs/doctrees/docs/esat.error.doctree differ diff --git a/docs/doctrees/docs/esat.model.doctree b/docs/doctrees/docs/esat.model.doctree index 3a78059..5a3fe68 100644 Binary files a/docs/doctrees/docs/esat.model.doctree and b/docs/doctrees/docs/esat.model.doctree differ diff --git a/docs/doctrees/docs/esat.rotational.doctree b/docs/doctrees/docs/esat.rotational.doctree index 654373e..3ca9048 100644 Binary files a/docs/doctrees/docs/esat.rotational.doctree and b/docs/doctrees/docs/esat.rotational.doctree differ diff --git a/docs/doctrees/docs/eval.doctree b/docs/doctrees/docs/eval.doctree index eb558d8..2d0c5a2 100644 Binary files a/docs/doctrees/docs/eval.doctree and 
b/docs/doctrees/docs/eval.doctree differ diff --git a/docs/doctrees/docs/modules.doctree b/docs/doctrees/docs/modules.doctree index 7765efd..b71566c 100644 Binary files a/docs/doctrees/docs/modules.doctree and b/docs/doctrees/docs/modules.doctree differ diff --git a/docs/doctrees/environment.pickle b/docs/doctrees/environment.pickle index b486646..e6e3380 100644 Binary files a/docs/doctrees/environment.pickle and b/docs/doctrees/environment.pickle differ diff --git a/docs/doctrees/index.doctree b/docs/doctrees/index.doctree index 9cb605e..87b1d0c 100644 Binary files a/docs/doctrees/index.doctree and b/docs/doctrees/index.doctree differ diff --git a/docs/html/.buildinfo b/docs/html/.buildinfo index 0840b3c..06633b1 100644 --- a/docs/html/.buildinfo +++ b/docs/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 -# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 08826985b7a8d85c7fec8fc3270a1630 +# This file records the configuration used when building these files. When it is not found, a full rebuild will be done. +config: f688b6899233aa9a295b9d35a66d85ad tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/html/README.html b/docs/html/README.html index 0c58ec8..af9e390 100644 --- a/docs/html/README.html +++ b/docs/html/README.html @@ -8,7 +8,7 @@ -
Last Update: 08-15-2024
+Last Update: 04-16-2025
The Python API and CLI documentation can be found at the Github ESAT IO site: https://quanted.github.io/esat/
+The Python API and CLI documentation can be found at the GitHub ESAT IO site: https://quanted.github.io/esat/
The ESAT python package contains all compiled code and required dependencies and can be installed using pip
-pip install esat-VERSION-OS.whl
+pip install esat
-where the VERSION is the code version, such as 2024.1.0, and OS is operating system targeted
-compiled version, such as cp312-cp312-win_amd64. If an error message appears during installation stating that the
-package is not supported check that the correct OS and python version are being installed for that system.
-When the package becomes available on pypi, the package will be able to be installed with
-pip install esat
+which will install the latest version that supports and is available for your python version and OS.
+Development versions of ESAT can be found on the GitHub Actions page, for logged-in users,
+under the ‘Build and Publish Wheel’ workflow. The latest version of the package will be available as an artifact for
+download in the ‘Artifacts’ section of the completed workflow. There, wheel files can be found for specific versions
+of python and supported operating systems.
+If an error message appears during installation stating that the
+package is not supported, check that the correct OS and python version are being installed for that system. The python
+wheels can be installed directly using
+pip install <wheel file name>
The esat python package is recommended to be installed in its own dedicated python virtual environment or conda environment.
@@ -619,7 +625,7 @@ Disclaimer
- © Copyright 2024, EPA.
+ © Copyright 2025, EPA.
diff --git a/docs/html/docs/cli.html b/docs/html/docs/cli.html
index 29151e9..6aec7b1 100644
--- a/docs/html/docs/cli.html
+++ b/docs/html/docs/cli.html
@@ -8,7 +8,7 @@
- Command Line Interface — Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ Command Line Interface — Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -28,16 +28,16 @@
-
-
+
+
-
-
+
+
@@ -131,7 +131,7 @@
- Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -629,7 +629,7 @@ plot-q
Default:
-Aux
+'Aux'
@@ -1618,7 +1618,7 @@ setup
- © Copyright 2024, EPA.
+ © Copyright 2025, EPA.
diff --git a/docs/html/docs/esat.cli.html b/docs/html/docs/esat.cli.html
index 71db6ca..4b81a65 100644
--- a/docs/html/docs/esat.cli.html
+++ b/docs/html/docs/esat.cli.html
@@ -8,7 +8,7 @@
- esat.cli package — Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ esat.cli package — Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -28,16 +28,16 @@
-
-
+
+
-
-
+
+
@@ -132,7 +132,7 @@
- Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -326,8 +326,11 @@ Contents
ModelAnalysis
+ModelAnalysis.aggregate_factors_for_plotting()
ModelAnalysis.calculate_statistics()
ModelAnalysis.features_metrics()
+ModelAnalysis.plot_all_factors()
+ModelAnalysis.plot_all_factors_3d()
ModelAnalysis.plot_estimated_observed()
ModelAnalysis.plot_estimated_timeseries()
ModelAnalysis.plot_factor_composition()
@@ -343,12 +346,19 @@ Contents
esat.data.datahandler module
DataHandler
+DataHandler.aggregate_output()
DataHandler.get_data()
DataHandler.load_dataframe()
+DataHandler.merge()
+DataHandler.plot_2d_histogram()
DataHandler.plot_data_uncertainty()
+DataHandler.plot_feature_correlation_heatmap()
DataHandler.plot_feature_data()
DataHandler.plot_feature_timeseries()
+DataHandler.plot_ridgeline()
+DataHandler.plot_superimposed_histograms()
DataHandler.set_category()
+DataHandler.split_locations()
@@ -386,7 +396,7 @@ Submodulesesat.data.analysis module#
-
-class esat.data.analysis.BatchAnalysis(batch_sa: BatchSA, data_handler: DataHandler = None)#
+class esat.data.analysis.BatchAnalysis(batch_sa: BatchSA, data_handler: DataHandler = None)#
Bases: object
Class for running batch solution analysis.
@@ -396,7 +406,7 @@ Submodules
-
-plot_loss()#
+plot_loss(show: bool = True)#
Plot the loss value for each model in the batch solution as it changes over time.
A model will stop updating if the convergence criteria is met, which can be identified by the models that stop
before reaching max iterations. The ideal loss curve should represent a y=1/x hyperbola, but because of the
@@ -405,7 +415,7 @@
Submodules
-
-plot_loss_distribution()#
+plot_loss_distribution(show: bool = True)#
Plot the distribution of batch model Q(True) and Q(Robust).
A very broad distribution is often a result of a ‘loose’ convergence criteria, increasing converge_n and
decreasing converge_delta will narrow the criteria. If the Q(True) and Q(Robust) distributions are very similar
@@ -415,20 +425,16 @@
Submodules
-
-plot_temporal_residuals(feature_idx: int)#
-Plot the temporal residuals for a specified feature, by index, of all models in the SA batch.
-
-- Parameters:
-feature_idx (int) – The index of the feature to plot.
-
-
+plot_temporal_residuals(feature_idx: int, show: bool = True)#
+Plot the temporal residuals for a specified feature.
+Only the best model’s residuals are visible initially; others are legendonly.
-
-class esat.data.analysis.ModelAnalysis(datahandler: DataHandler, model: SA, selected_model: int = None)#
+class esat.data.analysis.ModelAnalysis(datahandler: DataHandler, model: SA, selected_model: int = None)#
Bases: object
Class for running model analysis and generating plots.
A collection of model statistic methods and plot generation functions.
@@ -441,6 +447,20 @@ Submodules
+-
+aggregate_factors_for_plotting()#
+Aggregate each factor’s V_prime_k for plotting, reducing to max_samples using DataHandler’s binning.
+
+- Returns:
+Dictionary mapping factor index to aggregated V_prime_k DataFrame.
+
+- Return type:
+dict
+
+
+
+
-
calculate_statistics(results: ndarray = None)#
@@ -473,24 +493,61 @@ Submodules
+-
+plot_all_factors(factor_list: list = None, H: ndarray = None, W: ndarray = None, show: bool = True)#
+Create a vertical set of subplots for all factor profiles, similar to plot_factor_profile.
+
+- Parameters:
+
+factor_list (list) – A list of factor indices to plot, if None will plot all factors.
+H (np.ndarray) – Overrides the factor profile matrix in the ESAT model used for the plot.
+W (np.ndarray) – Overrides the factor contribution matrix in the ESAT model used for the plot.
+
+
+
+
+
+
+-
+plot_all_factors_3d(H=None, W=None, show: bool = True, plot_type: str = 'profile')#
+Create a 3D bar plot of the factor profiles and their contributions.
+:param H: The factor profile matrix, if None will use the model’s H matrix.
+:type H: np.ndarray, optional
+:param W: The factor contribution matrix, if None will use the model’s W matrix.
+:type W: np.ndarray, optional
+:param show: If True, the plot will be displayed. Default is True.
+:type show: bool
+:param plot_type: Should be either “profile”, “conc”, or “both”.
+:type plot_type: str
+
+
-
-plot_estimated_observed(feature_idx: int)#
-Create a plot that shows the estimates concentrations of a feature vs the observed concentrations.
+plot_estimated_observed(feature_idx: int = None, feature_name: str = None, show: bool = True)#
+Create a plot that shows the estimated concentrations of a feature vs the observed concentrations.
- Parameters:
-feature_idx (int) – The index of the feature to plot.
+
+feature_idx (int, optional) – The index of the feature to plot.
+feature_name (str, optional) – The name of the feature to plot.
+show (bool) – If True, the plot will be displayed. Default is True.
+
-
-plot_estimated_timeseries(feature_idx: int)#
-Create a plot that shows the estimated values of a timeseries for a specific feature, selected by feature index.
+plot_estimated_timeseries(feature_idx: int = None, feature_name: str = None, show: bool = True)#
+Create a plot that shows the estimated values of a timeseries for a specific feature.
- Parameters:
-feature_idx (int) – The index of the feature to plot.
+
+feature_idx (int, optional) – The index of the feature to plot.
+feature_name (str, optional) – The name of the feature to plot.
+show (bool) – If True, the plot will be displayed. Default is True.
+
@@ -503,13 +560,14 @@ Submodules
-plot_factor_contributions(feature_idx: int, contribution_threshold: float = 0.05)#
+plot_factor_contributions(feature_idx: int, contribution_threshold: float = 0.05, show: bool = True)#
Create a plot of the factor contributions and the normalized contribution.
- Parameters:
feature_idx (int) – The index of the feature to plot.
contribution_threshold (float) – The contribution percentage of a factor above which to include on the plot.
+show (bool) – If True, the plot will be displayed. Default is True.
@@ -517,13 +575,13 @@ Submodules
-plot_factor_fingerprints(grouped: bool = False)#
+plot_factor_fingerprints(grouped: bool = False, show: bool = True)#
Create a stacked bar plot of the factor profile, fingerprints.
-
-plot_factor_profile(factor_idx: int, H: ndarray = None, W: ndarray = None)#
+plot_factor_profile(factor_idx: int, H: ndarray = None, W: ndarray = None, show: bool = True)#
Create a bar plot of a factor profile.
- Parameters:
@@ -531,6 +589,7 @@ Submodules
-
-plot_g_space(factor_1: int, factor_2: int)#
+plot_g_space(factor_1: int, factor_2: int, show: bool = True)#
Create a scatter plot showing a factor contributions vs another factor contributions.
- Parameters:
factor_1 (int) – The index of the factor to plot along the x-axis.
factor_2 (int) – The index of the factor to plot along the y-axis.
+show (bool) – If True, the plot will be displayed. Default is True.
@@ -571,22 +631,18 @@ Submodules
-
-plot_residual_histogram(feature_idx: int, abs_threshold: float = 3.0, est_V: ndarray = None)#
+plot_residual_histogram(feature_idx: int = None, feature_name: str = None, abs_threshold: float = 3.0, est_V: ndarray = None, show: bool = True)#
Create a plot of a histogram of the residuals for a specific feature.
- Parameters:
-feature_idx (int) – The index of the feature for the plot.
+feature_idx (int, optional) – The index of the feature to plot.
+feature_name (str, optional) – The name of the feature to plot.
abs_threshold (float) – The function generates a list of residuals that exceed this limit, the absolute value of the limit.
est_V (np.ndarray) – Overrides the use of the ESAT model’s WH matrix in the residual calculation. Default = None.
+show (bool) – If True, the plot will be displayed. Default is True.
-- Returns:
-The list of residuals that exceed the absolute value of the threshold, as a pd.DataFrame
-
-- Return type:
-pd.DataFrame
-
@@ -597,7 +653,7 @@ Submodulesesat.data.datahandler module#
-
-class esat.data.datahandler.DataHandler(input_path: str, uncertainty_path: str, index_col: str = None, drop_col: list = None, sn_threshold: float = 2.0, load: bool = True)#
+class esat.data.datahandler.DataHandler(input_path: str, uncertainty_path: str, index_col: str = None, drop_col: list = None, drop_nans: bool = True, loc_cols: str | list = None, sn_threshold: float = 2.0, load: bool = True, loc_metadata: dict = None, max_plotting_n: int = 10000)#
Bases: object
The class for cleaning and preparing input datasets for use in ESAT.
The DataHandler class is intended to provide a standardized way of cleaning and preparing data from file to ESAT
@@ -614,11 +670,20 @@
Submodules
+-
+aggregate_output(output_array: ndarray) → DataFrame#
+Aggregate an output numpy array using the same bins/labels as used in _aggregate_data.
+Returns a pandas DataFrame.
+
+
-
get_data()#
@@ -629,7 +694,7 @@ Submodules
-
-static load_dataframe(input_df: DataFrame, uncertainty_df: DataFrame)#
+static load_dataframe(input_df: DataFrame, uncertainty_df: DataFrame)#
Pass in pandas dataframes for the input and uncertainty datasets, instead of using files.
- Parameters:
@@ -647,20 +712,80 @@ Submodules
+-
+merge(data_handlers: list, source_labels: list)#
+Merge a list of DataHandler instances into this DataHandler instance.
+All instances must have the same features as the current instance.
+Adds a ‘source_label’ column indicating the origin of each row.
+
+- Parameters:
+
+data_handlers (list) – A list of DataHandler instances to merge.
+source_labels (list) – A list of labels (str) indicating the source of each DataHandler.
+
+
+- Returns:
+True if merging was successful, otherwise False.
+
+- Return type:
+bool
+
+
+
+
+
+-
+plot_2d_histogram(x_col: str, y_col: str, show: bool = True, nbins: int = 100)#
+Plots a 2D histogram of two features in the input data.
+:param x_col: The name of the feature to plot on the x-axis.
+:type x_col: str
+:param y_col: The name of the feature to plot on the y-axis.
+:type y_col: str
+:param show: Whether to display the plot immediately.
+:type show: bool
+:param nbins: The number of bins to use for the histogram in both x and y dimensions.
+:type nbins: int
+
+- Returns:
+The Plotly figure object containing the 2D histogram.
+
+- Return type:
+plotly.graph_objects.Figure
+
+
+
+
-
-plot_data_uncertainty(feature_idx)#
-Create a plot of the data vs the uncertainty for a specified feature, by the feature index.
+plot_data_uncertainty(show: bool = True, include_menu: bool = True, feature_idx: int = None)#
+Create a plot of the data vs the uncertainty for a specified feature, with a dropdown menu for feature selection.
+
+
+
+-
+plot_feature_correlation_heatmap(method: str = 'pearson', show: bool = True)#
+Plots a correlation heatmap for the features in the DataFrame.
- Parameters:
-feature_idx (int) – The index of the feature, column, of the input and uncertainty dataset to plot.
+
+df (pd.DataFrame) – The input DataFrame with features as columns.
+method (str) – Correlation method: ‘pearson’, ‘spearman’, or ‘kendall’.
+show (bool) – Whether to display the plot immediately.
+
+
+- Returns:
+The Plotly heatmap figure.
+
+- Return type:
+plotly.graph_objects.Figure
-
-plot_feature_data(x_idx, y_idx)#
+plot_feature_data(x_idx, y_idx, show: bool = True)#
Create a plot of a data feature, column, vs another data feature, column. Specified by the feature indices.
- Parameters:
@@ -674,7 +799,7 @@ Submodules
-
-plot_feature_timeseries(feature_selection)#
+plot_feature_timeseries(feature_selection, show: bool = True)#
Create a plot of a feature, or list of features, as a timeseries.
- Parameters:
@@ -683,6 +808,37 @@ Submodules
+-
+plot_ridgeline(log_x=True, fill=False, max_height=800, min_spacing=0.5, max_spacing=1.5, nbins=500, show=True)#
+Create a ridgeline plot of the feature distributions in the input data.
+
+- Parameters:
+
+log_x (bool) – Whether to use a logarithmic scale for the x-axis.
+fill (bool) – Whether to fill the area under the curves.
+max_height (int) – The maximum height of the plot in pixels.
+min_spacing (float) – The minimum spacing between the ridgelines.
+max_spacing (float) – The maximum spacing between the ridgelines.
+nbins (int) – The number of bins to use for the histogram in the x-axis.
+show (bool) – Whether to display the plot immediately.
+
+
+- Returns:
+The Plotly figure object containing the ridgeline plot.
+
+- Return type:
+plotly.graph_objects.Figure
+
+
+
+
+
+-
+plot_superimposed_histograms(show: bool = True, nbins: int = 50)#
+Plots superimposed histograms for each feature in the input data using a colormap.
+
+
-
set_category(feature: str, category: str = 'strong')#
@@ -705,6 +861,21 @@ Submodules
+-
+split_locations()#
+When the input data has location information, this function splits the data and uncertainty into
+separate DataHandler instances for each location.
+
+- Returns:
+A list of DataHandler instances, one for each unique location in the input data.
+
+- Return type:
+list
+
+
+
+
@@ -712,7 +883,7 @@ Submodulesesat.data.test_tools module#
-
-class esat.data.test_tools.CompareAnalyzer(input_df, pmf_profile_df, pmf_contributions_df, ls_profile_df, ws_profile_df, ls_mapping, ws_mapping, ls_contributions_df, ws_contributions_df, features, datetimestamps)#
+class esat.data.test_tools.CompareAnalyzer(input_df, pmf_profile_df, pmf_contributions_df, ls_profile_df, ws_profile_df, ls_mapping, ws_mapping, ls_contributions_df, ws_contributions_df, features, datetimestamps)#
Bases: object
Compare ESAT output with the PMF5 output.
@@ -807,8 +978,11 @@ SubmodulesModelAnalysis
+ModelAnalysis.aggregate_factors_for_plotting()
ModelAnalysis.calculate_statistics()
ModelAnalysis.features_metrics()
+ModelAnalysis.plot_all_factors()
+ModelAnalysis.plot_all_factors_3d()
ModelAnalysis.plot_estimated_observed()
ModelAnalysis.plot_estimated_timeseries()
ModelAnalysis.plot_factor_composition()
@@ -824,12 +998,19 @@ Submodulesesat.data.datahandler module
DataHandler
+DataHandler.aggregate_output()
DataHandler.get_data()
DataHandler.load_dataframe()
+DataHandler.merge()
+DataHandler.plot_2d_histogram()
DataHandler.plot_data_uncertainty()
+DataHandler.plot_feature_correlation_heatmap()
DataHandler.plot_feature_data()
DataHandler.plot_feature_timeseries()
+DataHandler.plot_ridgeline()
+DataHandler.plot_superimposed_histograms()
DataHandler.set_category()
+DataHandler.split_locations()
@@ -871,7 +1052,7 @@ Submodules
- © Copyright 2024, EPA.
+ © Copyright 2025, EPA.
diff --git a/docs/html/docs/esat.error.html b/docs/html/docs/esat.error.html
index 5f9652c..0b1610d 100644
--- a/docs/html/docs/esat.error.html
+++ b/docs/html/docs/esat.error.html
@@ -8,7 +8,7 @@
- esat.error package — Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ esat.error package — Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -28,16 +28,16 @@
-
-
+
+
-
-
+
+
@@ -132,7 +132,7 @@
- Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -392,7 +392,7 @@ Submodulesesat.error.bootstrap module#
-
-class esat.error.bootstrap.Bootstrap(sa: SA, feature_labels: list, model_selected: int = -1, bootstrap_n: int = 20, block_size: int = 10, threshold: float = 0.6, seed: int = None)#
+class esat.error.bootstrap.Bootstrap(sa: SA, feature_labels: list = None, model_selected: int = -1, bootstrap_n: int = 20, block_size: int = 10, threshold: float = 0.6, parallel: bool = True, cpus: int = -1, seed: int = None)#
Bases: object
The Bootstrap (BS) method is used to detect and estimate disproportionate effects of a small set of data samples on
the solution. The BS method assembles dataset by randomly selecting blocks of consecutive samples from the original
@@ -414,6 +414,9 @@
Submodules
-
-static load(file_path: str)#
+static load(file_path: str)#
Load a previously saved BS SA pickle file.
- Parameters:
@@ -588,7 +591,7 @@ Submodulesesat.error.bs_disp module#
-
-class esat.error.bs_disp.BSDISP(sa: SA, feature_labels: list, model_selected: int = -1, bootstrap: Bootstrap = None, bootstrap_n: int = 20, block_size: int = 10, threshold: float = 0.6, max_search: int = 50, threshold_dQ: float = 0.1, features: list = None, seed: int = None)#
+class esat.error.bs_disp.BSDISP(sa: SA, feature_labels: list, model_selected: int = -1, bootstrap: Bootstrap = None, bootstrap_n: int = 20, block_size: int = 10, threshold: float = 0.6, max_search: int = 50, threshold_dQ: float = 0.1, features: list = None, seed: int = None, cores: int = 0)#
Bases: object
The Bootstrap-Displacement (BS-DISP) method combines both the Bootstrap and Displacement methods to estimate the
errors with both random and rotational ambiguity. For each BS run/dataset, the DISP method is run on that dataset.
@@ -622,7 +625,7 @@ Submodules
-
-static load(file_path: str)#
+static load(file_path: str)#
Load a previously saved BS-DISP SA pickle file.
- Parameters:
@@ -685,12 +688,15 @@ Submodules
-
-run(parallel: bool = True, keep_H: bool = True, reuse_seed: bool = True, block: bool = True, overlapping: bool = False)#
+run(parallel: bool = False, keep_H: bool = True, reuse_seed: bool = True, block: bool = True, overlapping: bool = False)#
Run the BS-DISP error estimation method. If no prior BS run had been completed, this will execute a BS run and
then a DISP for each of the BS runs.
- Parameters:
+parallel (bool) – Used to specify which part of the BS-DISP method to run in parallel. If True, the BS-DISP will run the DISP
+instances in parallel; otherwise each instance will be run sequentially with parallelization occurring inside
+each DISP run. Default is False, which becomes more efficient as the number of features being analyzed increases.
keep_H (bool) – When retraining the SA models using the resampled input and uncertainty datasets, keep the base model H
matrix instead of reinitializing. The W matrix is always reinitialized when SA is run on the BS datasets.
Default = True
@@ -739,7 +745,7 @@ Submodulesesat.error.displacement module#
-
-class esat.error.displacement.Displacement(sa: SA, feature_labels: list, model_selected: int = -1, max_search: int = 50, threshold_dQ: float = 0.1, features: list = None)#
+class esat.error.displacement.Displacement(sa: SA, feature_labels: list, model_selected: int = 1, max_search: int = 50, threshold_dQ: float = 0.1, features: list = None, cores: int = -1, parallel: bool = True)#
Bases: object
The displacement method (DISP) for error estimation explores the rotational ambiguity in the solution by assessing
the largest range of source profile values without an appreciable increase in the loss value (Q).
@@ -771,7 +777,7 @@ Submodules
-
-static load(file_path: str)#
+static load(file_path: str)#
Load a previously saved DISP SA pickle file.
- Parameters:
@@ -834,7 +840,7 @@ Submodules
-
-run(batch: int = -1)#
+run(batch: int = 1)#
Run the DISP method on the provided SA model.
- Parameters:
@@ -876,7 +882,7 @@ Submodulesesat.error.error module#
-
-class esat.error.error.Error(bs: Bootstrap = None, disp: Displacement = None, bsdisp: BSDISP = None)#
+class esat.error.error.Error(bs: Bootstrap = None, disp: Displacement = None, bsdisp: BSDISP = None)#
Bases: object
Calculate the summary error statistics from bootstrap, displacement and BS-DISP results.
Calculate the combined error summary statistics from various error estimation methods.
@@ -1032,7 +1038,7 @@ Submodules
- © Copyright 2024, EPA.
+ © Copyright 2025, EPA.
diff --git a/docs/html/docs/esat.html b/docs/html/docs/esat.html
index 5c3b8cd..f2a96b7 100644
--- a/docs/html/docs/esat.html
+++ b/docs/html/docs/esat.html
@@ -8,7 +8,7 @@
- esat package — Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ esat package — Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -28,16 +28,16 @@
-
-
+
+
-
-
+
+
@@ -132,7 +132,7 @@
- Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -330,8 +330,9 @@ Contents
esat.utils module
esat.estimator module
@@ -385,8 +386,11 @@ SubpackagesModelAnalysis
+ModelAnalysis.aggregate_factors_for_plotting()
ModelAnalysis.calculate_statistics()
ModelAnalysis.features_metrics()
+ModelAnalysis.plot_all_factors()
+ModelAnalysis.plot_all_factors_3d()
ModelAnalysis.plot_estimated_observed()
ModelAnalysis.plot_estimated_timeseries()
ModelAnalysis.plot_factor_composition()
@@ -402,12 +406,19 @@ Subpackagesesat.data.datahandler module
DataHandler
+DataHandler.aggregate_output()
DataHandler.get_data()
DataHandler.load_dataframe()
+DataHandler.merge()
+DataHandler.plot_2d_histogram()
DataHandler.plot_data_uncertainty()
+DataHandler.plot_feature_correlation_heatmap()
DataHandler.plot_feature_data()
DataHandler.plot_feature_timeseries()
+DataHandler.plot_ridgeline()
+DataHandler.plot_superimposed_histograms()
DataHandler.set_category()
+DataHandler.split_locations()
@@ -495,6 +506,9 @@ SubpackagesBatchSA.train()
+configure_logging()
+is_picklable()
+logging_listener()
esat.model.ls_nmf module
@@ -504,7 +518,6 @@ Subpackagesesat.model.optimization module
esat.model.recombinator module
OptimalBlockLength
OptimalBlockLength.b_star_cb
@@ -619,6 +632,44 @@ Submodulesesat.utils.compare_all_factors(matrix1, matrix2)#
+
+-
+esat.utils.memory_estimate(n_features, n_samples, factors, cores: int = None)#
+Estimate the memory usage of the algorithm.
+
+- Parameters:
+
+n_features – Number of features.
+n_samples – Number of samples.
+factors – Number of factors.
+
+
+- Returns:
+Estimated memory usage in bytes.
+
+- Return type:
+int
+
+
+
+
+
+-
+esat.utils.min_timestep(data: DataFrame)#
+Find the minimum timestep in a dataframe.
+
+- Parameters:
+data – Dataframe to be searched.
+
+- Returns:
+Minimum timestep.
+
+- Return type:
+int
+
+
+
+
-
esat.utils.np_encoder(object)#
@@ -636,17 +687,12 @@ Submodules
--
-esat.utils.solution_bump(profile: ndarray, contribution: ndarray, bump_range: tuple = (0.9, 1.1), seed: int = 42)#
-
-
esat.estimator module#
-
-class esat.estimator.FactorEstimator(V: ndarray, U: ndarray, seed: int = 42, test_percent: float = 0.1, k_coef: float = 1.0)#
+class esat.estimator.FactorEstimator(V: ndarray, U: ndarray, seed: int = 42, test_percent: float = 0.1, k_coef: float = 1.0)#
Bases: object
Factor search uses a Monte Carlo sampling approach for testing different factor counts using cross-validation
testing. Both a train and a test MSE are calculated for each model in the search. These MSE values are averaged
@@ -770,8 +816,9 @@
Submodulesesat.utils module
- esat.estimator module
@@ -326,6 +326,9 @@ Contents
BatchSA.train()
+configure_logging()
+is_picklable()
+logging_listener()
esat.model.ls_nmf module
@@ -335,7 +338,6 @@ Contents
-esat.model.optimization module
esat.model.recombinator module
OptimalBlockLength
OptimalBlockLength.b_star_cb
@@ -386,7 +388,7 @@ Submodulesesat.model.batch_sa module#
-
-class esat.model.batch_sa.BatchSA(V: ndarray, U: ndarray, factors: int, models: int = 20, method: str = 'ls-nmf', seed: int = 42, H: ndarray = None, W: ndarray = None, H_ratio: float = 0.9, init_method: str = 'column_mean', init_norm: bool = True, fuzziness: float = 5.0, max_iter: int = 20000, converge_delta: float = 0.1, converge_n: int = 100, best_robust: bool = True, robust_mode: bool = False, robust_n: int = 200, robust_alpha: float = 4.0, parallel: bool = True, optimized: bool = True, verbose: bool = True)#
+class esat.model.batch_sa.BatchSA(V: ndarray, U: ndarray, factors: int, models: int = 20, method: str = 'ls-nmf', seed: int = 42, H: ndarray = None, W: ndarray = None, init_method: str = 'column_mean', init_norm: bool = True, max_iter: int = 20000, converge_delta: float = 0.1, converge_n: int = 100, best_robust: bool = True, parallel: bool = True, cores: int = None, hold_h: bool = False, delay_h: int = -1, verbose: bool = True, progress_callback: callable = None, use_gpu: bool = False)#
Bases: object
The batch SA class is used to create multiple SA models, using the same input configuration and different
random seeds for initialization of W and H matrices.
@@ -403,30 +405,18 @@ Submodules
-
-static load(file_path: str)#
+static load(file_path: str)#
Load a previously saved Batch SA pickle file.
- Parameters:
@@ -499,16 +489,49 @@ Submodules
+-
+esat.model.batch_sa.configure_logging(log_queue)#
+Configures logging for a child process to send log messages to the log queue.
+
+- Parameters:
+log_queue (multiprocessing.Queue) – The queue to send log messages to.
+
+
+
+
+
+-
+esat.model.batch_sa.is_picklable(obj)#
+
+
+
+-
+esat.model.batch_sa.logging_listener(log_queue)#
+Sets up a logging listener to handle log messages from a multiprocessing.Queue.
+
+- Parameters:
+log_queue (multiprocessing.Queue) – The queue to receive log messages from child processes.
+
+- Returns:
+The logging listener that listens for log messages.
+
+- Return type:
+QueueListener
+
+
+
+
esat.model.ls_nmf module#
-
-class esat.model.ls_nmf.LSNMF#
+class esat.model.ls_nmf.LSNMF#
Bases: object
-
-static update(V: ndarray, We: ndarray, W: ndarray, H: ndarray)#
+static update(V: ndarray, We: ndarray, W: ndarray, H: ndarray, hold_h: bool = False)#
The update procedure for the least-squares nmf (ls-nmf) algorithm.
The ls-nmf algorithm is described in the publication ‘LS-NMF: A modified non-negative matrix factorization
algorithm utilizing uncertainty estimates’ (https://doi.org/10.1186/1471-2105-7-175).
@@ -519,6 +542,7 @@ SubmodulesReturns:
@@ -532,15 +556,12 @@ Submodules
-esat.model.optimization module#
esat.model.recombinator module#
-
-class esat.model.recombinator.OptimalBlockLength(b_star_sb, b_star_cb)#
+class esat.model.recombinator.OptimalBlockLength(b_star_sb, b_star_cb)#
Bases: NamedTuple
-
@@ -634,7 +655,7 @@
esat.model.optimization moduleesat.model.sa module#
-
-class esat.model.sa.SA(V: ndarray, U: ndarray, factors: int, method: str = 'ls-nmf', seed: int = 42, optimized: bool = True, parallelized: bool = True, verbose: bool = False)#
+class esat.model.sa.SA(V: ndarray, U: ndarray, factors: int, method: str = 'ls-nmf', seed: int = 42, parallel: bool = True, verbose: bool = False, use_gpu: bool = True)#
Bases: object
The primary Source Apportionment model object which holds and manages the configuration, data,
and meta-data for executing and analyzing ESAT output.
@@ -654,10 +675,7 @@ esat.model.optimization module
-
-initialize(H: ndarray = None, W: ndarray = None, init_method: str = 'column_mean', init_norm: bool = True, fuzziness: float = 5.0, H_ratio: float = 0.9)#
+initialize(H: ndarray = None, W: ndarray = None, init_method: str = 'column_mean', init_norm: bool = True)#
Initialize the factor profile (H) and factor contribution matrices (W).
The W and H matrices can be created using several methods or be passed in by the user. The shapes of these
-matrices are W: (M, factors) and H: (factors: N). There are three methods for initializing the W and H matrices:
+matrices are W: (M, factors) and H: (factors, N). There are two methods for initializing the W and H matrices:
1) K Means Clustering (‘kmeans’), which will cluster the input dataset into the number of factors set, then assign
the contributions to those factors; the H matrix is calculated from the centroids of those clusters.
-2) Fuzzy C-Means Clustering (‘cmeans’), which will cluster the input dataset in the same way as kmeans but sets
-the contributions based upon the ratio of the distance to the clusters.
-3) A random sampling based upon the square root of the mean of the features (columns), the default method.
+2) A random sampling based upon the square root of the mean of the features (columns), the default method.
- Parameters:
@@ -684,14 +700,10 @@ esat.model.optimization module
-
-static load(file_path: str)#
+static load(file_path: str)#
Load a previously saved SA pickle file.
- Parameters:
@@ -747,7 +759,7 @@ esat.model.optimization module
-
-train(max_iter: int = 20000, converge_delta: float = 0.1, converge_n: int = 100, model_i: int = -1, robust_mode: bool = False, robust_n: int = 200, robust_alpha: float = 4, update_step: str = None, bump: bool = False, bump_n: int = 10, bump_range: tuple = (0.9, 1.1))#
+train(max_iter: int = 20000, converge_delta: float = 0.1, converge_n: int = 100, model_i: int = 1, robust_mode: bool = False, robust_n: int = 200, robust_alpha: float = 4, hold_h: bool = False, delay_h: int = -1, update_step: str = None, progress_callback: callable = None)#
Train the SA model by iteratively updating the W and H matrices reducing the loss value Q until convergence.
The train method runs the update algorithm until the convergence criteria is met or the maximum number
of iterations is reached. The stopping conditions are specified by the input parameters to the train method. The
@@ -778,6 +790,8 @@
esat.model.optimization moduleesat.model.ws_nmf module#
-
-class esat.model.ws_nmf.WSNMF#
+class esat.model.ws_nmf.WSNMF#
Bases: object
-
-static update(V: ndarray, We: ndarray, W: ndarray, H: ndarray)#
+static update(V: ndarray, We: ndarray, W: ndarray, H: ndarray)#
Weighted Semi-NMF algorithm.
The details of the semi-nmf algorithm are described in ‘Convex and Semi-Nonnegative Matrix Factorizations’
(https://doi.org/10.1109/TPAMI.2008.277). The algorithm described here does not include the use of uncertainty
@@ -883,6 +897,9 @@
esat.model.optimization moduleBatchSA.train()
+configure_logging()
+is_picklable()
+logging_listener()
esat.model.ls_nmf module
@@ -892,7 +909,6 @@ esat.model.optimization moduleesat.model.optimization module
esat.model.recombinator module
OptimalBlockLength
OptimalBlockLength.b_star_cb
@@ -947,7 +963,7 @@ esat.model.optimization module
- © Copyright 2024, EPA.
+ © Copyright 2025, EPA.
diff --git a/docs/html/docs/esat.rotational.html b/docs/html/docs/esat.rotational.html
index aa3792c..6658c9c 100644
--- a/docs/html/docs/esat.rotational.html
+++ b/docs/html/docs/esat.rotational.html
@@ -8,7 +8,7 @@
- esat.rotational package — Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ esat.rotational package — Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -28,16 +28,16 @@
-
-
+
+
-
-
+
+
@@ -132,7 +132,7 @@
- Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -370,7 +370,7 @@ Submodulesesat.rotational.constrained module#
-
-class esat.rotational.constrained.ConstrainedModel(base_model: SA, data_handler: DataHandler, softness: float = 1.0)#
+class esat.rotational.constrained.ConstrainedModel(base_model: SA, data_handler: DataHandler, softness: float = 1.0)#
Bases: object
The constrained model class that creates an instance of a constrained model using a base SA model.
The constrained model takes as input a previously completed SA instance, the solution of a base model run
@@ -471,7 +471,7 @@
Submodules
-
-static load(file_path: str)#
+static load(file_path: str)#
Load a previously saved Constrained SA pickle file.
- Parameters:
@@ -649,7 +649,7 @@ Submodules
-
-class esat.rotational.constrained.Constraint(constraint_type: str, index: tuple, target: str, target_values=None)#
+class esat.rotational.constrained.Constraint(constraint_type: str, index: tuple, target: str, target_values=None)#
Bases: object
The constrained model takes a base SA solution and finds a new solution that applies both constraints and
expressions. The Constraint class objects are created from the add_constraint method in the constrained model class.
@@ -821,7 +821,7 @@
Submodules
- © Copyright 2024, EPA.
+ © Copyright 2025, EPA.
diff --git a/docs/html/docs/eval.html b/docs/html/docs/eval.html
index 0593fe7..28348a2 100644
--- a/docs/html/docs/eval.html
+++ b/docs/html/docs/eval.html
@@ -8,7 +8,7 @@
- eval package — Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ eval package — Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -28,16 +28,16 @@
-
-
+
+
-
-
+
+
@@ -132,7 +132,7 @@
- Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -327,6 +327,11 @@ Contents
FactorCompare.print_results()
+FactorCompareV2
+
eval.simulator module
@@ -365,7 +370,7 @@ Submoduleseval.factor_comparison module#
-
-class eval.factor_comparison.FactorCompare(input_df: DataFrame, uncertainty_df: DataFrame, base_profile_df: DataFrame, base_contribution_df: DataFrame, batch_sa: BatchSA, sa_output_file: str = None, method: str = 'all')#
+class eval.factor_comparison.FactorCompare(input_df: DataFrame, uncertainty_df: DataFrame, base_profile_df: DataFrame, base_contribution_df: DataFrame, batch_sa: BatchSA, sa_output_file: str = None, method: str = 'all')#
Bases: object
Compare the results between a single base solution and a collection of solutions. Used for comparing the output of
ESAT to those generated by PMF5 and by the Simulator for comparing the output of models using synthetic data and
@@ -388,7 +393,7 @@
Submodules
-
-static calculate_correlation(factor1, factor2)#
+static calculate_correlation(factor1, factor2)#
Calculates the correlation between two factors.
@@ -411,7 +416,7 @@ Submodules
-
-static load_pmf_output(factors: int, input_df: DataFrame, uncertainty_df: DataFrame, pmf_profile_file: str, pmf_contribution_file: str, batch_sa: BatchSA)#
+static load_pmf_output(factors: int, input_df: DataFrame, uncertainty_df: DataFrame, pmf_profile_file: str, pmf_contribution_file: str, batch_sa: BatchSA)#
Load the output of a completed PMF5 base model, specifying the profile and contribution files.
- Parameters:
@@ -447,12 +452,52 @@ Submodules
+-
+class eval.factor_comparison.FactorCompareV2(base_model: SA, models: list, in_notebook: bool = False)#
+Bases: object
+Comparing factors between a base model and a list of other models, providing a mapping between the base model and
+each model in the models list.
+
+- Parameters:
+
+base_model (SA) – The base model to compare against.
+models (list) – A list of models to compare against the base model.
+
+
+
+
+-
+calculate_correlation_matrix(in_notebook: bool = False)#
+Correlation matrices are calculated between a reference base model and a collection of separate models
+(independent or perturbed). The correlation metrics used for comparison are implemented as defined in the
+publication https://doi.org/10.1021/es800085t.
+
+- Parameters:
+in_notebook (bool) – If True, the function will display a progress bar formatted for Jupyter notebooks.
+
+
+
+
+
+-
+determine_map(method: str = 'raae')#
+Determine the factor mapping between the base model and a collection of models.
+
+- Parameters:
+method (str) – Correlation method to use, options include: “corr”, “raae”, “emc”.
+
+
+
+
+
+
eval.simulator module#
-
-class eval.simulator.Simulator(seed: int, factors_n: int, features_n: int, samples_n: int, outliers: bool = True, outlier_p: float = 0.1, outlier_mag: float = 2.0, contribution_max: int = 10, noise_mean_min: float = 0.1, noise_mean_max: float = 0.12, noise_scale: float = 0.02, uncertainty_mean_min: float = 0.05, uncertainty_mean_max: float = 0.05, uncertainty_scale: float = 0.01, verbose: bool = True)#
+class eval.simulator.Simulator(seed: int, factors_n: int, features_n: int, samples_n: int, outliers: bool = True, outlier_p: float = 0.1, outlier_mag: float = 2.0, contribution_max: int = 10, noise_mean_min: float = 0.1, noise_mean_max: float = 0.12, noise_scale: float = 0.02, uncertainty_mean_min: float = 0.05, uncertainty_mean_max: float = 0.05, uncertainty_scale: float = 0.01, verbose: bool = True)#
Bases: object
The ESAT Simulator provides methods for generating customized synthetic source profiles and datasets. These
synthetic datasets can then be passed to SA or BatchSA instances. The results of those model runs can be evaluated
@@ -533,7 +578,7 @@
Submodules
-
-static load(file_path: str)#
+static load(file_path: str)#
Load a previously saved ESAT Simulator pickle file.
- Parameters:
@@ -684,6 +729,11 @@ SubmodulesFactorCompare.print_results()
+FactorCompareV2
+
eval.simulator module
@@ -344,8 +344,9 @@ Python APIesat.utils module
esat.estimator module
@@ -398,13 +399,15 @@ Python APISubmodules
esat.model.batch_sa module
esat.model.ls_nmf module
-esat.model.optimization module
esat.model.recombinator module
OptimalBlockLength
lam()
@@ -437,6 +440,7 @@ Python APISubmodules
eval.factor_comparison module
eval.simulator module
@@ -505,7 +509,7 @@ Python API
- © Copyright 2024, EPA.
+ © Copyright 2025, EPA.
diff --git a/docs/html/genindex.html b/docs/html/genindex.html
index d452226..83a12ed 100644
--- a/docs/html/genindex.html
+++ b/docs/html/genindex.html
@@ -7,7 +7,7 @@
- Index — Environmental Source Apportionment Toolkit (ESAT) 2024.0.2 documentation
+ Index — Environmental Source Apportionment Toolkit (ESAT) 2025.0.2 documentation
@@ -27,16 +27,16 @@
-
-
+
+
-
-
+
+
@@ -500,10 +500,14 @@ A
@@ -532,6 +536,8 @@ C