diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b23483f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,51 @@
+# Python bytecode and cache
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Virtual environments
+.venv/
+venv/
+env/
+.python-version
+
+# Environment variables/secrets
+.env
+.env.*
+
+# Build and distribution artifacts
+build/
+dist/
+.eggs/
+*.egg-info/
+
+# Testing and coverage
+.pytest_cache/
+.mypy_cache/
+.tox/
+.nox/
+.coverage
+.coverage.*
+coverage.xml
+htmlcov/
+
+# IDE/editor settings
+.idea/
+.vscode/
+.ropeproject/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Jupyter
+.ipynb_checkpoints/
+
+# Caches
+.cache/
+
+# PIP and packaging
+pip-wheel-metadata/
+
+# Misc
+*.log
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..dd4c951
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,7 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="PROJECT_PROFILE" />
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/ipfn.iml b/.idea/ipfn.iml
new file mode 100644
index 0000000..014f736
--- /dev/null
+++ b/.idea/ipfn.iml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..a8f3eb3
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..f6758a3
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/ipfn.iml" filepath="$PROJECT_DIR$/.idea/ipfn.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/benchmark.py b/benchmark.py
new file mode 100644
index 0000000..fba81c1
--- /dev/null
+++ b/benchmark.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python3
+import time
+import numpy as np
+import pandas as pd
+from ipfn import ipfn
+
+# Optional Polars import to benchmark polars backend when available
+try:
+ import polars as pl # type: ignore
+except Exception:
+ pl = None
+
+
+def time_run(name, builder_fn, repeats=1000, algorithms=("legacy", "optimized")):
+ print(f"\n=== {name} ===")
+ times = {algo: [] for algo in algorithms}
+ results = {}
+ for algo in algorithms:
+ for _ in range(repeats):
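+            # Rebuild fresh inputs on every repeat so each run starts from unfitted data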
+ original, aggregates, dimensions, weight_col = builder_fn()
+            # If benchmarking the polars backend and the input is a pandas DataFrame, convert it to Polars up front
+ if (pl is not None) and (algo == 'polars'):
+ if isinstance(original, pd.DataFrame):
+ try:
+ original = pl.from_pandas(original)
+ except Exception:
+ original = pl.DataFrame(original)
+ IPF = ipfn(original, aggregates, dimensions, convergence_rate=1e-6, algorithm=algo)
+ t0 = time.perf_counter()
+ out = IPF.iteration()
+ t1 = time.perf_counter()
+ times[algo].append(t1 - t0)
+ results[algo] = out
+ print(f"{algo:9s} avg time over {repeats}: {np.mean(times[algo]):.6f}s (min {np.min(times[algo]):.6f}s, max {np.max(times[algo]):.6f}s)")
+ return results
+
+
+def wikipedia_2d_example():
+ def build():
+ m = np.array([[40, 30, 20, 10], [35, 50, 100, 75], [30, 80, 70, 120], [20, 30, 40, 50]], dtype=float)
+ xip = np.array([150, 300, 400, 150], dtype=float)
+ xpj = np.array([200, 300, 400, 100], dtype=float)
+ aggregates = [xip, xpj]
+ dimensions = [[0], [1]]
+ return m.copy(), aggregates, dimensions, 'total'
+ return build
+
+
+def three_d_example():
+ def build():
+ m = np.zeros((2, 4, 3), dtype=float)
+ m[0,0,0] = 1; m[0,0,1] = 2; m[0,0,2] = 1
+ m[0,1,0] = 3; m[0,1,1] = 5; m[0,1,2] = 5
+ m[0,2,0] = 6; m[0,2,1] = 2; m[0,2,2] = 2
+ m[0,3,0] = 1; m[0,3,1] = 7; m[0,3,2] = 2
+ m[1,0,0] = 5; m[1,0,1] = 4; m[1,0,2] = 2
+ m[1,1,0] = 5; m[1,1,1] = 5; m[1,1,2] = 5
+ m[1,2,0] = 3; m[1,2,1] = 8; m[1,2,2] = 7
+ m[1,3,0] = 2; m[1,3,1] = 7; m[1,3,2] = 6
+ xipp = np.array([52, 48], dtype=float)
+ xpjp = np.array([20, 30, 35, 15], dtype=float)
+ xppk = np.array([35, 40, 25], dtype=float)
+ xijp = np.array([[9, 17, 19, 7], [11, 13, 16, 8]], dtype=float)
+ xpjk = np.array([[7, 9, 4], [8, 12, 10], [15, 12, 8], [5, 7, 3]], dtype=float)
+ aggregates = [xipp, xpjp, xppk, xijp, xpjk]
+ dimensions = [[0], [1], [2], [0, 1], [1, 2]]
+ return m.copy(), aggregates, dimensions, 'total'
+ return build
+
+
+def pandas_example():
+ def build():
+ age = [30, 30, 30, 30, 40, 40, 40, 40, 50, 50, 50, 50]
+ distance = [10, 20, 30, 40, 10, 20, 30, 40, 10, 20, 30, 40]
+ m = [8., 4., 6., 7., 3., 6., 5., 2., 9., 11., 3., 1.]
+ df = pd.DataFrame({'age': age, 'distance': distance, 'total': m})
+ xip = df.groupby('age')['total'].sum()
+ xip.loc[30] = 20
+ xip.loc[40] = 18
+ xip.loc[50] = 22
+ xpj = df.groupby('distance')['total'].sum()
+ xpj.loc[10] = 18
+ xpj.loc[20] = 16
+ xpj.loc[30] = 12
+ xpj.loc[40] = 14
+ dimensions = [['age'], ['distance']]
+ aggregates = [xip, xpj]
+ return df.copy(), aggregates, dimensions, 'total'
+ return build
+
+
+def main():
+ print("Benchmarking IPFN: legacy vs optimized (+ polars if available)")
+ time_run('Wikipedia 2D (NumPy)', wikipedia_2d_example(), repeats=10)
+ time_run('3D example (NumPy)', three_d_example(), repeats=10)
+ algos_pd = ("legacy", "optimized") + (("polars",) if pl is not None else tuple())
+ time_run('Pandas example', pandas_example(), repeats=10, algorithms=algos_pd)
+    # 6D, 1500-row benchmark (defined below): 100 repetitions with time/CPU/RAM metrics
+ benchmark_6d_1500(repeats=100)
+
+
+# ---- 6D, 1500-row benchmark with time/CPU/RAM measurements ----
+
+def _build_6d_dataset_1500(seed: int = 42):
+ import numpy as np
+ import pandas as pd
+
+ rng = np.random.RandomState(seed)
+ n_rows = 1500
+ # Define 6 dimensions with varying number of categories
+ dims = {
+ 'd1': [f'a{i}' for i in range(5)],
+ 'd2': [f'b{i}' for i in range(5)],
+ 'd3': [f'c{i}' for i in range(5)],
+ 'd4': [f'd{i}' for i in range(5)],
+ 'd5': [f'e{i}' for i in range(3)],
+ 'd6': [f'f{i}' for i in range(4)],
+ }
+ data = {}
+ for k, cats in dims.items():
+ data[k] = rng.choice(cats, size=n_rows, replace=True)
+ # Positive weights
+ data['total'] = rng.gamma(shape=2.0, scale=5.0, size=n_rows).astype(float)
+ df = pd.DataFrame(data)
+
+ # Build single-dimension marginals with a small random perturbation and rescale to preserve total
+ total_sum = df['total'].sum()
+ aggregates = []
+ dimensions = []
+ for k in dims.keys():
+ grp = df.groupby(k)['total'].sum()
+ # Perturb each group by up to ±10%
+ noise = rng.uniform(0.9, 1.1, size=len(grp))
+ target = grp.values * noise
+ # Rescale to match original total sum for stability
+ target *= (total_sum / target.sum())
+ agg = pd.Series(target, index=grp.index, name='total')
+ aggregates.append(agg)
+ dimensions.append([k])
+
+ return df, aggregates, dimensions, 'total'
+
+
+def _measure_ipfn_run(algo: str, original, aggregates, dimensions, weight_col='total'):
+ import time
+ import copy as _copy
+ import tracemalloc
+
+ # Optional psutil import
+ try:
+ import psutil # type: ignore
+ proc = psutil.Process()
+ except Exception:
+ psutil = None # type: ignore
+ proc = None
+
+ # Prepare input per algorithm (convert to Polars if requested and available)
+    # pandas needs an explicit deep copy; deepcopy covers numpy/other inputs (ndarray.copy() takes no deep argument)
+    local_original = original.copy(deep=True) if isinstance(original, pd.DataFrame) else _copy.deepcopy(original)
+ if (pl is not None) and (algo == 'polars'):
+ try:
+ import polars as _pl # type: ignore
+ if not isinstance(local_original, _pl.DataFrame):
+ try:
+ local_original = _pl.from_pandas(local_original)
+ except Exception:
+ local_original = _pl.DataFrame(local_original)
+ except Exception:
+ pass
+
+ # Measurements
+ cpu_t0 = time.process_time()
+ wall_t0 = time.perf_counter()
+
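+    # tracemalloc is enabled inside the timed window, so its tracing overhead is included in the reported times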
+ tracemalloc.start()
+ rss_before = proc.memory_info().rss if proc else None
+
+ IPF = ipfn(local_original, _copy.deepcopy(aggregates), _copy.deepcopy(dimensions), convergence_rate=1e-6, algorithm=algo)
+ _ = IPF.iteration()
+
+ current, peak = tracemalloc.get_traced_memory()
+ tracemalloc.stop()
+
+ wall_t1 = time.perf_counter()
+ cpu_t1 = time.process_time()
+ rss_after = proc.memory_info().rss if proc else None
+
+ return {
+ 'wall_time_s': wall_t1 - wall_t0,
+ 'cpu_time_s': cpu_t1 - cpu_t0,
+ 'peak_tracemalloc_bytes': int(peak),
+ 'rss_before_bytes': int(rss_before) if rss_before is not None else None,
+ 'rss_after_bytes': int(rss_after) if rss_after is not None else None,
+ 'rss_delta_bytes': (int(rss_after) - int(rss_before)) if (rss_before is not None and rss_after is not None) else None,
+ }
+
+
+def benchmark_6d_1500(repeats: int = 100):
+    print(f"\nBenchmark: 6D dataset with 1500 rows, {repeats} runs per algorithm")
+ # Determine algorithms: always legacy and optimized; include polars when available
+ algos = ["legacy", "optimized"] + (["polars"] if pl is not None else [])
+
+ original, aggregates, dimensions, weight_col = _build_6d_dataset_1500()
+
+ import numpy as _np
+ results_summary = {}
+ for algo in algos:
+ wall_times = []
+ cpu_times = []
+ peaks = []
+ rss_deltas = []
+ rss_after = []
+ for _ in range(repeats):
+ m = _measure_ipfn_run(algo, original, aggregates, dimensions, weight_col)
+ wall_times.append(m['wall_time_s'])
+ cpu_times.append(m['cpu_time_s'])
+ peaks.append(m['peak_tracemalloc_bytes'])
+ if m['rss_delta_bytes'] is not None:
+ rss_deltas.append(m['rss_delta_bytes'])
+ if m['rss_after_bytes'] is not None:
+ rss_after.append(m['rss_after_bytes'])
+ summary = {
+ 'wall_time_avg_s': float(_np.mean(wall_times)),
+ 'wall_time_min_s': float(_np.min(wall_times)),
+ 'wall_time_max_s': float(_np.max(wall_times)),
+ 'cpu_time_avg_s': float(_np.mean(cpu_times)),
+ 'peak_tracemalloc_avg_bytes': int(_np.mean(peaks)),
+ }
+ if rss_deltas:
+ summary['rss_delta_avg_bytes'] = float(_np.mean(rss_deltas))
+ if rss_after:
+ summary['rss_after_avg_bytes'] = float(_np.mean(rss_after))
+ results_summary[algo] = summary
+
+ # Pretty print summary
+ print(f"{algo:9s} | wall avg {summary['wall_time_avg_s']:.6f}s (min {summary['wall_time_min_s']:.6f}, max {summary['wall_time_max_s']:.6f}) | "
+ f"cpu avg {summary['cpu_time_avg_s']:.6f}s | peak_py_mem avg {summary['peak_tracemalloc_avg_bytes']/1_048_576:.3f} MiB" +
+ (f" | rss_delta avg {summary['rss_delta_avg_bytes']/1_048_576:.3f} MiB | rss_after avg {summary['rss_after_avg_bytes']/1_048_576:.3f} MiB" if 'rss_delta_avg_bytes' in summary else ""))
+
+ return results_summary
+
+
+if __name__ == '__main__':
+    main()
diff --git a/ipfn.egg-info/PKG-INFO b/ipfn.egg-info/PKG-INFO
new file mode 100644
index 0000000..dade528
--- /dev/null
+++ b/ipfn.egg-info/PKG-INFO
@@ -0,0 +1,239 @@
+Metadata-Version: 2.4
+Name: ipfn
+Version: 1.4.4
+Summary: Iterative Proportional Fitting with N dimensions, for python
+Home-page: https://github.com/Dirguis/ipfn.git
+Author: Damien Forthomme
+Author-email: damien2227@hotmail.com
+License: MIT
+Keywords: iterative proportional fitting ipfp biproportional ras raking scaling
+Platform: Any
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Software Development :: Build Tools
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Requires-Dist: pandas>=0.22.0
+Requires-Dist: numpy
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: platform
+Dynamic: requires-dist
+Dynamic: summary
+
+ipfn
+=======================
+
+Iterative proportional fitting is an algorithm used in many different fields, such as economics or the social sciences, to alter results in such a way that aggregates along one or several dimensions match known marginals (i.e. aggregates along those same dimensions).
+
+The algorithm exists in 2 versions:
+
+* a numpy version, which is by far the fastest
+* a pandas version, which is much slower but easier to use than the numpy version
+
+
+The algorithm recognizes the input variable type and uses the appropriate version to solve the problem. To install the package:
+
+* pip install ipfn
+* pip install git+http://github.com/dirguis/ipfn@master
+
+For more information and examples, please visit:
+
+* `wikipedia page on ipf <https://en.wikipedia.org/wiki/Iterative_proportional_fitting>`_
+* slides explaining the methodology, with links to specific examples
+* https://github.com/Dirguis/ipfn
+
+----
+
+If you want to test the package, clone the repo and from the main folder, run:
+
+* py.test --verbose --color=yes tests/tests.py
+
+----
+
+The project is similar to the ipfp package available for R, and tests have been run to ensure the same results.
+
+----
+
+Input Variables:
+    * original: numpy ndarray or pandas DataFrame to perform the ipfn on.
+    * aggregates: list of numpy arrays or pandas DataFrames/Series. The aggregates are the same as the marginals:
+      the target values we want along one or several axes when aggregating along those axes.
+    * dimensions: list of lists of integers if working with numpy objects, or of column names if working with pandas objects.
+      These are the preserved dimensions along which we sum to get the corresponding aggregates.
+    * convergence_rate: if there are many aggregates/marginals, it can be useful to loosen the convergence criterion.
+ * max_iteration: Integer. Maximum number of iterations allowed.
+ * verbose: integer 0, 1 or 2. Each case number includes the outputs of the previous case numbers.
+
+ * 0: Updated matrix returned.
+
+ * 1: Flag with the output status (0 for failure and 1 for success).
+
+ * 2: dataframe with iteration numbers and convergence rate information at all steps.
+
+    * rate_tolerance: float value. If above 0.0, like 0.001, the algorithm will stop once the difference between the conv_rate values of 2 consecutive iterations falls below that value (see the example below).
+
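+For instance, a minimal call using the options above (a sketch; with verbose=2, iteration() returns the updated matrix, the success flag, and the convergence dataframe)::
+
+    IPF = ipfn.ipfn(m, aggregates, dimensions, convergence_rate=1e-6,
+                    max_iteration=500, verbose=2, rate_tolerance=1e-8)
+    m, flag, conv_report = IPF.iteration()
+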
+
+Wikipedia example with Numpy:
+-----------------------------
+To illustrate Iterative Proportional Fitting, Wikipedia uses an example `here <https://en.wikipedia.org/wiki/Iterative_proportional_fitting>`_.
+
+Below is that example solved with IPFN::
+
+ import numpy as np
+ from ipfn import ipfn
+
+ m = [[40, 30, 20, 10], [35, 50, 100, 75], [30, 80, 70, 120], [20, 30, 40, 50]]
+ m = np.array(m)
+ xip = np.array([150, 300, 400, 150])
+ xpj = np.array([200, 300, 400, 100])
+
+ aggregates = [xip, xpj]
+ dimensions = [[0], [1]]
+
+ IPF = ipfn.ipfn(m, aggregates, dimensions, convergence_rate=1e-6)
+ m = IPF.iteration()
+ print(m)
+
+
+Example with the numpy version of the algorithm:
+------------------------------------------------
+Please follow the example below to run the package. Several additional examples, beyond the one listed below, are included in the ipfn.py script. This example is taken from ``_
+
+First, let us define a matrix of N=3 dimensions, of size 2*4*3, and populate it with some values::
+
+ from ipfn import ipfn
+ import numpy as np
+ import pandas as pd
+
+ m = np.zeros((2,4,3))
+ m[0,0,0] = 1
+ m[0,0,1] = 2
+ m[0,0,2] = 1
+ m[0,1,0] = 3
+ m[0,1,1] = 5
+ m[0,1,2] = 5
+ m[0,2,0] = 6
+ m[0,2,1] = 2
+ m[0,2,2] = 2
+ m[0,3,0] = 1
+ m[0,3,1] = 7
+ m[0,3,2] = 2
+ m[1,0,0] = 5
+ m[1,0,1] = 4
+ m[1,0,2] = 2
+ m[1,1,0] = 5
+ m[1,1,1] = 5
+ m[1,1,2] = 5
+ m[1,2,0] = 3
+ m[1,2,1] = 8
+ m[1,2,2] = 7
+ m[1,3,0] = 2
+ m[1,3,1] = 7
+ m[1,3,2] = 6
+
+Now, let us define some marginals::
+
+ xipp = np.array([52, 48])
+ xpjp = np.array([20, 30, 35, 15])
+ xppk = np.array([35, 40, 25])
+ xijp = np.array([[9, 17, 19, 7], [11, 13, 16, 8]])
+ xpjk = np.array([[7, 9, 4], [8, 12, 10], [15, 12, 8], [5, 7, 3]])
+
+I used the letter p to denote the dimension(s) being summed over.
+
+For this specific example, each marginal has fewer than N=3 dimensions and must be consistent with the dimensions of the contingency table m. For example, the marginal along the first dimension is made of 2 elements, and we want the sum of the elements of m over dimensions 2 and 3 to equal that marginal::
+
+ m[0,:,:].sum() == xipp[0]
+ m[1,:,:].sum() == xipp[1]
+
+Define the aggregates list and the corresponding list of dimensions, to indicate to the algorithm which dimension(s) each aggregate is defined along::
+
+ aggregates = [xipp, xpjp, xppk, xijp, xpjk]
+ dimensions = [[0], [1], [2], [0, 1], [1, 2]]
+
+Finally, run the algorithm::
+
+ IPF = ipfn.ipfn(m, aggregates, dimensions)
+ m = IPF.iteration()
+ print(xijp[0,0])
+ print(m[0, 0, :].sum())
+
+
+Example with the pandas version of the algorithm:
+-------------------------------------------------
+In the same fashion, we can run a similar example, but using a dataframe::
+
+ from ipfn import ipfn
+ import numpy as np
+ import pandas as pd
+
+ m = np.array([1., 2., 1., 3., 5., 5., 6., 2., 2., 1., 7., 2.,
+ 5., 4., 2., 5., 5., 5., 3., 8., 7., 2., 7., 6.], )
+ dma_l = [501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501,
+ 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502]
+ size_l = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,
+ 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]
+
+ age_l = ['20-25','30-35','40-45',
+ '20-25','30-35','40-45',
+ '20-25','30-35','40-45',
+ '20-25','30-35','40-45',
+ '20-25','30-35','40-45',
+ '20-25','30-35','40-45',
+ '20-25','30-35','40-45',
+ '20-25','30-35','40-45']
+
+ df = pd.DataFrame()
+ df['dma'] = dma_l
+ df['size'] = size_l
+ df['age'] = age_l
+ df['total'] = m
+
+ xipp = df.groupby('dma')['total'].sum()
+ xpjp = df.groupby('size')['total'].sum()
+ xppk = df.groupby('age')['total'].sum()
+ xijp = df.groupby(['dma', 'size'])['total'].sum()
+ xpjk = df.groupby(['size', 'age'])['total'].sum()
+ # xppk = df.groupby('age')['total'].sum()
+
+ xipp.loc[501] = 52
+ xipp.loc[502] = 48
+
+ xpjp.loc[1] = 20
+ xpjp.loc[2] = 30
+ xpjp.loc[3] = 35
+ xpjp.loc[4] = 15
+
+ xppk.loc['20-25'] = 35
+ xppk.loc['30-35'] = 40
+ xppk.loc['40-45'] = 25
+
+ xijp.loc[501] = [9, 17, 19, 7]
+ xijp.loc[502] = [11, 13, 16, 8]
+
+ xpjk.loc[1] = [7, 9, 4]
+ xpjk.loc[2] = [8, 12, 10]
+ xpjk.loc[3] = [15, 12, 8]
+ xpjk.loc[4] = [5, 7, 3]
+
+ aggregates = [xipp, xpjp, xppk, xijp, xpjk]
+ dimensions = [['dma'], ['size'], ['age'], ['dma', 'size'], ['size', 'age']]
+
+ IPF = ipfn.ipfn(df, aggregates, dimensions)
+ df = IPF.iteration()
+
+ print(df)
+ print(df.groupby('size')['total'].sum(), xpjp)
+
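+If Polars is installed, the same fit can be run through the optional Polars backend (a sketch: a pandas input is converted to Polars and back automatically)::
+
+    IPF = ipfn.ipfn(df, aggregates, dimensions, algorithm='polars')
+    df = IPF.iteration()
+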
+Added notes:
+------------
+
+To call the algorithm in a program, execute::
+
+ from ipfn import ipfn
diff --git a/ipfn.egg-info/SOURCES.txt b/ipfn.egg-info/SOURCES.txt
new file mode 100644
index 0000000..3bd2981
--- /dev/null
+++ b/ipfn.egg-info/SOURCES.txt
@@ -0,0 +1,12 @@
+MANIFEST.in
+README.rst
+setup.cfg
+setup.py
+ipfn/__init__.py
+ipfn/ipfn.py
+ipfn.egg-info/PKG-INFO
+ipfn.egg-info/SOURCES.txt
+ipfn.egg-info/dependency_links.txt
+ipfn.egg-info/requires.txt
+ipfn.egg-info/top_level.txt
+tests/tests.py
\ No newline at end of file
diff --git a/ipfn.egg-info/dependency_links.txt b/ipfn.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/ipfn.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/ipfn.egg-info/requires.txt b/ipfn.egg-info/requires.txt
new file mode 100644
index 0000000..de86843
--- /dev/null
+++ b/ipfn.egg-info/requires.txt
@@ -0,0 +1,2 @@
+pandas>=0.22.0
+numpy
diff --git a/ipfn.egg-info/top_level.txt b/ipfn.egg-info/top_level.txt
new file mode 100644
index 0000000..ecb7b5f
--- /dev/null
+++ b/ipfn.egg-info/top_level.txt
@@ -0,0 +1 @@
+ipfn
diff --git a/ipfn/__init__.py b/ipfn/__init__.py
index b2ac7d2..5a54c47 100644
--- a/ipfn/__init__.py
+++ b/ipfn/__init__.py
@@ -1,3 +1,3 @@
-from ipfn import ipfn
+from .ipfn import ipfn
__all__ = ['ipfn']
diff --git a/ipfn/ipfn.py b/ipfn/ipfn.py
index 6b2181b..a09b9cb 100755
--- a/ipfn/ipfn.py
+++ b/ipfn/ipfn.py
@@ -5,11 +5,17 @@
from itertools import product
import copy
+# Optional Polars support
+try:
+ import polars as pl # type: ignore
+except Exception: # pragma: no cover - polars optional at runtime
+ pl = None
+
class ipfn(object):
def __init__(self, original, aggregates, dimensions, weight_col='total',
- convergence_rate=1e-5, max_iteration=500, verbose=0, rate_tolerance=1e-8):
+ convergence_rate=1e-5, max_iteration=500, verbose=0, rate_tolerance=1e-8, algorithm='optimized'):
"""
Initialize the ipfn class
@@ -32,6 +38,8 @@ def __init__(self, original, aggregates, dimensions, weight_col='total',
rate_tolerance: float value. If above 0.0, like 0.001, the algorithm will stop once the difference between the conv_rate variable of 2 consecutive iterations is below that specified value
+        algorithm: 'optimized' (default) uses the vectorized numpy/pandas implementations; 'legacy' uses the original slice/group iteration implementations (slower); 'polars' runs tabular inputs through the optional Polars backend.
+
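+        For example, to run the original implementation for comparison (a sketch):
+        IPF = ipfn(m, aggregates, dimensions, algorithm='legacy')
+        m = IPF.iteration()
+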
For examples, please open the ipfn script or look for help on functions ipfn_np and ipfn_df
"""
self.original = original
@@ -44,6 +52,9 @@ def __init__(self, original, aggregates, dimensions, weight_col='total',
raise(ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {verbose}"))
self.verbose = verbose
self.rate_tolerance = rate_tolerance
+ if algorithm not in ('optimized', 'legacy', 'polars'):
+ raise(ValueError(f"algorithm must be 'optimized', 'legacy' or 'polars' but got {algorithm}"))
+ self.algorithm = algorithm
@staticmethod
def index_axis_elem(dims, axes, elems):
@@ -58,20 +69,9 @@ def index_axis_elem(dims, axes, elems):
idx += (np.s_[:],)
return idx
- def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
+ def _ipfn_np_legacy(self, m, aggregates, dimensions, weight_col='total'):
"""
- Runs the ipfn method from a matrix m, aggregates/marginals and the dimension(s) preserved.
- For example:
- from ipfn import ipfn
- import numpy as np
- m = np.array([[8., 4., 6., 7.], [3., 6., 5., 2.], [9., 11., 3., 1.]], )
- xip = np.array([20., 18., 22.])
- xpj = np.array([18., 16., 12., 14.])
- aggregates = [xip, xpj]
- dimensions = [[0], [1]]
-
- IPF = ipfn(m, aggregates, dimensions)
- m = IPF.iteration()
+ Legacy numpy implementation retained for benchmarking.
"""
# Check that the inputs are numpay arrays of floats
@@ -93,13 +93,10 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
dim = len(m.shape)
product_elem = []
tables = [m]
- # TODO: do we need to persist all these dataframe? Or maybe we just need to persist the table_update and table_current
- # and then update the table_current to the table_update to the latest we have. And create an empty zero dataframe for table_update (Evelyn)
for inc in range(steps - 1):
tables.append(np.array(np.zeros(m.shape)))
original = copy.copy(m)
- # Calculate the new weights for each dimension
for inc in range(steps):
if inc == (steps - 1):
table_update = m
@@ -113,28 +110,16 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
idx = self.index_axis_elem(dim, dimensions[inc], item)
table_current_slice = table_current[idx]
mijk = table_current_slice.sum()
- # TODO: Directly put it as xijk = aggregates[inc][item] (Evelyn)
xijk = aggregates[inc]
xijk = xijk[item]
if mijk == 0:
- # table_current_slice += 1e-5
- # TODO: Basically, this part would remain 0 as always right? Cause if the sum of the slice is zero, then we only have zeros in this slice.
- # TODO: you could put it as table_update[idx] = table_current_slice (since multiplication on zero is still zero)
table_update[idx] = table_current_slice
else:
- # TODO: when inc == steps - 1, this part is also directly updating the dataframe m (Evelyn)
- # If we are not going to persist every table generated, we could still keep this part to directly update dataframe m
table_update[idx] = table_current_slice * 1.0 * xijk / mijk
- # For debug purposes
- # if np.isnan(table_update).any():
- # print(idx)
- # sys.exit(0)
product_elem = []
- # Check the convergence rate for each dimension
max_conv = 0
for inc in range(steps):
- # TODO: this part already generated before, we could somehow persist it. But it's not important (Evelyn)
for dimension in dimensions[inc]:
product_elem.append(range(m.shape[dimension]))
for item in product(*product_elem):
@@ -142,46 +127,80 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
ori_ijk = aggregates[inc][item]
m_slice = m[idx]
m_ijk = m_slice.sum()
- # print('Current vs original', abs(m_ijk/ori_ijk - 1))
if abs(m_ijk / ori_ijk - 1) > max_conv:
max_conv = abs(m_ijk / ori_ijk - 1)
-
product_elem = []
return m, max_conv
- def ipfn_df(self, df, aggregates, dimensions, weight_col='total'):
+ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
"""
- Runs the ipfn method from a dataframe df, aggregates/marginals and the dimension(s) preserved.
- For example:
- from ipfn import ipfn
- import pandas as pd
- age = [30, 30, 30, 30, 40, 40, 40, 40, 50, 50, 50, 50]
- distance = [10,20,30,40,10,20,30,40,10,20,30,40]
- m = [8., 4., 6., 7., 3., 6., 5., 2., 9., 11., 3., 1.]
- df = pd.DataFrame()
- df['age'] = age
- df['distance'] = distance
- df['total'] = m
-
- xip = df.groupby('age')['total'].sum()
- xip.loc[30] = 20
- xip.loc[40] = 18
- xip.loc[50] = 22
- xpj = df.groupby('distance')['total'].sum()
- xpj.loc[10] = 18
- xpj.loc[20] = 16
- xpj.loc[30] = 12
- xpj.loc[40] = 14
- dimensions = [['age'], ['distance']]
- aggregates = [xip, xpj]
-
- IPF = ipfn(df, aggregates, dimensions)
- df = IPF.iteration()
-
- print(df)
- print(df.groupby('age')['total'].sum(), xip)"""
+ Optimized numpy implementation using vectorized broadcasting.
+ """
+ # Ensure numpy arrays of floats
+ for i, aggregate in enumerate(aggregates):
+ if not isinstance(aggregate, np.ndarray):
+ aggregates[i] = np.array(aggregate, dtype=float)
+            elif not np.issubdtype(aggregate.dtype, np.floating):
+ aggregates[i] = aggregate.astype(float)
+ if not isinstance(m, np.ndarray):
+ m = np.array(m, dtype=float)
+        elif not np.issubdtype(m.dtype, np.floating):
+ m = m.astype(float)
+ steps = len(aggregates)
+ dim = m.ndim
+
+ # Main proportional fitting passes
+ for inc in range(steps):
+ axes_keep = dimensions[inc]
+ # axes to sum (all except axes_keep)
+ sum_axes = tuple(ax for ax in range(dim) if ax not in axes_keep)
+
+ # Current marginal sums with dimensions kept for broadcasting
+ denom = m.sum(axis=sum_axes, keepdims=True)
+
+ # Target reshaped for broadcasting on original m shape
+ target = np.array(aggregates[inc], dtype=float)
+ reshape_shape = [1] * dim
+ for k, ax in enumerate(axes_keep):
+ reshape_shape[ax] = target.shape[k]
+ target_b = target.reshape(reshape_shape)
+
+            # Compute the ratio into the denom buffer to avoid an extra allocation.
+            # Record zero denominators first: after the division, a ratio of 0.0 is a
+            # legitimate value (target 0, current sum > 0) and must not be overwritten.
+            zero_denom = denom == 0
+            np.divide(target_b, denom, out=denom, where=~zero_denom)
+            # Where the current marginal sum is 0, the slice is all zeros; leave it unchanged (ratio 1.0)
+            denom[zero_denom] = 1.0
+
+ # Scale m in-place
+ m *= denom
+
+ # Convergence computation
+ max_conv = 0.0
+ for inc in range(steps):
+ axes_keep = dimensions[inc]
+ sum_axes = tuple(ax for ax in range(dim) if ax not in axes_keep)
+ current = m.sum(axis=sum_axes)
+ target = np.array(aggregates[inc], dtype=float)
+ with np.errstate(divide='ignore', invalid='ignore'):
+ diff = np.abs(current / target - 1.0)
+ if np.any(target == 0):
+ # If target==0 and current>0, set inf; if both 0, set 0
+ zmask = target == 0
+ zval = np.where(current == 0, 0.0, np.inf)
+ diff = np.where(zmask, zval, diff)
+ # nanmax handles potential NaNs from 0/0
+ local_max = np.nanmax(diff)
+ if local_max > max_conv:
+ max_conv = float(local_max)
+
+ return m, max_conv
+
+ def _ipfn_df_legacy(self, df, aggregates, dimensions, weight_col='total'):
+ """
+ Legacy pandas implementation retained for benchmarking.
+ """
steps = len(aggregates)
tables = [df]
for inc in range(steps - 1):
@@ -235,7 +254,6 @@ def ipfn_df(self, df, aggregates, dimensions, weight_col='total'):
table_update.reset_index(inplace=True)
table_current.reset_index(inplace=True)
inc += 1
- feat_l = []
# Calculate the max convergence rate
max_conv = 0
@@ -250,9 +268,140 @@ def ipfn_df(self, df, aggregates, dimensions, weight_col='total'):
return table_update, max_conv
+ def ipfn_df(self, df, aggregates, dimensions, weight_col='total'):
+ """
+ Runs the ipfn method from a dataframe df, aggregates/marginals and the dimension(s) preserved.
+ Vectorized implementation to reduce Python-level loops and memory usage.
+ """
+ steps = len(aggregates)
+
+ # Calculate the new weights for each dimension directly on df
+ for inc, features in enumerate(dimensions):
+ # Current group sums
+ tmp = df.groupby(features, sort=False)[weight_col].sum()
+ xijk = aggregates[inc]
+ # Align target to current group index if needed
+ if isinstance(xijk, (pd.Series, pd.DataFrame)):
+ if isinstance(xijk, pd.DataFrame):
+ if xijk.shape[1] != 1:
+ raise(ValueError('Aggregate DataFrame must have a single column'))
+ xijk = xijk.iloc[:, 0]
+ xijk_aligned = xijk.reindex(tmp.index)
+ else:
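+            # Raw array aggregates are assumed to be ordered like the groupby(sort=False) result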
+ xijk_aligned = pd.Series(np.array(xijk, dtype=float).ravel(), index=tmp.index)
+
+ ratio = pd.Series(1.0, index=tmp.index, dtype=float)
+ nonzero = tmp != 0
+ ratio.loc[nonzero] = (xijk_aligned.loc[nonzero].astype(float) / tmp.loc[nonzero].astype(float))
+
+ ratio_df = ratio.rename('ipf_ratio').reset_index()
+ merged = df.merge(ratio_df, on=features, how='left', sort=False)
+            # Assign positionally: merge() resets the index, so align by position rather than by label
+            df[weight_col] = (merged[weight_col].astype(float) * merged['ipf_ratio'].astype(float)).to_numpy()
+
+ # Calculate the max convergence rate
+ max_conv = 0
+ for inc, features in enumerate(dimensions):
+ tmp = df.groupby(features, sort=False)[weight_col].sum()
+ ori_ijk = aggregates[inc]
+ if isinstance(ori_ijk, pd.DataFrame):
+ ori_ijk = ori_ijk.iloc[:, 0]
+ if isinstance(ori_ijk, pd.Series):
+ ori_ijk = ori_ijk.reindex(tmp.index)
+ with np.errstate(divide='ignore', invalid='ignore'):
+ temp_conv = np.nanmax(np.abs(tmp.values / np.array(ori_ijk, dtype=float) - 1))
+ if temp_conv > max_conv:
+ max_conv = temp_conv
+
+ return df, max_conv
+
+ @staticmethod
+ def _aggregate_to_pl_df(xijk, features, target_col: str = 'target'):
+ """Convert aggregate (pandas Series/DataFrame or numpy) to a Polars DataFrame with given feature columns and target column."""
+ if pl is None:
+ raise RuntimeError("Polars is not available")
+ # pandas Series/DataFrame path
+ if isinstance(xijk, pd.DataFrame):
+ if xijk.shape[1] != 1:
+ raise(ValueError('Aggregate DataFrame must have a single column'))
+ xijk = xijk.iloc[:, 0]
+ if isinstance(xijk, pd.Series):
+            # reset_index() emits the feature columns (taken from the index names) followed by
+            # the renamed target column; the join in ipfn_pl matches on those column names
+            df_pd = xijk.rename(target_col).reset_index()
+            return pl.DataFrame(df_pd)
+        # numpy-array fallback: without the source data we cannot recover the category labels,
+        # so raw arrays are only accepted for a single feature, keyed by positional index
+        # (only meaningful when the feature values are the integers 0..n-1)
+        arr = np.array(xijk, dtype=float).ravel()
+        if len(features) == 1:
+            return pl.DataFrame({features[0]: range(len(arr)), target_col: arr})
+ else:
+ raise ValueError("When using polars backend with DataFrame inputs, aggregates should be pandas Series/DataFrame")
+
+ def ipfn_pl(self, df_pl, aggregates, dimensions, weight_col='total'):
+ """
+ Polars-based implementation of IPFN for tabular inputs.
+ Accepts a Polars DataFrame and aggregates as pandas Series/DataFrame or Polars DataFrames.
+ Returns a Polars DataFrame and the max convergence.
+ """
+ if pl is None:
+ raise RuntimeError("Polars is not installed. Please install polars to use algorithm='polars'.")
+
+ steps = len(aggregates)
+ df = df_pl
+ # Ensure weight column is float
+ if weight_col not in df.columns:
+ raise ValueError(f"weight_col '{weight_col}' not found in DataFrame")
+ df = df.with_columns(pl.col(weight_col).cast(pl.Float64))
+
+ for inc, features in enumerate(dimensions):
+ # Group sums
+ grouped = df.group_by(features).agg(pl.col(weight_col).sum().alias('sum_w'))
+ # Target as Polars
+ xijk = aggregates[inc]
+ if isinstance(xijk, pl.DataFrame):
+ target_df = xijk.rename({xijk.columns[-1]: 'target'})
+ else:
+ target_df = self._aggregate_to_pl_df(xijk, features, target_col='target')
+ # Join and compute ratio
+ ratio_tbl = grouped.join(target_df, on=features, how='left')
+ ratio_tbl = ratio_tbl.with_columns(
+ pl.when(pl.col('sum_w') != 0)
+ .then(pl.col('target').cast(pl.Float64) / pl.col('sum_w'))
+ .otherwise(1.0)
+ .alias('ipf_ratio')
+ ).select(features + ['ipf_ratio'])
+ # Apply ratio to weights
+ df = df.join(ratio_tbl, on=features, how='left')
+ df = df.with_columns((pl.col(weight_col) * pl.col('ipf_ratio')).alias(weight_col))
+ df = df.drop('ipf_ratio')
+
+ # Convergence computation
+ max_conv = 0.0
+ for inc, features in enumerate(dimensions):
+ grouped = df.group_by(features).agg(pl.col(weight_col).sum().alias('sum_w'))
+ xijk = aggregates[inc]
+ if isinstance(xijk, pl.DataFrame):
+ target_df = xijk.rename({xijk.columns[-1]: 'target'})
+ else:
+ target_df = self._aggregate_to_pl_df(xijk, features, target_col='target')
+ joined = grouped.join(target_df, on=features, how='left')
+ joined = joined.with_columns(
+ pl.when(pl.col('target') == 0)
+ .then(pl.when(pl.col('sum_w') == 0).then(0.0).otherwise(float('inf')))
+ .otherwise((pl.col('sum_w') / pl.col('target') - 1.0).abs())
+ .alias('conv_abs')
+ )
+ local_max = joined.select(pl.max('conv_abs')).item()
+ if local_max is not None and float(local_max) > max_conv:
+ max_conv = float(local_max)
+
+ return df, max_conv
+
def iteration(self):
"""
- Runs the ipfn algorithm. Automatically detects of working with numpy ndarray or pandas dataframes.
+        Runs the ipfn algorithm. Automatically detects whether it is working with a numpy ndarray or a (pandas|polars) DataFrame.
"""
i = 0
@@ -261,14 +410,33 @@ def iteration(self):
conv_list = []
m = self.original
- # If the original data input is in pandas DataFrame format
- if isinstance(self.original, pd.DataFrame):
- ipfn_method = self.ipfn_df
- elif isinstance(self.original, np.ndarray):
- ipfn_method = self.ipfn_np
+ used_polars_backend = False
+
+ # Determine backend based on input type and requested algorithm
+ if isinstance(m, pd.DataFrame):
+ if self.algorithm == 'legacy':
+ ipfn_method = self._ipfn_df_legacy
+ elif self.algorithm == 'polars':
+ if pl is None:
+ raise RuntimeError("algorithm='polars' requested but Polars is not installed")
+ # convert pandas to polars
+ try:
+ m = pl.from_pandas(m)
+ except Exception:
+ m = pl.DataFrame(m)
+ ipfn_method = self.ipfn_pl
+ used_polars_backend = True
+ else:
+ ipfn_method = self.ipfn_df
+ elif isinstance(m, np.ndarray):
+ ipfn_method = self._ipfn_np_legacy if self.algorithm == 'legacy' else self.ipfn_np
self.original = self.original.astype('float64')
+ elif (pl is not None) and isinstance(m, pl.DataFrame):
+ ipfn_method = self.ipfn_pl
+ used_polars_backend = True
else:
- raise(ValueError(f'Data input instance not recognized. The input matrix is not a numpy array or pandas DataFrame'))
+ raise(ValueError('Data input instance not recognized. The input matrix is not a numpy array or (pandas|polars) DataFrame'))
+
while ((i <= self.max_itr and conv > self.conv_rate) and (i <= self.max_itr and abs(conv - old_conv) > self.rate_tolerance)):
old_conv = conv
m, conv = ipfn_method(m, self.aggregates, self.dimensions, self.weight_col)
@@ -284,6 +452,14 @@ def iteration(self):
print('Maximum iterations reached')
converged = 0
+ # Convert back to pandas DataFrame if we used polars backend but the original was pandas
+ if used_polars_backend and (isinstance(self.original, pd.DataFrame)):
+ try:
+ m = m.to_pandas()
+ except Exception:
+ # Fallback: construct via rows
+ m = pd.DataFrame(m.to_dicts())
+
# Handle the verbose
if self.verbose == 0:
return m
diff --git a/ipfn_contributors.txt b/ipfn_contributors.txt
index 7a59a7f..1b6c1c6 100644
--- a/ipfn_contributors.txt
+++ b/ipfn_contributors.txt
@@ -19,3 +19,8 @@ https://github.com/harisbal
mattswoon
https://github.com/mattswoon
+
+dhruv-panther
+hasan@panther-home.com
+https://github.com/dhruv-panther
+Added new algorithms and polars support
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..e6cc686
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,6 @@
+[pytest]
+# Ensure pytest discovers our legacy-named tests file
+python_files = tests.py test_*.py *_test.py
+python_classes = Test*
+python_functions = test_*
+testpaths = tests
diff --git a/tests/context.py b/tests/context.py
index 3399491..f8163fd 100644
--- a/tests/context.py
+++ b/tests/context.py
@@ -1,4 +1,4 @@
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
-from ipfn import ipfn
+import ipfn