danlkv · danlkv · Aug 29, 2020 · Aug 31, 2020 · Aug 31, 2020 · Aug 31, 2020
diff --git a/.github/workflows/jlse.yaml b/.github/workflows/jlse.yaml
@@ -17,16 +17,16 @@ jobs:
       run:
         working-directory: run/automake
     steps:
-      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
       - uses: actions/checkout@v2
         with:
           submodules: recursive
 
 
-      - name: Update subpackages
+      - name: Update packages
         run: |
-            (cd ../../analysis/spec/ && python setup.py develop --user --no-deps)
-            (cd ../../qtree/ && python setup.py develop --user --no-deps)
+            (cd ../../ && python setup.py develop --user)
+            (cd ../../analysis/spec/ && python setup.py develop --user)
+            (cd ../../qtree/ && python setup.py develop --user)
 
       - name: Remove previous result.md
         run: |

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -15,25 +15,46 @@ jobs:
   build:
     # The type of runner that the job will run on
     runs-on: ubuntu-latest
+    container: robbyjo/ubuntu-mkl:18.04-2019.1
 
-    # Steps represent a sequence of tasks that will be executed as part of the job
     steps:
     # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+    - name: Setup git
+      run: |
+        yes | apt-get update
+        yes | apt-get install software-properties-common python3 python3-pip
+        yes | add-apt-repository ppa:git-core/ppa
+        yes | apt-get update
+        yes | apt-get install git
+
     - uses: actions/checkout@v2
       with:
-          submodules: recursive
-
-    - name: Setup Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.x
+        submodules: recursive
+
+    - name: Link to proper python
+      run: |
+        ln -srf $(which python3) /usr/bin/python
+        ln -srf $(which pip3) /usr/bin/pip
+        which pip3
+        echo $PATH
 
 
     - name: Setup dependencies
+      env:
+        LC_CTYPE: en_US.UTF-8
+        LANG: en_US.UTF-8
+        LC_ALL: C.UTF-8
       run: |
+        pip install --upgrade pip
+        pip install --upgrade setuptools
+        pip --version
         pip install .
         pip install pytest mock
-        cd qtree && pip install .
+        (cd qtree && pip install .)
+        (cd scratchpad/cpp_connections/vanilia/nparray/ && pip install .)
 
     - name: Test
-      run: cd qtensor && pytest
+      env: 
+        LD_PRELOAD: "/opt/intel/mkl/lib/intel64/libmkl_def.so:/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_intel_thread.so:/opt/intel/lib/intel64_lin/libiomp5.so"
+        LC_ALL: C.UTF-8
+      run: cd qtensor && pytest -s
diff --git a/.gitignore b/.gitignore
@@ -14,8 +14,6 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
-lib64/
 parts/
 sdist/
 var/

diff --git a/README.md b/README.md
@@ -148,3 +148,9 @@ treewidth = opt.treewidth
 mems, flops = tn.simulation_cost(peo)
 print('Max memory=', max(mems), 'Total flops=', sum(flops))
 ```
+
+### Use the CLI to run benchmarks
+
+```bash
+» python -m qtensor.cli generate-qaoa-ansatz-circuit -p 3 -n 24 | python -m qtensor.cli sim-file --profile --max-tw 27
+```
diff --git a/analysis/spec/notebooks/Time_vs_FLOP.ipynb b/analysis/spec/notebooks/Time_vs_FLOP.ipynb
diff --git a/analysis/spec/qtensor_specs/_nbdev.py b/analysis/spec/qtensor_specs/_nbdev.py
@@ -12,8 +12,6 @@
          "step_flops": "Time_vs_FLOP.ipynb",
          "max_mem": "Time_vs_FLOP.ipynb",
          "SEED": "Time_vs_FLOP.ipynb",
-         "EDGE_IDX_FOR_SEED": "Time_vs_FLOP.ipynb",
-         "EDGE_IDX_FOR_SEED_JLSE": "Time_vs_FLOP.ipynb",
          "sim_profile": "Time_vs_FLOP.ipynb",
          "step_sim_time": "Time_vs_FLOP.ipynb",
          "plot_with_filter": "Time_vs_FLOP.ipynb",

diff --git a/analysis/spec/qtensor_specs/time_vs_flop.py b/analysis/spec/qtensor_specs/time_vs_flop.py
@@ -1,13 +1,13 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: notebooks/Time_vs_FLOP.ipynb (unless otherwise specified).
 
 __all__ = ['ex', 'graph', 'circuit', 'tn', 'peo', 'sim_costs', 'sum_flops', 'step_flops', 'max_mem', 'SEED',
-           'EDGE_IDX_FOR_SEED', 'EDGE_IDX_FOR_SEED_JLSE', 'sim_profile', 'step_sim_time', 'plot_with_filter',
-           'get_log_flops_vs_matmul', 'cli', 'time_vs_flops_plot']
+           'sim_profile', 'step_sim_time', 'plot_with_filter', 'get_log_flops_vs_matmul', 'cli', 'time_vs_flops_plot']
 
 # Cell
 import sys
 import numpy as np
 import matplotlib.pyplot as plt
+import scipy
 
 import qtensor as qt
 from cartesian_explorer import Explorer
@@ -37,15 +37,19 @@ def tn(circuit):
     return qt.optimisation.TensorNet.QtreeTensorNet.from_qtree_gates(circuit)
 
 @ex.provider
-def peo(tn):
-    opt = qt.optimisation.Optimizer.DefaultOptimizer()
+def peo(tn, ordering_algo='greedy', tamaki_wait_time=15):
+    if ordering_algo=='greedy':
+        opt = qt.optimisation.Optimizer.DefaultOptimizer()
+    elif 'tamaki' in ordering_algo:
+        if '_' in ordering_algo:
+            _, time_str = ordering_algo.split('_')
+            tamaki_wait_time=int(time_str)
+        opt = qt.optimisation.Optimizer.TamakiOptimizer(wait_time=tamaki_wait_time)
     peo, _ = opt.optimize(tn)
     return tuple(peo)
 
 @ex.provider
 def sim_costs(tn, peo):
-    opt = qt.optimisation.Optimizer.DefaultOptimizer()
-    peo, _ = opt.optimize(tn)
     costs, mems = tn.simulation_cost(peo)
     return costs, mems
 
@@ -68,19 +72,16 @@ def max_mem(sim_costs):
 # Cell
 SEED=107
 
-# Cell
-EDGE_IDX_FOR_SEED = {
-    107:  [2, 3, 10, 15]
-}
-
-EDGE_IDX_FOR_SEED_JLSE = {
-    107:  [2, 4, 8, 14, 15, 21]
-}
-
 # Cell
 @ex.provider
-def sim_profile(circuit, tn):
-    backend = qt.PerfNumpyBackend(print=False)
+def sim_profile(circuit, tn, backend='numpy'):
+    if backend == 'numpy':
+        backend = qt.PerfNumpyBackend(print=False)
+    elif backend == 'mkl':
+        backend = qt.ProcessingFrameworks.PerfBackend.from_backend(
+            qt.ProcessingFrameworks.CMKLExtendedBackend, print=False)
+    elif backend == 'debug_mkl':
+        backend = qt.DebugFrameworks.DebugMKLBackend()
     sim = qt.QtreeSimulator(bucket_backend=backend)
 
     sim.simulate(circuit)
@@ -95,32 +96,41 @@ def step_sim_time(sim_profile, tn):
 
 # Cell
 def plot_with_filter(est_flat, times_flat):
-    filt = (est_flat>1e4) #& (times_flat>1e-4)
+    filt = (est_flat>5e4) #& (times_flat>1e-4)
     est_flat_filtered = est_flat[filt]
     times_flat_filtered = times_flat[filt]
 
     # Fit times
     log_fit_coef = np.polyfit(np.log(est_flat_filtered), np.log(times_flat_filtered), 1)
     fit_coef = np.polyfit(est_flat_filtered, times_flat_filtered, 1)
+    def fixed_slope(x, shift):
+        slope = 1.0
+        return x*slope + shift
+    popt, pcov = scipy.optimize.curve_fit(fixed_slope, np.log(est_flat_filtered), np.log(times_flat_filtered))
     print('Lin fit:', fit_coef)
     print('Log fit:', log_fit_coef)
+    print('Slope-1 log fit:', popt)
     fit_fn = np.poly1d(log_fit_coef)
+    fit_fn = fixed_slope
 
     # Plot scatter with filtered data
-    plt.scatter(est_flat_filtered, times_flat_filtered)
-    xfit = 10**np.linspace(4, 7, 100)
-    plt.plot(xfit, np.exp(fit_fn(np.log(xfit))), color='blue')
+    plt.scatter(est_flat_filtered, times_flat_filtered, marker='x')
+    min_x = np.log10(est_flat_filtered.min())
+    max_x = np.log10(est_flat_filtered.max()) + .5
+    xfit = 10**np.linspace(min_x, max_x, 100)
+    plt.plot(xfit, np.exp(fit_fn(np.log(xfit), popt[0])), color='blue')
     plt.loglog()
     plt.xlabel('estimated FLOP')
     plt.ylabel('Runtime')
+    plt.grid()
     return log_fit_coef, fit_coef
 
 # Cell
 import timeit
 def get_log_flops_vs_matmul(log_fit_coef):
     FLOPS_logfit = np.exp(-log_fit_coef[1])
 
-    N = 300
+    N = 500
     matmul_flop = N**2*(N-1)
     x, y = np.random.randn(2, N, N)
     number = 100
@@ -138,9 +148,20 @@ def get_log_flops_vs_matmul(log_fit_coef):
 def cli():
     pass
 
-@cli.command()
-@click.argument('filename')
-def time_vs_flops_plot(filename):
+@click.argument('filename', nargs=-1)
+@click.option('-B', '--backend', default='numpy')
+@click.option('-M', '--max-memory', default=3e8)
+@click.option('-s', '--seed', default=SEED)
+@click.option('-O', '--ordering_algo', default='greedy'
+              ,help=("One of (greedy, tamaki, tamaki_{wait_time})"
+                     "'tamki_15' means heuristic solver running for 15 seconds per graph"
+                    )
+             )
+@click.option('--min-memory', default=3e6)
+def time_vs_flops_plot(filename=None, backend='numpy', seed=SEED,
+                       max_memory=2e8, min_memory=1e6,
+                       ordering_algo='greedy', tamaki_time=10
+                      ):
     """
     Plots times and estimated FLOP for each step of several QAOA energy computation contractions.
 
@@ -150,31 +171,46 @@ def time_vs_flops_plot(filename):
         - N = 1000
 
     """
-    edge_indices = EDGE_IDX_FOR_SEED[SEED]
     ds = [3, 4]
     p = 3
     N = 1000
 
-    estimators = ex.map_variable('step_flops', d=ds,
-                                 edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])
-    maxmems = ex.map_variable('max_mem', d=ds,
-                                 edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])
-    if np.max(maxmems)>1e10:
-        print('memory estimations:', maxmems)
-        raise Exception('Will get too large tetsors!!')
+    edges_to_try = 20
+    estimators, maxmems = ex.map_variables(
+        ('step_flops', 'max_mem'),
+         d=ds,
+         edge_idx=range(edges_to_try), n=[N], p=[p],
+         seed=[seed],
+         ordering_algo=[ordering_algo],
+        )
+
+
+    selector = ((min_memory < maxmems) & (maxmems < max_memory)).all(axis=0)
+    edge_indices = np.arange(edges_to_try)[selector]
+    print('Selected edges', edge_indices)
+    print('Estimated memories', maxmems.T[selector].flatten())
+    estimators = estimators.T[selector]
 
     times = ex.map_variable('step_sim_time', d=ds,
-                            edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])
+                            edge_idx=edge_indices, n=[N], p=[p]
+                            ,seed=[seed]
+                            ,backend=[backend]
+                            ,ordering_algo=[ordering_algo]
+                           )
 
-    est_flat = np.concatenate(estimators.flatten())
+    est_flat = np.concatenate(estimators.T.flatten())
     times_flat = np.concatenate(times.flatten())
 
     log_fit_coef, fit_coef = plot_with_filter(est_flat, times_flat)
-    plt.savefig(filename)
+    if filename:
+        plt.savefig(filename[0])
 
     fit, matmul = get_log_flops_vs_matmul(log_fit_coef)
 
     print('===Results===')
+    print(f'Total time: {times_flat.sum():.5}')
     print(f'Simulator fitted flops: {fit/1e9:.5} G')
     print(f'Matmul flops: {matmul/1e9:.5} G')
-    print(f'Simulator optimality: {fit/matmul}')
+    print(f'Simulator optimality: {fit/matmul}')
+
+cli.command()(time_vs_flops_plot)
diff --git a/bench/mklbench/bench.cpp b/bench/mklbench/bench.cpp
@@ -157,29 +157,20 @@ int main(void)
     //               transa    transb    M     N     K
 
     int i;
-    run_size<double>(do_trans, no_trans, 4096);
-
-//    for (i = 10; i >= -10; i--)
-//        run_size<double>(do_trans, no_trans, 4096 + i);
-//    for (i = 10; i >= -10; i--)
-//        run_size<double>(do_trans, no_trans, 2048 + i);
-//    for (i = 10; i >= -10; i--)
-//        run_size<double>(do_trans, no_trans, 1024 + i);
-//    for (i = 10; i >= -10; i--)
-//        run_size<double>(do_trans, no_trans, 256 + i);
-//    for (i = 10; i >= -10; i--)
-//        run_size<double>(do_trans, no_trans, 128 + i);
-//    for (i = 10; i >= -10; i--)
-//        run_size<double>(do_trans, no_trans, 32 + i);
-//
-//  for (i = 4096; i >= 512; i -= 256)
-//      run_size<double>(no_trans, no_trans, i, i, i);
-//
-//  for (i = 512; i >= 64; i -= 32)
-//      run_size<double>(no_trans, no_trans, i, i, i);   
-
-//  for (i = 64; i >= 16; i -= 1)
-//      run_size<double>(no_trans, no_trans, i, i, i);
+    //run_size<double>(no_trans, do_trans, 4096, 4096, 4096);
+    run_size<double>(no_trans, do_trans, 4096, 1, 4096);
+    //run_size<double>(no_trans, do_trans, 1000, 1000, 1000);
+    run_size<double>(no_trans, do_trans, 1000, 1, 1000);
 
+
+    // for (i = 4096; i >= 512; i -= 256)
+    //    run_size<double>(no_trans, no_trans, i, i, i);
+
+    //for (i = 512; i >= 64; i -= 32)
+    //    run_size<double>(no_trans, no_trans, i, i, i);   
+
+    //for (i = 64; i >= 16; i -= 1)
+    //    run_size<double>(no_trans, no_trans, i, i, i);
+
     return EXIT_SUCCESS;
 }
diff --git a/bench/pybench/tn_bench.py b/bench/pybench/tn_bench.py
@@ -29,6 +29,6 @@ def run(n, num_iter, num_batch):
 
 if __name__ == "__main__":
     tn.set_default_backend(sys.argv[1])
-    for i in range(4102, 4090 - 1, -1):
-        run(i, 20, 1)
+    for i in range(4120, 4082 - 2, -2):
+        run(i, 10, 1)
 
diff --git a/data/README.md b/data/README.md
@@ -0,0 +1 @@
+The debugging backend and mkl_verbose add 3 seconds to an 18-second task.
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,8 +14,6 @@ dist/ @@
     downloads/
     eggs/
     .eggs/
-    lib/
-    lib64/
     parts/
     sdist/
     var/
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		The debugging backend and mkl_verbose add 3 seconds to an 18-second task.