diff --git a/.github/workflows/jlse.yaml b/.github/workflows/jlse.yaml
index f8a2bbc2..a44c3be2 100644
--- a/.github/workflows/jlse.yaml
+++ b/.github/workflows/jlse.yaml
@@ -17,16 +17,16 @@ jobs:
run:
working-directory: run/automake
steps:
- # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2
with:
submodules: recursive
- - name: Update subpackages
+ - name: Update packages
run: |
- (cd ../../analysis/spec/ && python setup.py develop --user --no-deps)
- (cd ../../qtree/ && python setup.py develop --user --no-deps)
+ (cd ../../ && python setup.py develop --user)
+ (cd ../../analysis/spec/ && python setup.py develop --user)
+ (cd ../../qtree/ && python setup.py develop --user)
- name: Remove previous result.md
run: |
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 27a26720..7764edb3 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -15,25 +15,46 @@ jobs:
build:
# The type of runner that the job will run on
runs-on: ubuntu-latest
+ container: robbyjo/ubuntu-mkl:18.04-2019.1
- # Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+ - name: Setup git
+ run: |
+ yes | apt-get update
+ yes | apt-get install software-properties-common python3 python3-pip
+ yes | add-apt-repository ppa:git-core/ppa
+ yes | apt-get update
+ yes | apt-get install git
+
- uses: actions/checkout@v2
with:
- submodules: recursive
-
- - name: Setup Python
- uses: actions/setup-python@v2
- with:
- python-version: 3.x
+ submodules: recursive
+
+ - name: Link to proper python
+ run: |
+ ln -srf $(which python3) /usr/bin/python
+ ln -srf $(which pip3) /usr/bin/pip
+ which pip3
+ echo $PATH
- name: Setup dependencies
+ env:
+ LC_CTYPE: en_US.UTF-8
+ LANG: en_US.UTF-8
+ LC_ALL: C.UTF-8
run: |
+ pip install --upgrade pip
+ pip install --upgrade setuptools
+ pip --version
pip install .
pip install pytest mock
- cd qtree && pip install .
+ (cd qtree && pip install .)
+ (cd scratchpad/cpp_connections/vanilia/nparray/ && pip install .)
- name: Test
- run: cd qtensor && pytest
+ env:
+ LD_PRELOAD: "/opt/intel/mkl/lib/intel64/libmkl_def.so:/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_intel_thread.so:/opt/intel/lib/intel64_lin/libiomp5.so"
+ LC_ALL: C.UTF-8
+ run: cd qtensor && pytest -s
diff --git a/.gitignore b/.gitignore
index ec0ab8aa..e83b302f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,8 +14,6 @@ dist/
downloads/
eggs/
.eggs/
-lib/
-lib64/
parts/
sdist/
var/
diff --git a/README.md b/README.md
index d7545d40..fc35f7a8 100644
--- a/README.md
+++ b/README.md
@@ -148,3 +148,9 @@ treewidth = opt.treewidth
mems, flops = tn.simulation_cost(peo)
print('Max memory=', max(mems), 'Total flops=', sum(flops))
```
+
+### Use the CLI to run benchmarks
+
+```bash
+» python -m qtensor.cli generate-qaoa-ansatz-circuit -p 3 -n 24 | python -m qtensor.cli sim-file --profile --max-tw 27
+```
diff --git a/analysis/spec/notebooks/Time_vs_FLOP.ipynb b/analysis/spec/notebooks/Time_vs_FLOP.ipynb
index 77d83c40..36cd5c6d 100644
--- a/analysis/spec/notebooks/Time_vs_FLOP.ipynb
+++ b/analysis/spec/notebooks/Time_vs_FLOP.ipynb
@@ -7,7 +7,7 @@
},
"source": [
"
Table of Contents
\n",
- ""
+ ""
]
},
{
@@ -27,8 +27,8 @@
"execution_count": 1,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:23:46.967223Z",
- "start_time": "2020-10-07T10:23:44.534313Z"
+ "end_time": "2020-10-11T03:32:44.317870Z",
+ "start_time": "2020-10-11T03:32:41.940108Z"
}
},
"outputs": [],
@@ -37,6 +37,7 @@
"import sys\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
+ "import scipy\n",
"\n",
"import qtensor as qt\n",
"from cartesian_explorer import Explorer"
@@ -47,8 +48,8 @@
"execution_count": 2,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:23:46.974535Z",
- "start_time": "2020-10-07T10:23:46.968412Z"
+ "end_time": "2020-10-11T03:32:44.323932Z",
+ "start_time": "2020-10-11T03:32:44.320677Z"
}
},
"outputs": [],
@@ -64,8 +65,8 @@
"execution_count": 3,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:23:46.980521Z",
- "start_time": "2020-10-07T10:23:46.977197Z"
+ "end_time": "2020-10-11T03:32:44.352129Z",
+ "start_time": "2020-10-11T03:32:44.339056Z"
}
},
"outputs": [],
@@ -103,16 +104,17 @@
"execution_count": 4,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:23:46.986625Z",
- "start_time": "2020-10-07T10:23:46.982470Z"
+ "end_time": "2020-10-11T03:32:45.497342Z",
+ "start_time": "2020-10-11T03:32:45.491342Z"
}
},
"outputs": [],
"source": [
"N = 1000\n",
- "p = 4\n",
- "edge_idx = 7\n",
- " "
+ "p = 3\n",
+ "edge_idx = 28\n",
+ "degree = 4\n",
+ " "
]
},
{
@@ -120,33 +122,149 @@
"execution_count": 5,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:23:49.052258Z",
- "start_time": "2020-10-07T10:23:48.001942Z"
- },
- "scrolled": false
+ "end_time": "2020-10-11T03:32:46.435411Z",
+ "start_time": "2020-10-11T03:32:46.164940Z"
+ }
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Line graph nodes 1671\n"
+ ]
+ }
+ ],
"source": [
- " \n",
"gamma, beta = [.1]*p, [.3]*p\n",
- "graph = qt.toolbox.random_graph(nodes=N, degree=3)\n",
+ "graph = qt.toolbox.random_graph(nodes=N, degree=degree, seed=108)\n",
"\n",
"comp = qt.QtreeQAOAComposer(graph, gamma=gamma, beta=beta)\n",
"\n",
"comp.energy_expectation_lightcone(list(graph.edges())[edge_idx])\n",
"tn = qt.optimisation.TensorNet.QtreeTensorNet.from_qtree_gates(comp.circuit)\n",
+ "line_graph = tn.get_line_graph()\n",
+ "print('Line graph nodes', line_graph.number_of_nodes())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Using greedy optimizer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2020-10-11T03:32:49.121865Z",
+ "start_time": "2020-10-11T03:32:48.199388Z"
+ },
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "treewidth 21\n"
+ ]
+ }
+ ],
+ "source": [
"opt = qt.optimisation.Optimizer.DefaultOptimizer()\n",
"peo, _ = opt.optimize(tn)\n",
+ "print('treewidth', opt.treewidth)\n",
+ "if opt.treewidth > 100:\n",
+ " raise Exception('Too large treewidth')\n",
"costs, mems = tn.simulation_cost(peo)"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2020-10-11T03:32:50.149626Z",
+ "start_time": "2020-10-11T03:32:49.123067Z"
+ },
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total FLOPS=0.034049645 G, Memory=0.033554432 G\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.plot(costs, label='flops')\n",
+ "plt.plot(mems, label='memory')\n",
+ "plt.yscale('log')\n",
+ "plt.legend()\n",
+ "plt.grid()\n",
+ "plt.title('Simulation cost per step')\n",
+ "print(f'Total FLOPS={sum(costs)/1e9} G, Memory={max(mems)/1e9} G')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Using tamaki optimizer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2020-10-11T03:25:03.478651Z",
+ "start_time": "2020-10-11T03:24:51.686901Z"
+ },
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "treewidth 17\n"
+ ]
+ }
+ ],
+ "source": [
+ "opt = qt.optimisation.Optimizer.TamakiOptimizer(wait_time=10)\n",
+ "peo, _ = opt.optimize(tn)\n",
+ "print('treewidth', opt.treewidth)\n",
+ "if opt.treewidth > 100:\n",
+ " raise Exception('Too large treewidth')\n",
+ "costs, mems = tn.simulation_cost(peo)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:23:50.164426Z",
- "start_time": "2020-10-07T10:23:49.770343Z"
+ "end_time": "2020-10-11T03:25:04.376000Z",
+ "start_time": "2020-10-11T03:25:03.480448Z"
},
"scrolled": true
},
@@ -155,12 +273,12 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Total FLOPS=0.247250917 G, Memory=0.268435456 G\n"
+ "Total FLOPS=0.002586149 G, Memory=0.002097152 G\n"
]
},
{
"data": {
- "image/png": "\n",
+ "image/png": "\n",
"text/plain": [
""
]
@@ -195,14 +313,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:23:49.308184Z",
- "start_time": "2020-10-07T07:23:48.102918Z"
+ "end_time": "2020-10-11T03:33:13.080314Z",
+ "start_time": "2020-10-11T03:33:10.463143Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([-0.14101067-3.12250226e-17j])"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"backend = qt.PerfNumpyBackend(print=False)\n",
"sim = qt.QtreeSimulator(bucket_backend=backend)\n",
@@ -224,15 +353,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:24:09.759656Z",
- "start_time": "2020-10-07T07:24:08.942804Z"
+ "end_time": "2020-10-11T03:33:14.381426Z",
+ "start_time": "2020-10-11T03:33:13.081720Z"
},
"scrolled": true
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total time=1.9989681243896484\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
"source": [
"profile_results = backend._profile_results\n",
"step_times = [x[1] for x in profile_results.values()]\n",
@@ -271,11 +420,11 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 10,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:32:30.352158Z",
- "start_time": "2020-10-07T10:32:30.333400Z"
+ "end_time": "2020-10-11T03:33:18.651384Z",
+ "start_time": "2020-10-11T03:33:18.636636Z"
}
},
"outputs": [],
@@ -297,15 +446,19 @@
" return qt.optimisation.TensorNet.QtreeTensorNet.from_qtree_gates(circuit)\n",
"\n",
"@ex.provider\n",
- "def peo(tn):\n",
- " opt = qt.optimisation.Optimizer.DefaultOptimizer()\n",
+ "def peo(tn, ordering_algo='greedy', tamaki_wait_time=15):\n",
+ " if ordering_algo=='greedy':\n",
+ " opt = qt.optimisation.Optimizer.DefaultOptimizer()\n",
+ " elif 'tamaki' in ordering_algo:\n",
+ " if '_' in ordering_algo:\n",
+ " _, time_str = ordering_algo.split('_')\n",
+ " tamaki_wait_time=int(time_str)\n",
+ " opt = qt.optimisation.Optimizer.TamakiOptimizer(wait_time=tamaki_wait_time)\n",
" peo, _ = opt.optimize(tn)\n",
" return tuple(peo)\n",
"\n",
"@ex.provider\n",
"def sim_costs(tn, peo):\n",
- " opt = qt.optimisation.Optimizer.DefaultOptimizer()\n",
- " peo, _ = opt.optimize(tn)\n",
" costs, mems = tn.simulation_cost(peo)\n",
" return costs, mems\n",
"\n",
@@ -317,11 +470,11 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 11,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:32:30.486762Z",
- "start_time": "2020-10-07T10:32:30.475258Z"
+ "end_time": "2020-10-11T03:33:19.218710Z",
+ "start_time": "2020-10-11T03:33:19.214736Z"
}
},
"outputs": [],
@@ -340,11 +493,11 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 12,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:33:46.364675Z",
- "start_time": "2020-10-07T10:33:46.361706Z"
+ "end_time": "2020-10-11T03:33:19.709292Z",
+ "start_time": "2020-10-11T03:33:19.687803Z"
}
},
"outputs": [],
@@ -355,23 +508,24 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 13,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T10:35:58.030517Z",
- "start_time": "2020-10-07T10:35:57.468824Z"
- }
+ "end_time": "2020-10-11T03:33:34.270292Z",
+ "start_time": "2020-10-11T03:33:20.397957Z"
+ },
+ "scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "38797a3855c44faf80bd24f505fa9b8d",
+ "model_id": "aacd4f2ba12c42098133c49a47077d7c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "HBox(children=(FloatProgress(value=0.0, max=120.0), HTML(value='')))"
+ "HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))"
]
},
"metadata": {},
@@ -386,7 +540,7 @@
},
{
"data": {
- "image/png": "\n",
+ "image/png": "\n",
"text/plain": [
""
]
@@ -399,7 +553,7 @@
],
"source": [
"fig = ex.plot_variables2d(('sum_flops', 'max_mem'), n=[N], p=[3],\n",
- " d=[3,4], edge_idx=range(30),\n",
+ " d=[3,4], edge_idx=range(10),\n",
" seed=[SEED]\n",
" )\n",
"for ax in fig.axes:\n",
@@ -407,6 +561,62 @@
" ax.grid()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2020-10-11T03:32:05.580417Z",
+ "start_time": "2020-10-11T03:32:05.034225Z"
+ },
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "58766bb48c1142a885c375dc4a55818c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "fig = ex.plot_variables2d(('sum_flops', 'max_mem'), n=[N], p=[3],\n",
+ " d=[3,4], edge_idx=range(10)\n",
+ " ,seed=[SEED]\n",
+ " ,ordering_algo=['tamaki_5']\n",
+ " )\n",
+ "for ax in fig.axes:\n",
+ " ax.set_yscale('log')\n",
+ " ax.grid()"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -428,56 +638,43 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-10-07T07:43:46.450025Z",
- "start_time": "2020-10-07T07:43:46.445069Z"
- }
- },
- "outputs": [],
- "source": [
- "#export\n",
- "EDGE_IDX_FOR_SEED = {\n",
- " 107: [2, 3, 10, 15]\n",
- "}\n",
- "\n",
- "EDGE_IDX_FOR_SEED_JLSE = {\n",
- " 107: [2, 4, 8, 14, 15, 21]\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:39:58.442055Z",
- "start_time": "2020-10-07T07:39:58.439007Z"
+ "end_time": "2020-10-11T03:33:37.905921Z",
+ "start_time": "2020-10-11T03:33:37.897612Z"
}
},
"outputs": [],
"source": [
- "edge_indices = EDGE_IDX_FOR_SEED[SEED]\n",
+ "edge_indices = [2]\n",
"ds = [3, 4]\n",
"p = 3"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T08:17:25.827182Z",
- "start_time": "2020-10-07T08:17:25.810647Z"
+ "end_time": "2020-10-11T03:33:45.437646Z",
+ "start_time": "2020-10-11T03:33:45.430812Z"
}
},
"outputs": [],
"source": [
"#export\n",
"@ex.provider\n",
- "def sim_profile(circuit, tn):\n",
- " backend = qt.PerfNumpyBackend(print=False)\n",
+ "def sim_profile(circuit, tn, backend='numpy'):\n",
+ " if backend == 'numpy':\n",
+ " backend = qt.PerfNumpyBackend(print=False)\n",
+ " elif backend == 'mkl':\n",
+ " backend = qt.ProcessingFrameworks.PerfBackend.from_backend(\n",
+ " qt.ProcessingFrameworks.CMKLExtendedBackend, print=False)\n",
+ " elif backend == 'debug_mkl':\n",
+ " backend = qt.DebugFrameworks.DebugMKLBackend()\n",
+ " elif backend == 'exatn':\n",
+ " backend = qt.ProcessingFrameworks.PerfBackend.from_backend(qt.ProcessingFrameworks.ExaTnBackend, print=False)\n",
" sim = qt.QtreeSimulator(bucket_backend=backend)\n",
"\n",
" sim.simulate(circuit)\n",
@@ -493,58 +690,159 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:40:00.853291Z",
- "start_time": "2020-10-07T07:40:00.237722Z"
+ "end_time": "2020-10-11T03:33:47.801203Z",
+ "start_time": "2020-10-11T03:33:47.289919Z"
},
"scrolled": false
},
- "outputs": [],
- "source": [
- "f = ex.draw_dependency_graph(figsize=(7,6), node_size=20)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-10-07T07:40:01.553712Z",
- "start_time": "2020-10-07T07:40:01.484774Z"
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
- },
- "outputs": [],
+ ],
"source": [
- "estimators = ex.map_variable('step_flops', d=ds, edge_idx=edge_indices, n=[N], p=[p])"
+ "f = ex.draw_dependency_graph(figsize=(7,6), node_size=20)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:40:03.546159Z",
- "start_time": "2020-10-07T07:40:02.010399Z"
+ "end_time": "2020-10-11T03:34:24.680002Z",
+ "start_time": "2020-10-11T03:34:24.650994Z"
},
"scrolled": false
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1b32ab81edaf452eaf14d5c7e462e6e0",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "1192070"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "times = ex.map_variable('step_sim_time', d=ds, edge_idx=edge_indices, n=[N], p=[p])"
+ "estimators = ex.map_variable('step_flops', d=ds, edge_idx=edge_indices,\n",
+ " n=[N], p=[p], seed=[SEED]\n",
+ " )\n",
+ "max(np.max(estimators))"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:40:04.393623Z",
- "start_time": "2020-10-07T07:40:04.134441Z"
- }
+ "end_time": "2020-10-11T03:34:04.920612Z",
+ "start_time": "2020-10-11T03:34:03.202761Z"
+ },
+ "scrolled": false
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9b287cacb9c2431aacc94463a54cee33",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/dali/side-projects-hobby/cartesian_explorer/cartesian_explorer/ExplorerBasic.py:72: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+ " result = np.array(list(tqdm(\n"
+ ]
+ }
+ ],
+ "source": [
+ "times = ex.map_variable('step_sim_time', d=ds, edge_idx=edge_indices,\n",
+ " n=[N], p=[p], seed=[SEED], backend=['mkl'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2020-10-11T03:34:27.480257Z",
+ "start_time": "2020-10-11T03:34:27.300743Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Text(0, 0.5, 'Runtime')"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
"source": [
"est_flat = np.concatenate(estimators.flatten())\n",
"times_flat = np.concatenate(times.flatten())\n",
@@ -576,48 +874,79 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T08:20:00.827753Z",
- "start_time": "2020-10-07T08:20:00.819148Z"
+ "end_time": "2020-10-11T03:38:15.650497Z",
+ "start_time": "2020-10-11T03:38:15.632471Z"
}
},
"outputs": [],
"source": [
"#export\n",
"def plot_with_filter(est_flat, times_flat):\n",
- " filt = (est_flat>1e4) #& (times_flat>1e-4)\n",
+ " filt = (est_flat>5e4) #& (times_flat>1e-4)\n",
" est_flat_filtered = est_flat[filt]\n",
" times_flat_filtered = times_flat[filt]\n",
"\n",
" # Fit times\n",
" log_fit_coef = np.polyfit(np.log(est_flat_filtered), np.log(times_flat_filtered), 1)\n",
" fit_coef = np.polyfit(est_flat_filtered, times_flat_filtered, 1)\n",
+ " def fixed_slope(x, shift):\n",
+ " slope = 1.0\n",
+ " return x*slope + shift\n",
+ " popt, pcov = scipy.optimize.curve_fit(fixed_slope, np.log(est_flat_filtered), np.log(times_flat_filtered))\n",
" print('Lin fit:', fit_coef)\n",
" print('Log fit:', log_fit_coef)\n",
+ " print('Slope-1 log fit:', popt)\n",
" fit_fn = np.poly1d(log_fit_coef)\n",
+ " fit_fn = fixed_slope\n",
"\n",
" # Plot scatter with filtered data\n",
- " plt.scatter(est_flat_filtered, times_flat_filtered)\n",
- " xfit = 10**np.linspace(4, 7, 100)\n",
- " plt.plot(xfit, np.exp(fit_fn(np.log(xfit))), color='blue')\n",
+ " plt.scatter(est_flat_filtered, times_flat_filtered, marker='x')\n",
+ " min_x = np.log10(est_flat_filtered.min())\n",
+ " max_x = np.log10(est_flat_filtered.max()) + .5\n",
+ " xfit = 10**np.linspace(min_x, max_x, 100)\n",
+ " plt.plot(xfit, np.exp(fit_fn(np.log(xfit), popt[0])), color='blue')\n",
" plt.loglog()\n",
" plt.xlabel('estimated FLOP')\n",
" plt.ylabel('Runtime')\n",
+ " plt.grid()\n",
" return log_fit_coef, fit_coef"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T08:20:00.986928Z",
- "start_time": "2020-10-07T08:20:00.964812Z"
+ "end_time": "2020-10-11T03:38:17.235983Z",
+ "start_time": "2020-10-11T03:38:16.590680Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Lin fit: [ 3.95588723e-08 -8.67827872e-03]\n",
+ "Log fit: [ 1.29641774 -22.79449518]\n",
+ "Slope-1 log fit: [-19.11949889]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
"source": [
"log_fit_coef, fit_coef = plot_with_filter(est_flat, times_flat)"
]
@@ -638,14 +967,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:40:10.141424Z",
- "start_time": "2020-10-07T07:40:10.138386Z"
+ "end_time": "2020-10-11T03:38:42.394084Z",
+ "start_time": "2020-10-11T03:38:42.385458Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Factual FLOPS on a laptop 3.333333e+07\n"
+ ]
+ }
+ ],
"source": [
"FLOP = 1e6/.03\n",
"print(f'Factual FLOPS on a laptop {FLOP:e}')"
@@ -660,29 +997,52 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:40:12.055353Z",
- "start_time": "2020-10-07T07:40:12.050244Z"
+ "end_time": "2020-10-11T03:38:43.794429Z",
+ "start_time": "2020-10-11T03:38:43.789660Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Factual FLOPS on a laptop, from log fit 7.934571e+09\n"
+ ]
+ }
+ ],
"source": [
"FLOP_logfit = np.exp(-log_fit_coef[1])\n",
"print(f'Factual FLOPS on a laptop, from log fit {FLOP_logfit:e}')"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Compare with matrix multiplication"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:40:16.520371Z",
- "start_time": "2020-10-07T07:40:12.850181Z"
+ "end_time": "2020-10-11T03:38:51.089545Z",
+ "start_time": "2020-10-11T03:38:47.401257Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "4.45 ms ± 487 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+ ]
+ }
+ ],
"source": [
"N = 500\n",
"matmul_flop = N**2*(N-1)\n",
@@ -690,69 +1050,80 @@
"%timeit np.matmul(x,y)"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Compare with matrix multiplication"
- ]
- },
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:40:19.790768Z",
- "start_time": "2020-10-07T07:40:19.787939Z"
+ "end_time": "2020-10-11T03:38:51.102068Z",
+ "start_time": "2020-10-11T03:38:51.090876Z"
}
},
"outputs": [],
"source": [
- "FLOPS_matmul = matmul_flop/4.65e-3\n",
- "print(f'FLOPS on this laptop for matrix mul: {FLOPS_matmul:e}')"
+ "#export\n",
+ "import timeit\n",
+ "def get_log_flops_vs_matmul(log_fit_coef):\n",
+ " FLOPS_logfit = np.exp(-log_fit_coef[1])\n",
+ "\n",
+ " N = 500\n",
+ " matmul_flop = N**2*(N-1)\n",
+ " x, y = np.random.randn(2, N, N)\n",
+ " number = 100\n",
+ " matmul_time = timeit.timeit(lambda: np.matmul(x,y)\n",
+ " , number=number)/number\n",
+ "\n",
+ " FLOPS_matmul = matmul_flop/matmul_time\n",
+ " \n",
+ " return FLOPS_logfit, FLOPS_matmul"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T07:40:20.329754Z",
- "start_time": "2020-10-07T07:40:20.326436Z"
+ "end_time": "2020-10-11T03:38:51.944811Z",
+ "start_time": "2020-10-11T03:38:51.106891Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Simulator inefficiency: 1.9337510525337913\n",
+ "Simulator optimality: 0.5171296474226614\n"
+ ]
+ }
+ ],
"source": [
+ "FLOP_logfit, FLOPS_matmul = get_log_flops_vs_matmul(log_fit_coef)\n",
"print(f'Simulator inefficiency: {FLOPS_matmul/FLOP_logfit}')\n",
"print(f'Simulator optimality: {FLOP_logfit/FLOPS_matmul}')"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T08:25:27.943213Z",
- "start_time": "2020-10-07T08:25:27.939822Z"
+ "end_time": "2020-10-11T03:38:52.758397Z",
+ "start_time": "2020-10-11T03:38:52.744737Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "FLOPS on this laptop for matrix mul: 2.682796e+10\n"
+ ]
+ }
+ ],
"source": [
- "#export\n",
- "import timeit\n",
- "def get_log_flops_vs_matmul(log_fit_coef):\n",
- " FLOPS_logfit = np.exp(-log_fit_coef[1])\n",
- "\n",
- " N = 300\n",
- " matmul_flop = N**2*(N-1)\n",
- " x, y = np.random.randn(2, N, N)\n",
- " number = 100\n",
- " matmul_time = timeit.timeit(lambda: np.matmul(x,y)\n",
- " , number=number)/number\n",
- "\n",
- " FLOPS_matmul = matmul_flop/matmul_time\n",
- " \n",
- " return FLOPS_logfit, FLOPS_matmul"
+ "FLOPS_matmul = matmul_flop/4.65e-3\n",
+ "print(f'FLOPS on this laptop for matrix mul: {FLOPS_matmul:e}')"
]
},
{
@@ -771,14 +1142,52 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 51,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T08:14:59.391811Z",
- "start_time": "2020-10-07T08:14:59.381070Z"
+ "end_time": "2020-10-11T03:19:32.887639Z",
+ "start_time": "2020-10-11T03:19:32.862780Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'FLOPS_matmul' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m--------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'Simulator inefficiency: {FLOPS_matmul/FLOP_logfit}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'Simulator optimality: {FLOP_logfit/FLOPS_matmul}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mNameError\u001b[0m: name 'FLOPS_matmul' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'Simulator inefficiency: {FLOPS_matmul/FLOP_logfit}')\n",
+ "print(f'Simulator optimality: {FLOP_logfit/FLOPS_matmul}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2020-10-11T03:39:10.751939Z",
+ "start_time": "2020-10-11T03:39:10.718466Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# export\n",
"import click\n",
@@ -787,9 +1196,20 @@
"def cli():\n",
" pass\n",
"\n",
- "@cli.command()\n",
- "@click.argument('filename')\n",
- "def time_vs_flops_plot(filename):\n",
+ "@click.argument('filename', nargs=-1)\n",
+ "@click.option('-B', '--backend', default='numpy')\n",
+ "@click.option('-M', '--max-memory', default=3e8)\n",
+ "@click.option('-s', '--seed', default=SEED)\n",
+ "@click.option('-O', '--ordering_algo', default='greedy'\n",
+ " ,help=(\"One of (greedy, tamaki, tamaki_{wait_time})\"\n",
+ " \"'tamaki_15' means heuristic solver running for 15 seconds per graph\"\n",
+ " )\n",
+ " )\n",
+ "@click.option('--min-memory', default=3e6)\n",
+ "def time_vs_flops_plot(filename=None, backend='numpy', seed=SEED,\n",
+ " max_memory=2e8, min_memory=1e6,\n",
+ " ordering_algo='greedy'\n",
+ " ):\n",
" \"\"\"\n",
" Plots times and estimated FLOP for each step of several QAOA energy computation contractions.\n",
" \n",
@@ -799,34 +1219,200 @@
" - N = 1000\n",
" \n",
" \"\"\"\n",
- " edge_indices = EDGE_IDX_FOR_SEED[SEED]\n",
" ds = [3, 4]\n",
" p = 3\n",
" N = 1000\n",
" \n",
- " estimators = ex.map_variable('step_flops', d=ds,\n",
- " edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])\n",
- " maxmems = ex.map_variable('max_mem', d=ds,\n",
- " edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])\n",
- " if np.max(maxmems)>1e10:\n",
- " print('memory estimations:', maxmems)\n",
- " raise Exception('Will get too large tetsors!!')\n",
+ " edges_to_try = 20\n",
+ " estimators, maxmems = ex.map_variables(\n",
+ " ('step_flops', 'max_mem'),\n",
+ " d=ds,\n",
+ " edge_idx=range(edges_to_try), n=[N], p=[p],\n",
+ " seed=[seed],\n",
+ " ordering_algo=[ordering_algo],\n",
+ " )\n",
+ " \n",
+ " \n",
+ " selector = ((min_memory < maxmems) & (maxmems < max_memory)).all(axis=0)\n",
+ " edge_indices = np.arange(edges_to_try)[selector]\n",
+ " print('Selected edges', edge_indices)\n",
+ " print('Estimated memories', maxmems.T[selector].flatten())\n",
+ " estimators = estimators.T[selector]\n",
" \n",
" times = ex.map_variable('step_sim_time', d=ds,\n",
- " edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])\n",
+ " edge_idx=edge_indices, n=[N], p=[p]\n",
+ " ,seed=[seed]\n",
+ " ,backend=[backend]\n",
+ " ,ordering_algo=[ordering_algo]\n",
+ " )\n",
" \n",
- " est_flat = np.concatenate(estimators.flatten())\n",
+ " est_flat = np.concatenate(estimators.T.flatten())\n",
" times_flat = np.concatenate(times.flatten())\n",
" \n",
" log_fit_coef, fit_coef = plot_with_filter(est_flat, times_flat)\n",
- " plt.savefig(filename)\n",
+ " if filename:\n",
+ " plt.savefig(filename[0])\n",
" \n",
" fit, matmul = get_log_flops_vs_matmul(log_fit_coef)\n",
" \n",
" print('===Results===')\n",
+ " print(f'Total time: {times_flat.sum():.5}')\n",
" print(f'Simulator fitted flops: {fit/1e9:.5} G')\n",
" print(f'Matmul flops: {matmul/1e9:.5} G')\n",
- " print(f'Simulator optimality: {fit/matmul}')"
+ " print(f'Simulator optimality: {fit/matmul}')\n",
+ "\n",
+ "cli.command()(time_vs_flops_plot)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2020-10-11T03:39:58.588851Z",
+ "start_time": "2020-10-11T03:39:12.389753Z"
+ },
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "00c65522a7cd40e4999f31e3da7ba01f",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Selected edges [ 0 2 3 4 10 13 16]\n",
+ "Estimated memories [27262976 1310720 7864320 37748736 11534336 16777216 46137344 436207616\n",
+ " 3145728 83886080 2621440 14680064 13631488 5767168]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/dali/side-projects-hobby/cartesian_explorer/cartesian_explorer/ExplorerBasic.py:72: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+ " result = np.array(list(tqdm(\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6a05b71100b745a3a8b1674130988427",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Lin fit: [2.11670890e-08 6.72474995e-03]\n",
+ "Log fit: [ 1.33334429 -23.18701601]\n",
+ "Slope-1 log fit: [-18.95265756]\n",
+ "===Results===\n",
+ "Total time: 9.8551\n",
+ "Simulator fitted flops: 11.749 G\n",
+ "Matmul flops: 28.388 G\n",
+ "Simulator optimality: 0.41387031429035953\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "fbf71d18456449bc8d67fa0bdb46cee5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Selected edges [ 0 2 3 4 10 13 16]\n",
+ "Estimated memories [27262976 1310720 7864320 37748736 11534336 16777216 46137344 436207616\n",
+ " 3145728 83886080 2621440 14680064 13631488 5767168]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/dali/side-projects-hobby/cartesian_explorer/cartesian_explorer/ExplorerBasic.py:72: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+ " result = np.array(list(tqdm(\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7894f510be9e4a9fbc9687a923902b5e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Lin fit: [ 1.61124218e-08 -1.83452513e-03]\n",
+ "Log fit: [ 1.25167296 -22.48619316]\n",
+ "Slope-1 log fit: [-19.28927764]\n",
+ "===Results===\n",
+ "Total time: 6.3469\n",
+ "Simulator fitted flops: 5.8295 G\n",
+ "Matmul flops: 23.118 G\n",
+ "Simulator optimality: 0.2521579552905311\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "time_vs_flops_plot(max_memory=5e8, min_memory=1e6)\n",
+ "time_vs_flops_plot(max_memory=5e8, min_memory=1e6, backend='mkl')"
]
},
{
@@ -838,11 +1424,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T08:24:00.725270Z",
- "start_time": "2020-10-07T08:24:00.720844Z"
+ "end_time": "2020-10-11T03:41:01.528110Z",
+ "start_time": "2020-10-11T03:41:01.526254Z"
}
},
"outputs": [],
@@ -853,11 +1439,11 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 39,
"metadata": {
"ExecuteTime": {
- "end_time": "2020-10-07T11:26:28.836173Z",
- "start_time": "2020-10-07T11:26:28.792839Z"
+ "end_time": "2020-10-11T04:13:46.365119Z",
+ "start_time": "2020-10-11T04:13:46.294641Z"
}
},
"outputs": [
@@ -865,6 +1451,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
+ "Converted QAOA_bench.ipynb.\n",
"Converted Time_vs_FLOP.ipynb.\n"
]
}
@@ -901,7 +1488,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.7.6"
},
"toc": {
"base_numbering": 1,
@@ -919,7 +1506,7 @@
"width": "165px"
},
"toc_section_display": true,
- "toc_window_display": true
+ "toc_window_display": false
}
},
"nbformat": 4,
diff --git a/analysis/spec/qtensor_specs/_nbdev.py b/analysis/spec/qtensor_specs/_nbdev.py
index f5881898..560d8c9b 100644
--- a/analysis/spec/qtensor_specs/_nbdev.py
+++ b/analysis/spec/qtensor_specs/_nbdev.py
@@ -12,8 +12,6 @@
"step_flops": "Time_vs_FLOP.ipynb",
"max_mem": "Time_vs_FLOP.ipynb",
"SEED": "Time_vs_FLOP.ipynb",
- "EDGE_IDX_FOR_SEED": "Time_vs_FLOP.ipynb",
- "EDGE_IDX_FOR_SEED_JLSE": "Time_vs_FLOP.ipynb",
"sim_profile": "Time_vs_FLOP.ipynb",
"step_sim_time": "Time_vs_FLOP.ipynb",
"plot_with_filter": "Time_vs_FLOP.ipynb",
diff --git a/analysis/spec/qtensor_specs/time_vs_flop.py b/analysis/spec/qtensor_specs/time_vs_flop.py
index 0c6d16d8..18dbe9c6 100644
--- a/analysis/spec/qtensor_specs/time_vs_flop.py
+++ b/analysis/spec/qtensor_specs/time_vs_flop.py
@@ -1,13 +1,13 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks/Time_vs_FLOP.ipynb (unless otherwise specified).
__all__ = ['ex', 'graph', 'circuit', 'tn', 'peo', 'sim_costs', 'sum_flops', 'step_flops', 'max_mem', 'SEED',
- 'EDGE_IDX_FOR_SEED', 'EDGE_IDX_FOR_SEED_JLSE', 'sim_profile', 'step_sim_time', 'plot_with_filter',
- 'get_log_flops_vs_matmul', 'cli', 'time_vs_flops_plot']
+ 'sim_profile', 'step_sim_time', 'plot_with_filter', 'get_log_flops_vs_matmul', 'cli', 'time_vs_flops_plot']
# Cell
import sys
import numpy as np
import matplotlib.pyplot as plt
+import scipy
import qtensor as qt
from cartesian_explorer import Explorer
@@ -37,15 +37,19 @@ def tn(circuit):
return qt.optimisation.TensorNet.QtreeTensorNet.from_qtree_gates(circuit)
@ex.provider
-def peo(tn):
- opt = qt.optimisation.Optimizer.DefaultOptimizer()
+def peo(tn, ordering_algo='greedy', tamaki_wait_time=15):
+ if ordering_algo=='greedy':
+ opt = qt.optimisation.Optimizer.DefaultOptimizer()
+ elif 'tamaki' in ordering_algo:
+ if '_' in ordering_algo:
+ _, time_str = ordering_algo.split('_')
+ tamaki_wait_time=int(time_str)
+ opt = qt.optimisation.Optimizer.TamakiOptimizer(wait_time=tamaki_wait_time)
peo, _ = opt.optimize(tn)
return tuple(peo)
@ex.provider
def sim_costs(tn, peo):
- opt = qt.optimisation.Optimizer.DefaultOptimizer()
- peo, _ = opt.optimize(tn)
costs, mems = tn.simulation_cost(peo)
return costs, mems
@@ -68,19 +72,16 @@ def max_mem(sim_costs):
# Cell
SEED=107
-# Cell
-EDGE_IDX_FOR_SEED = {
- 107: [2, 3, 10, 15]
-}
-
-EDGE_IDX_FOR_SEED_JLSE = {
- 107: [2, 4, 8, 14, 15, 21]
-}
-
# Cell
@ex.provider
-def sim_profile(circuit, tn):
- backend = qt.PerfNumpyBackend(print=False)
+def sim_profile(circuit, tn, backend='numpy'):
+ if backend == 'numpy':
+ backend = qt.PerfNumpyBackend(print=False)
+ elif backend == 'mkl':
+ backend = qt.ProcessingFrameworks.PerfBackend.from_backend(
+ qt.ProcessingFrameworks.CMKLExtendedBackend, print=False)
+ elif backend == 'debug_mkl':
+ backend = qt.DebugFrameworks.DebugMKLBackend()
sim = qt.QtreeSimulator(bucket_backend=backend)
sim.simulate(circuit)
@@ -95,24 +96,33 @@ def step_sim_time(sim_profile, tn):
# Cell
def plot_with_filter(est_flat, times_flat):
- filt = (est_flat>1e4) #& (times_flat>1e-4)
+ filt = (est_flat>5e4) #& (times_flat>1e-4)
est_flat_filtered = est_flat[filt]
times_flat_filtered = times_flat[filt]
# Fit times
log_fit_coef = np.polyfit(np.log(est_flat_filtered), np.log(times_flat_filtered), 1)
fit_coef = np.polyfit(est_flat_filtered, times_flat_filtered, 1)
+ def fixed_slope(x, shift):
+ slope = 1.0
+ return x*slope + shift
+ popt, pcov = scipy.optimize.curve_fit(fixed_slope, np.log(est_flat_filtered), np.log(times_flat_filtered))
print('Lin fit:', fit_coef)
print('Log fit:', log_fit_coef)
+ print('Slope-1 log fit:', popt)
fit_fn = np.poly1d(log_fit_coef)
+ fit_fn = fixed_slope
# Plot scatter with filtered data
- plt.scatter(est_flat_filtered, times_flat_filtered)
- xfit = 10**np.linspace(4, 7, 100)
- plt.plot(xfit, np.exp(fit_fn(np.log(xfit))), color='blue')
+ plt.scatter(est_flat_filtered, times_flat_filtered, marker='x')
+ min_x = np.log10(est_flat_filtered.min())
+ max_x = np.log10(est_flat_filtered.max()) + .5
+ xfit = 10**np.linspace(min_x, max_x, 100)
+ plt.plot(xfit, np.exp(fit_fn(np.log(xfit), popt[0])), color='blue')
plt.loglog()
plt.xlabel('estimated FLOP')
plt.ylabel('Runtime')
+ plt.grid()
return log_fit_coef, fit_coef
# Cell
@@ -120,7 +130,7 @@ def plot_with_filter(est_flat, times_flat):
def get_log_flops_vs_matmul(log_fit_coef):
FLOPS_logfit = np.exp(-log_fit_coef[1])
- N = 300
+ N = 500
matmul_flop = N**2*(N-1)
x, y = np.random.randn(2, N, N)
number = 100
@@ -138,9 +148,20 @@ def get_log_flops_vs_matmul(log_fit_coef):
def cli():
pass
-@cli.command()
-@click.argument('filename')
-def time_vs_flops_plot(filename):
+@click.argument('filename', nargs=-1)
+@click.option('-B', '--backend', default='numpy')
+@click.option('-M', '--max-memory', default=3e8)
+@click.option('-s', '--seed', default=SEED)
+@click.option('-O', '--ordering_algo', default='greedy'
+ ,help=("One of (greedy, tamaki, tamaki_{wait_time})"
+ "'tamki_15' means heuristic solver running for 15 seconds per graph"
+ )
+ )
+@click.option('--min-memory', default=3e6)
+def time_vs_flops_plot(filename=None, backend='numpy', seed=SEED,
+ max_memory=2e8, min_memory=1e6,
+ ordering_algo='greedy', tamaki_time=10
+ ):
"""
Plots times and estimated FLOP for each step of several QAOA energy computation contractions.
@@ -150,31 +171,46 @@ def time_vs_flops_plot(filename):
- N = 1000
"""
- edge_indices = EDGE_IDX_FOR_SEED[SEED]
ds = [3, 4]
p = 3
N = 1000
- estimators = ex.map_variable('step_flops', d=ds,
- edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])
- maxmems = ex.map_variable('max_mem', d=ds,
- edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])
- if np.max(maxmems)>1e10:
- print('memory estimations:', maxmems)
- raise Exception('Will get too large tetsors!!')
+ edges_to_try = 20
+ estimators, maxmems = ex.map_variables(
+ ('step_flops', 'max_mem'),
+ d=ds,
+ edge_idx=range(edges_to_try), n=[N], p=[p],
+ seed=[seed],
+ ordering_algo=[ordering_algo],
+ )
+
+
+ selector = ((min_memory < maxmems) & (maxmems < max_memory)).all(axis=0)
+ edge_indices = np.arange(edges_to_try)[selector]
+ print('Selected edges', edge_indices)
+ print('Estimated memories', maxmems.T[selector].flatten())
+ estimators = estimators.T[selector]
times = ex.map_variable('step_sim_time', d=ds,
- edge_idx=edge_indices, n=[N], p=[p], seed=[SEED])
+ edge_idx=edge_indices, n=[N], p=[p]
+ ,seed=[seed]
+ ,backend=[backend]
+ ,ordering_algo=[ordering_algo]
+ )
- est_flat = np.concatenate(estimators.flatten())
+ est_flat = np.concatenate(estimators.T.flatten())
times_flat = np.concatenate(times.flatten())
log_fit_coef, fit_coef = plot_with_filter(est_flat, times_flat)
- plt.savefig(filename)
+ if filename:
+ plt.savefig(filename[0])
fit, matmul = get_log_flops_vs_matmul(log_fit_coef)
print('===Results===')
+ print(f'Total time: {times_flat.sum():.5}')
print(f'Simulator fitted flops: {fit/1e9:.5} G')
print(f'Matmul flops: {matmul/1e9:.5} G')
- print(f'Simulator optimality: {fit/matmul}')
\ No newline at end of file
+ print(f'Simulator optimality: {fit/matmul}')
+
+cli.command()(time_vs_flops_plot)
\ No newline at end of file
diff --git a/bench/mklbench/bench.cpp b/bench/mklbench/bench.cpp
index 1ef73012..eb2489e0 100644
--- a/bench/mklbench/bench.cpp
+++ b/bench/mklbench/bench.cpp
@@ -157,29 +157,20 @@ int main(void)
// transa transb M N K
int i;
- run_size(do_trans, no_trans, 4096);
-
-// for (i = 10; i >= -10; i--)
-// run_size(do_trans, no_trans, 4096 + i);
-// for (i = 10; i >= -10; i--)
-// run_size(do_trans, no_trans, 2048 + i);
-// for (i = 10; i >= -10; i--)
-// run_size(do_trans, no_trans, 1024 + i);
-// for (i = 10; i >= -10; i--)
-// run_size(do_trans, no_trans, 256 + i);
-// for (i = 10; i >= -10; i--)
-// run_size(do_trans, no_trans, 128 + i);
-// for (i = 10; i >= -10; i--)
-// run_size(do_trans, no_trans, 32 + i);
-//
-// for (i = 4096; i >= 512; i -= 256)
-// run_size(no_trans, no_trans, i, i, i);
-//
-// for (i = 512; i >= 64; i -= 32)
-// run_size(no_trans, no_trans, i, i, i);
-
-// for (i = 64; i >= 16; i -= 1)
-// run_size(no_trans, no_trans, i, i, i);
+ //run_size(no_trans, do_trans, 4096, 4096, 4096);
+ run_size(no_trans, do_trans, 4096, 1, 4096);
+ //run_size(no_trans, do_trans, 1000, 1000, 1000);
+ run_size(no_trans, do_trans, 1000, 1, 1000);
+
+ // for (i = 4096; i >= 512; i -= 256)
+ // run_size(no_trans, no_trans, i, i, i);
+
+ //for (i = 512; i >= 64; i -= 32)
+ // run_size(no_trans, no_trans, i, i, i);
+
+ //for (i = 64; i >= 16; i -= 1)
+ // run_size(no_trans, no_trans, i, i, i);
+
return EXIT_SUCCESS;
}
diff --git a/bench/pybench/tn_bench.py b/bench/pybench/tn_bench.py
index ec728b68..8f2617e3 100644
--- a/bench/pybench/tn_bench.py
+++ b/bench/pybench/tn_bench.py
@@ -29,6 +29,6 @@ def run(n, num_iter, num_batch):
if __name__ == "__main__":
tn.set_default_backend(sys.argv[1])
- for i in range(4102, 4090 - 1, -1):
- run(i, 20, 1)
+ for i in range(4120, 4082 - 2, -2):
+ run(i, 10, 1)
diff --git a/data/README.md b/data/README.md
new file mode 100644
index 00000000..914b9cc8
--- /dev/null
+++ b/data/README.md
@@ -0,0 +1 @@
+The debugging backend and mkl_verbose add 3 seconds to an 18-second task.
diff --git a/data/debug_log_matched.log b/data/debug_log_matched.log
new file mode 100644
index 00000000..61efd8e7
--- /dev/null
+++ b/data/debug_log_matched.log
@@ -0,0 +1,1181 @@
+==
+PROF:: Bucket contains: [XPhase+(v_1827,v_1828), E162(v_1827), E1817(v_1827,v_1828,v_1829,v_1830,v_1831,v_1832,v_1833,v_1834,v_1835,v_1836,v_1837,v_1839,v_1840,v_1843,v_1844,v_1849,v_1856,v_1860), E1820(v_1827,v_1828,v_1829,v_1830,v_1831,v_1832,v_1833,v_1834,v_1835,v_1836,v_1837,v_1842,v_1847,v_1851,v_1852,v_1855,v_1859,v_1861,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract, input sizes: 4 2 output: 4
+ * Dimensions: C[0]:2 C[1]:2 C[2]:1
+ MKL_VERBOSE ZGEMM(N,T,1,2,1,0x7ffc3fd2db50,0x4c0f190,1,0x4d90500,2,0x7ffc3fd2db60,0x4d8ff30,1) 5.28us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1,2,1,0x7ffc3fd2db50,0x4c0f1a0,1,0x4d90520,2,0x7ffc3fd2db60,0x4d8ff50,1) 253ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 0 milliseconds
+ After debug_mkl_contract, duration: 7.963180541992188e-05
+ Starting debug_mkl_contract, input sizes: 4 262144 output: 262144
+ Dimensions: C[0]:4 C[1]:1 C[2]:65536
+ MKL_VERBOSE ZGEMM(N,T,65536,1,1,0x7ffc3fd2db50,0xaea0390,65536,0x4d8ff30,1,0x7ffc3fd2db60,0xb2a03a0,65536) 127.61us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,65536,1,1,0x7ffc3fd2db50,0xafa0390,65536,0x4d8ff40,1,0x7ffc3fd2db60,0xb3a03a0,65536) 740.05us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,65536,1,1,0x7ffc3fd2db50,0xb0a0390,65536,0x4d8ff50,1,0x7ffc3fd2db60,0xb4a03a0,65536) 55.17us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,65536,1,1,0x7ffc3fd2db50,0xb1a0390,65536,0x4d8ff60,1,0x7ffc3fd2db60,0xb5a03a0,65536) 180.17us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 1 milliseconds
+ After debug_mkl_contract, duration: 0.001222372055053711
+ Starting debug_mkl_contract_sum, input sizes: 262144 2097152 output: 134217728
+ * Dimensions: f:1024 k:2 n:1024 m:128
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5dfff010,1048576,0xb2a03a0,131072,0x7ffc3fd2dde0,0x7f58dbff5010,1024) 1.05ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e003010,1048576,0xb2a0ba0,131072,0x7ffc3fd2dde0,0x7f58dc1f5010,1024) 958.32us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e007010,1048576,0xb2a13a0,131072,0x7ffc3fd2dde0,0x7f58dc3f5010,1024) 942.88us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e00b010,1048576,0xb2a1ba0,131072,0x7ffc3fd2dde0,0x7f58dc5f5010,1024) 957.45us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e00f010,1048576,0xb2a23a0,131072,0x7ffc3fd2dde0,0x7f58dc7f5010,1024) 928.45us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e013010,1048576,0xb2a2ba0,131072,0x7ffc3fd2dde0,0x7f58dc9f5010,1024) 903.97us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e017010,1048576,0xb2a33a0,131072,0x7ffc3fd2dde0,0x7f58dcbf5010,1024) 901.50us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e01b010,1048576,0xb2a3ba0,131072,0x7ffc3fd2dde0,0x7f58dcdf5010,1024) 886.54us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e01f010,1048576,0xb2a43a0,131072,0x7ffc3fd2dde0,0x7f58dcff5010,1024) 968.97us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e023010,1048576,0xb2a4ba0,131072,0x7ffc3fd2dde0,0x7f58dd1f5010,1024) 903.14us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e027010,1048576,0xb2a53a0,131072,0x7ffc3fd2dde0,0x7f58dd3f5010,1024) 911.73us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e02b010,1048576,0xb2a5ba0,131072,0x7ffc3fd2dde0,0x7f58dd5f5010,1024) 927.15us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e02f010,1048576,0xb2a63a0,131072,0x7ffc3fd2dde0,0x7f58dd7f5010,1024) 913.26us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e033010,1048576,0xb2a6ba0,131072,0x7ffc3fd2dde0,0x7f58dd9f5010,1024) 914.10us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e037010,1048576,0xb2a73a0,131072,0x7ffc3fd2dde0,0x7f58ddbf5010,1024) 912.27us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e03b010,1048576,0xb2a7ba0,131072,0x7ffc3fd2dde0,0x7f58dddf5010,1024) 920.12us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e03f010,1048576,0xb2a83a0,131072,0x7ffc3fd2dde0,0x7f58ddff5010,1024) 917.93us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e043010,1048576,0xb2a8ba0,131072,0x7ffc3fd2dde0,0x7f58de1f5010,1024) 927.94us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e047010,1048576,0xb2a93a0,131072,0x7ffc3fd2dde0,0x7f58de3f5010,1024) 919.64us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e04b010,1048576,0xb2a9ba0,131072,0x7ffc3fd2dde0,0x7f58de5f5010,1024) 987.16us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e04f010,1048576,0xb2aa3a0,131072,0x7ffc3fd2dde0,0x7f58de7f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e053010,1048576,0xb2aaba0,131072,0x7ffc3fd2dde0,0x7f58de9f5010,1024) 950.49us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e057010,1048576,0xb2ab3a0,131072,0x7ffc3fd2dde0,0x7f58debf5010,1024) 936.80us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e05b010,1048576,0xb2abba0,131072,0x7ffc3fd2dde0,0x7f58dedf5010,1024) 913.59us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e05f010,1048576,0xb2ac3a0,131072,0x7ffc3fd2dde0,0x7f58deff5010,1024) 961.82us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e063010,1048576,0xb2acba0,131072,0x7ffc3fd2dde0,0x7f58df1f5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e067010,1048576,0xb2ad3a0,131072,0x7ffc3fd2dde0,0x7f58df3f5010,1024) 886.38us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e06b010,1048576,0xb2adba0,131072,0x7ffc3fd2dde0,0x7f58df5f5010,1024) 854.98us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e06f010,1048576,0xb2ae3a0,131072,0x7ffc3fd2dde0,0x7f58df7f5010,1024) 833.66us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e073010,1048576,0xb2aeba0,131072,0x7ffc3fd2dde0,0x7f58df9f5010,1024) 837.83us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e077010,1048576,0xb2af3a0,131072,0x7ffc3fd2dde0,0x7f58dfbf5010,1024) 847.38us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e07b010,1048576,0xb2afba0,131072,0x7ffc3fd2dde0,0x7f58dfdf5010,1024) 844.66us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e07f010,1048576,0xb2b03a0,131072,0x7ffc3fd2dde0,0x7f58dfff5010,1024) 846.41us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e083010,1048576,0xb2b0ba0,131072,0x7ffc3fd2dde0,0x7f58e01f5010,1024) 840.42us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e087010,1048576,0xb2b13a0,131072,0x7ffc3fd2dde0,0x7f58e03f5010,1024) 855.80us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e08b010,1048576,0xb2b1ba0,131072,0x7ffc3fd2dde0,0x7f58e05f5010,1024) 871.87us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e08f010,1048576,0xb2b23a0,131072,0x7ffc3fd2dde0,0x7f58e07f5010,1024) 876.54us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e093010,1048576,0xb2b2ba0,131072,0x7ffc3fd2dde0,0x7f58e09f5010,1024) 926.04us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e097010,1048576,0xb2b33a0,131072,0x7ffc3fd2dde0,0x7f58e0bf5010,1024) 927.38us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e09b010,1048576,0xb2b3ba0,131072,0x7ffc3fd2dde0,0x7f58e0df5010,1024) 964.08us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e09f010,1048576,0xb2b43a0,131072,0x7ffc3fd2dde0,0x7f58e0ff5010,1024) 975.92us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e0a3010,1048576,0xb2b4ba0,131072,0x7ffc3fd2dde0,0x7f58e11f5010,1024) 936.81us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e0a7010,1048576,0xb2b53a0,131072,0x7ffc3fd2dde0,0x7f58e13f5010,1024) 939.43us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e0ab010,1048576,0xb2b5ba0,131072,0x7ffc3fd2dde0,0x7f58e15f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [ ... and 200 lines like above...]
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3cf010,1048576,0xb31a3a0,131072,0x7ffc3fd2dde0,0x7f58fa7f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3d3010,1048576,0xb31aba0,131072,0x7ffc3fd2dde0,0x7f58fa9f5010,1024) 995.09us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3d7010,1048576,0xb31b3a0,131072,0x7ffc3fd2dde0,0x7f58fabf5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3db010,1048576,0xb31bba0,131072,0x7ffc3fd2dde0,0x7f58fadf5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3df010,1048576,0xb31c3a0,131072,0x7ffc3fd2dde0,0x7f58faff5010,1024) 943.77us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3e3010,1048576,0xb31cba0,131072,0x7ffc3fd2dde0,0x7f58fb1f5010,1024) 991.87us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3e7010,1048576,0xb31d3a0,131072,0x7ffc3fd2dde0,0x7f58fb3f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3eb010,1048576,0xb31dba0,131072,0x7ffc3fd2dde0,0x7f58fb5f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3ef010,1048576,0xb31e3a0,131072,0x7ffc3fd2dde0,0x7f58fb7f5010,1024) 925.15us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3f3010,1048576,0xb31eba0,131072,0x7ffc3fd2dde0,0x7f58fb9f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3f7010,1048576,0xb31f3a0,131072,0x7ffc3fd2dde0,0x7f58fbbf5010,1024) 951.69us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3fb010,1048576,0xb31fba0,131072,0x7ffc3fd2dde0,0x7f58fbdf5010,1024) 988.33us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e3ff010,1048576,0xb3203a0,131072,0x7ffc3fd2dde0,0x7f58fbff5010,1024) 959.36us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e403010,1048576,0xb320ba0,131072,0x7ffc3fd2dde0,0x7f58fc1f5010,1024) 945.27us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e407010,1048576,0xb3213a0,131072,0x7ffc3fd2dde0,0x7f58fc3f5010,1024) 973.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e40b010,1048576,0xb321ba0,131072,0x7ffc3fd2dde0,0x7f58fc5f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e40f010,1048576,0xb3223a0,131072,0x7ffc3fd2dde0,0x7f58fc7f5010,1024) 986.68us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e413010,1048576,0xb322ba0,131072,0x7ffc3fd2dde0,0x7f58fc9f5010,1024) 982.35us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e417010,1048576,0xb3233a0,131072,0x7ffc3fd2dde0,0x7f58fcbf5010,1024) 973.71us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e41b010,1048576,0xb323ba0,131072,0x7ffc3fd2dde0,0x7f58fcdf5010,1024) 959.75us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e41f010,1048576,0xb3243a0,131072,0x7ffc3fd2dde0,0x7f58fcff5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e423010,1048576,0xb324ba0,131072,0x7ffc3fd2dde0,0x7f58fd1f5010,1024) 981.14us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e427010,1048576,0xb3253a0,131072,0x7ffc3fd2dde0,0x7f58fd3f5010,1024) 964.30us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e42b010,1048576,0xb325ba0,131072,0x7ffc3fd2dde0,0x7f58fd5f5010,1024) 945.46us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e42f010,1048576,0xb3263a0,131072,0x7ffc3fd2dde0,0x7f58fd7f5010,1024) 938.79us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e433010,1048576,0xb326ba0,131072,0x7ffc3fd2dde0,0x7f58fd9f5010,1024) 962.15us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e437010,1048576,0xb3273a0,131072,0x7ffc3fd2dde0,0x7f58fdbf5010,1024) 954.58us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e43b010,1048576,0xb327ba0,131072,0x7ffc3fd2dde0,0x7f58fddf5010,1024) 928.71us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e43f010,1048576,0xb3283a0,131072,0x7ffc3fd2dde0,0x7f58fdff5010,1024) 943.96us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e443010,1048576,0xb328ba0,131072,0x7ffc3fd2dde0,0x7f58fe1f5010,1024) 975.09us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e447010,1048576,0xb3293a0,131072,0x7ffc3fd2dde0,0x7f58fe3f5010,1024) 965.79us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e44b010,1048576,0xb329ba0,131072,0x7ffc3fd2dde0,0x7f58fe5f5010,1024) 970.16us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e44f010,1048576,0xb32a3a0,131072,0x7ffc3fd2dde0,0x7f58fe7f5010,1024) 942.79us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e453010,1048576,0xb32aba0,131072,0x7ffc3fd2dde0,0x7f58fe9f5010,1024) 944.25us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e457010,1048576,0xb32b3a0,131072,0x7ffc3fd2dde0,0x7f58febf5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e45b010,1048576,0xb32bba0,131072,0x7ffc3fd2dde0,0x7f58fedf5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e45f010,1048576,0xb32c3a0,131072,0x7ffc3fd2dde0,0x7f58feff5010,1024) 958.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e463010,1048576,0xb32cba0,131072,0x7ffc3fd2dde0,0x7f58ff1f5010,1024) 955.83us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e467010,1048576,0xb32d3a0,131072,0x7ffc3fd2dde0,0x7f58ff3f5010,1024) 959.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e46b010,1048576,0xb32dba0,131072,0x7ffc3fd2dde0,0x7f58ff5f5010,1024) 950.82us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e46f010,1048576,0xb32e3a0,131072,0x7ffc3fd2dde0,0x7f58ff7f5010,1024) 968.35us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e473010,1048576,0xb32eba0,131072,0x7ffc3fd2dde0,0x7f58ff9f5010,1024) 955.30us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e477010,1048576,0xb32f3a0,131072,0x7ffc3fd2dde0,0x7f58ffbf5010,1024) 943.74us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e47b010,1048576,0xb32fba0,131072,0x7ffc3fd2dde0,0x7f58ffdf5010,1024) 931.45us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e47f010,1048576,0xb3303a0,131072,0x7ffc3fd2dde0,0x7f58ffff5010,1024) 986.62us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e483010,1048576,0xb330ba0,131072,0x7ffc3fd2dde0,0x7f59001f5010,1024) 977.91us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e487010,1048576,0xb3313a0,131072,0x7ffc3fd2dde0,0x7f59003f5010,1024) 946.17us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e48b010,1048576,0xb331ba0,131072,0x7ffc3fd2dde0,0x7f59005f5010,1024) 946.69us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e48f010,1048576,0xb3323a0,131072,0x7ffc3fd2dde0,0x7f59007f5010,1024) 946.97us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e493010,1048576,0xb332ba0,131072,0x7ffc3fd2dde0,0x7f59009f5010,1024) 956.75us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e497010,1048576,0xb3333a0,131072,0x7ffc3fd2dde0,0x7f5900bf5010,1024) 941.58us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e49b010,1048576,0xb333ba0,131072,0x7ffc3fd2dde0,0x7f5900df5010,1024) 928.58us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e49f010,1048576,0xb3343a0,131072,0x7ffc3fd2dde0,0x7f5900ff5010,1024) 912.98us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4a3010,1048576,0xb334ba0,131072,0x7ffc3fd2dde0,0x7f59011f5010,1024) 931.40us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4a7010,1048576,0xb3353a0,131072,0x7ffc3fd2dde0,0x7f59013f5010,1024) 914.45us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4ab010,1048576,0xb335ba0,131072,0x7ffc3fd2dde0,0x7f59015f5010,1024) 929.06us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4af010,1048576,0xb3363a0,131072,0x7ffc3fd2dde0,0x7f59017f5010,1024) 917.62us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4b3010,1048576,0xb336ba0,131072,0x7ffc3fd2dde0,0x7f59019f5010,1024) 931.48us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4b7010,1048576,0xb3373a0,131072,0x7ffc3fd2dde0,0x7f5901bf5010,1024) 901.58us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4bb010,1048576,0xb337ba0,131072,0x7ffc3fd2dde0,0x7f5901df5010,1024) 928.18us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4bf010,1048576,0xb3383a0,131072,0x7ffc3fd2dde0,0x7f5901ff5010,1024) 929.69us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4c3010,1048576,0xb338ba0,131072,0x7ffc3fd2dde0,0x7f59021f5010,1024) 929.45us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4c7010,1048576,0xb3393a0,131072,0x7ffc3fd2dde0,0x7f59023f5010,1024) 968.94us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4cb010,1048576,0xb339ba0,131072,0x7ffc3fd2dde0,0x7f59025f5010,1024) 979.51us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4cf010,1048576,0xb33a3a0,131072,0x7ffc3fd2dde0,0x7f59027f5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4d3010,1048576,0xb33aba0,131072,0x7ffc3fd2dde0,0x7f59029f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4d7010,1048576,0xb33b3a0,131072,0x7ffc3fd2dde0,0x7f5902bf5010,1024) 994.58us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4db010,1048576,0xb33bba0,131072,0x7ffc3fd2dde0,0x7f5902df5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4df010,1048576,0xb33c3a0,131072,0x7ffc3fd2dde0,0x7f5902ff5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4e3010,1048576,0xb33cba0,131072,0x7ffc3fd2dde0,0x7f59031f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4e7010,1048576,0xb33d3a0,131072,0x7ffc3fd2dde0,0x7f59033f5010,1024) 1.05ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4eb010,1048576,0xb33dba0,131072,0x7ffc3fd2dde0,0x7f59035f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4ef010,1048576,0xb33e3a0,131072,0x7ffc3fd2dde0,0x7f59037f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4f3010,1048576,0xb33eba0,131072,0x7ffc3fd2dde0,0x7f59039f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4f7010,1048576,0xb33f3a0,131072,0x7ffc3fd2dde0,0x7f5903bf5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4fb010,1048576,0xb33fba0,131072,0x7ffc3fd2dde0,0x7f5903df5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e4ff010,1048576,0xb3403a0,131072,0x7ffc3fd2dde0,0x7f5903ff5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e503010,1048576,0xb340ba0,131072,0x7ffc3fd2dde0,0x7f59041f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e507010,1048576,0xb3413a0,131072,0x7ffc3fd2dde0,0x7f59043f5010,1024) 1.05ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e50b010,1048576,0xb341ba0,131072,0x7ffc3fd2dde0,0x7f59045f5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e50f010,1048576,0xb3423a0,131072,0x7ffc3fd2dde0,0x7f59047f5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e513010,1048576,0xb342ba0,131072,0x7ffc3fd2dde0,0x7f59049f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e517010,1048576,0xb3433a0,131072,0x7ffc3fd2dde0,0x7f5904bf5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e51b010,1048576,0xb343ba0,131072,0x7ffc3fd2dde0,0x7f5904df5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e51f010,1048576,0xb3443a0,131072,0x7ffc3fd2dde0,0x7f5904ff5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e523010,1048576,0xb344ba0,131072,0x7ffc3fd2dde0,0x7f59051f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e527010,1048576,0xb3453a0,131072,0x7ffc3fd2dde0,0x7f59053f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e52b010,1048576,0xb345ba0,131072,0x7ffc3fd2dde0,0x7f59055f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e52f010,1048576,0xb3463a0,131072,0x7ffc3fd2dde0,0x7f59057f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e533010,1048576,0xb346ba0,131072,0x7ffc3fd2dde0,0x7f59059f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e537010,1048576,0xb3473a0,131072,0x7ffc3fd2dde0,0x7f5905bf5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e53b010,1048576,0xb347ba0,131072,0x7ffc3fd2dde0,0x7f5905df5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e53f010,1048576,0xb3483a0,131072,0x7ffc3fd2dde0,0x7f5905ff5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e543010,1048576,0xb348ba0,131072,0x7ffc3fd2dde0,0x7f59061f5010,1024) 1.05ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e547010,1048576,0xb3493a0,131072,0x7ffc3fd2dde0,0x7f59063f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e54b010,1048576,0xb349ba0,131072,0x7ffc3fd2dde0,0x7f59065f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e54f010,1048576,0xb34a3a0,131072,0x7ffc3fd2dde0,0x7f59067f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e553010,1048576,0xb34aba0,131072,0x7ffc3fd2dde0,0x7f59069f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e557010,1048576,0xb34b3a0,131072,0x7ffc3fd2dde0,0x7f5906bf5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e55b010,1048576,0xb34bba0,131072,0x7ffc3fd2dde0,0x7f5906df5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e55f010,1048576,0xb34c3a0,131072,0x7ffc3fd2dde0,0x7f5906ff5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e563010,1048576,0xb34cba0,131072,0x7ffc3fd2dde0,0x7f59071f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e567010,1048576,0xb34d3a0,131072,0x7ffc3fd2dde0,0x7f59073f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e56b010,1048576,0xb34dba0,131072,0x7ffc3fd2dde0,0x7f59075f5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e56f010,1048576,0xb34e3a0,131072,0x7ffc3fd2dde0,0x7f59077f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e573010,1048576,0xb34eba0,131072,0x7ffc3fd2dde0,0x7f59079f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e577010,1048576,0xb34f3a0,131072,0x7ffc3fd2dde0,0x7f5907bf5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e57b010,1048576,0xb34fba0,131072,0x7ffc3fd2dde0,0x7f5907df5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e57f010,1048576,0xb3503a0,131072,0x7ffc3fd2dde0,0x7f5907ff5010,1024) 979.51us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e583010,1048576,0xb350ba0,131072,0x7ffc3fd2dde0,0x7f59081f5010,1024) 993.47us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e587010,1048576,0xb3513a0,131072,0x7ffc3fd2dde0,0x7f59083f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e58b010,1048576,0xb351ba0,131072,0x7ffc3fd2dde0,0x7f59085f5010,1024) 988.89us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e58f010,1048576,0xb3523a0,131072,0x7ffc3fd2dde0,0x7f59087f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e593010,1048576,0xb352ba0,131072,0x7ffc3fd2dde0,0x7f59089f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e597010,1048576,0xb3533a0,131072,0x7ffc3fd2dde0,0x7f5908bf5010,1024) 995.65us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e59b010,1048576,0xb353ba0,131072,0x7ffc3fd2dde0,0x7f5908df5010,1024) 991.90us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e59f010,1048576,0xb3543a0,131072,0x7ffc3fd2dde0,0x7f5908ff5010,1024) 995.89us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5a3010,1048576,0xb354ba0,131072,0x7ffc3fd2dde0,0x7f59091f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5a7010,1048576,0xb3553a0,131072,0x7ffc3fd2dde0,0x7f59093f5010,1024) 995.04us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5ab010,1048576,0xb355ba0,131072,0x7ffc3fd2dde0,0x7f59095f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5af010,1048576,0xb3563a0,131072,0x7ffc3fd2dde0,0x7f59097f5010,1024) 983.48us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5b3010,1048576,0xb356ba0,131072,0x7ffc3fd2dde0,0x7f59099f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5b7010,1048576,0xb3573a0,131072,0x7ffc3fd2dde0,0x7f5909bf5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5bb010,1048576,0xb357ba0,131072,0x7ffc3fd2dde0,0x7f5909df5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5bf010,1048576,0xb3583a0,131072,0x7ffc3fd2dde0,0x7f5909ff5010,1024) 998.35us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5c3010,1048576,0xb358ba0,131072,0x7ffc3fd2dde0,0x7f590a1f5010,1024) 988.64us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5c7010,1048576,0xb3593a0,131072,0x7ffc3fd2dde0,0x7f590a3f5010,1024) 957.56us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5cb010,1048576,0xb359ba0,131072,0x7ffc3fd2dde0,0x7f590a5f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5cf010,1048576,0xb35a3a0,131072,0x7ffc3fd2dde0,0x7f590a7f5010,1024) 997.89us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5d3010,1048576,0xb35aba0,131072,0x7ffc3fd2dde0,0x7f590a9f5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5d7010,1048576,0xb35b3a0,131072,0x7ffc3fd2dde0,0x7f590abf5010,1024) 980.69us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5db010,1048576,0xb35bba0,131072,0x7ffc3fd2dde0,0x7f590adf5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5df010,1048576,0xb35c3a0,131072,0x7ffc3fd2dde0,0x7f590aff5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5e3010,1048576,0xb35cba0,131072,0x7ffc3fd2dde0,0x7f590b1f5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5e7010,1048576,0xb35d3a0,131072,0x7ffc3fd2dde0,0x7f590b3f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5eb010,1048576,0xb35dba0,131072,0x7ffc3fd2dde0,0x7f590b5f5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5ef010,1048576,0xb35e3a0,131072,0x7ffc3fd2dde0,0x7f590b7f5010,1024) 997.80us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5f3010,1048576,0xb35eba0,131072,0x7ffc3fd2dde0,0x7f590b9f5010,1024) 961.92us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5f7010,1048576,0xb35f3a0,131072,0x7ffc3fd2dde0,0x7f590bbf5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5fb010,1048576,0xb35fba0,131072,0x7ffc3fd2dde0,0x7f590bdf5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e5ff010,1048576,0xb3603a0,131072,0x7ffc3fd2dde0,0x7f590bff5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e603010,1048576,0xb360ba0,131072,0x7ffc3fd2dde0,0x7f590c1f5010,1024) 1.05ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e607010,1048576,0xb3613a0,131072,0x7ffc3fd2dde0,0x7f590c3f5010,1024) 1.05ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e60b010,1048576,0xb361ba0,131072,0x7ffc3fd2dde0,0x7f590c5f5010,1024) 993.51us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e60f010,1048576,0xb3623a0,131072,0x7ffc3fd2dde0,0x7f590c7f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e613010,1048576,0xb362ba0,131072,0x7ffc3fd2dde0,0x7f590c9f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e617010,1048576,0xb3633a0,131072,0x7ffc3fd2dde0,0x7f590cbf5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e61b010,1048576,0xb363ba0,131072,0x7ffc3fd2dde0,0x7f590cdf5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e61f010,1048576,0xb3643a0,131072,0x7ffc3fd2dde0,0x7f590cff5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e623010,1048576,0xb364ba0,131072,0x7ffc3fd2dde0,0x7f590d1f5010,1024) 988.68us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e627010,1048576,0xb3653a0,131072,0x7ffc3fd2dde0,0x7f590d3f5010,1024) 978.39us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e62b010,1048576,0xb365ba0,131072,0x7ffc3fd2dde0,0x7f590d5f5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e62f010,1048576,0xb3663a0,131072,0x7ffc3fd2dde0,0x7f590d7f5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e633010,1048576,0xb366ba0,131072,0x7ffc3fd2dde0,0x7f590d9f5010,1024) 972.30us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e637010,1048576,0xb3673a0,131072,0x7ffc3fd2dde0,0x7f590dbf5010,1024) 994.98us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e63b010,1048576,0xb367ba0,131072,0x7ffc3fd2dde0,0x7f590ddf5010,1024) 991.37us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e63f010,1048576,0xb3683a0,131072,0x7ffc3fd2dde0,0x7f590dff5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e643010,1048576,0xb368ba0,131072,0x7ffc3fd2dde0,0x7f590e1f5010,1024) 968.08us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e647010,1048576,0xb3693a0,131072,0x7ffc3fd2dde0,0x7f590e3f5010,1024) 996.04us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e64b010,1048576,0xb369ba0,131072,0x7ffc3fd2dde0,0x7f590e5f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e64f010,1048576,0xb36a3a0,131072,0x7ffc3fd2dde0,0x7f590e7f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e653010,1048576,0xb36aba0,131072,0x7ffc3fd2dde0,0x7f590e9f5010,1024) 993.95us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e657010,1048576,0xb36b3a0,131072,0x7ffc3fd2dde0,0x7f590ebf5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e65b010,1048576,0xb36bba0,131072,0x7ffc3fd2dde0,0x7f590edf5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e65f010,1048576,0xb36c3a0,131072,0x7ffc3fd2dde0,0x7f590eff5010,1024) 999.36us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e663010,1048576,0xb36cba0,131072,0x7ffc3fd2dde0,0x7f590f1f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e667010,1048576,0xb36d3a0,131072,0x7ffc3fd2dde0,0x7f590f3f5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e66b010,1048576,0xb36dba0,131072,0x7ffc3fd2dde0,0x7f590f5f5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e66f010,1048576,0xb36e3a0,131072,0x7ffc3fd2dde0,0x7f590f7f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e673010,1048576,0xb36eba0,131072,0x7ffc3fd2dde0,0x7f590f9f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e677010,1048576,0xb36f3a0,131072,0x7ffc3fd2dde0,0x7f590fbf5010,1024) 999.73us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e67b010,1048576,0xb36fba0,131072,0x7ffc3fd2dde0,0x7f590fdf5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e67f010,1048576,0xb3703a0,131072,0x7ffc3fd2dde0,0x7f590fff5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e683010,1048576,0xb370ba0,131072,0x7ffc3fd2dde0,0x7f59101f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e687010,1048576,0xb3713a0,131072,0x7ffc3fd2dde0,0x7f59103f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e68b010,1048576,0xb371ba0,131072,0x7ffc3fd2dde0,0x7f59105f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e68f010,1048576,0xb3723a0,131072,0x7ffc3fd2dde0,0x7f59107f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e693010,1048576,0xb372ba0,131072,0x7ffc3fd2dde0,0x7f59109f5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e697010,1048576,0xb3733a0,131072,0x7ffc3fd2dde0,0x7f5910bf5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e69b010,1048576,0xb373ba0,131072,0x7ffc3fd2dde0,0x7f5910df5010,1024) 1.05ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e69f010,1048576,0xb3743a0,131072,0x7ffc3fd2dde0,0x7f5910ff5010,1024) 1.05ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6a3010,1048576,0xb374ba0,131072,0x7ffc3fd2dde0,0x7f59111f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6a7010,1048576,0xb3753a0,131072,0x7ffc3fd2dde0,0x7f59113f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6ab010,1048576,0xb375ba0,131072,0x7ffc3fd2dde0,0x7f59115f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6af010,1048576,0xb3763a0,131072,0x7ffc3fd2dde0,0x7f59117f5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6b3010,1048576,0xb376ba0,131072,0x7ffc3fd2dde0,0x7f59119f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6b7010,1048576,0xb3773a0,131072,0x7ffc3fd2dde0,0x7f5911bf5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6bb010,1048576,0xb377ba0,131072,0x7ffc3fd2dde0,0x7f5911df5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6bf010,1048576,0xb3783a0,131072,0x7ffc3fd2dde0,0x7f5911ff5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6c3010,1048576,0xb378ba0,131072,0x7ffc3fd2dde0,0x7f59121f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6c7010,1048576,0xb3793a0,131072,0x7ffc3fd2dde0,0x7f59123f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6cb010,1048576,0xb379ba0,131072,0x7ffc3fd2dde0,0x7f59125f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6cf010,1048576,0xb37a3a0,131072,0x7ffc3fd2dde0,0x7f59127f5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6d3010,1048576,0xb37aba0,131072,0x7ffc3fd2dde0,0x7f59129f5010,1024) 1.06ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6d7010,1048576,0xb37b3a0,131072,0x7ffc3fd2dde0,0x7f5912bf5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6db010,1048576,0xb37bba0,131072,0x7ffc3fd2dde0,0x7f5912df5010,1024) 1.01ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6df010,1048576,0xb37c3a0,131072,0x7ffc3fd2dde0,0x7f5912ff5010,1024) 1.03ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6e3010,1048576,0xb37cba0,131072,0x7ffc3fd2dde0,0x7f59131f5010,1024) 1.02ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6e7010,1048576,0xb37d3a0,131072,0x7ffc3fd2dde0,0x7f59133f5010,1024) 1.04ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6eb010,1048576,0xb37dba0,131072,0x7ffc3fd2dde0,0x7f59135f5010,1024) 997.57us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6ef010,1048576,0xb37e3a0,131072,0x7ffc3fd2dde0,0x7f59137f5010,1024) 983.01us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6f3010,1048576,0xb37eba0,131072,0x7ffc3fd2dde0,0x7f59139f5010,1024) 979.34us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6f7010,1048576,0xb37f3a0,131072,0x7ffc3fd2dde0,0x7f5913bf5010,1024) 989.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6fb010,1048576,0xb37fba0,131072,0x7ffc3fd2dde0,0x7f5913df5010,1024) 998.51us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e6ff010,1048576,0xb3803a0,131072,0x7ffc3fd2dde0,0x7f5913ff5010,1024) 997.17us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e703010,1048576,0xb380ba0,131072,0x7ffc3fd2dde0,0x7f59141f5010,1024) 987.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e707010,1048576,0xb3813a0,131072,0x7ffc3fd2dde0,0x7f59143f5010,1024) 986.73us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e70b010,1048576,0xb381ba0,131072,0x7ffc3fd2dde0,0x7f59145f5010,1024) 995.17us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e70f010,1048576,0xb3823a0,131072,0x7ffc3fd2dde0,0x7f59147f5010,1024) 978.82us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e713010,1048576,0xb382ba0,131072,0x7ffc3fd2dde0,0x7f59149f5010,1024) 992.72us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e717010,1048576,0xb3833a0,131072,0x7ffc3fd2dde0,0x7f5914bf5010,1024) 1.00ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e71b010,1048576,0xb383ba0,131072,0x7ffc3fd2dde0,0x7f5914df5010,1024) 993.64us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e71f010,1048576,0xb3843a0,131072,0x7ffc3fd2dde0,0x7f5914ff5010,1024) 997.16us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e723010,1048576,0xb384ba0,131072,0x7ffc3fd2dde0,0x7f59151f5010,1024) 986.47us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e727010,1048576,0xb3853a0,131072,0x7ffc3fd2dde0,0x7f59153f5010,1024) 995.57us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e72b010,1048576,0xb385ba0,131072,0x7ffc3fd2dde0,0x7f59155f5010,1024) 981.23us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e72f010,1048576,0xb3863a0,131072,0x7ffc3fd2dde0,0x7f59157f5010,1024) 956.03us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e733010,1048576,0xb386ba0,131072,0x7ffc3fd2dde0,0x7f59159f5010,1024) 957.57us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e737010,1048576,0xb3873a0,131072,0x7ffc3fd2dde0,0x7f5915bf5010,1024) 981.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e73b010,1048576,0xb387ba0,131072,0x7ffc3fd2dde0,0x7f5915df5010,1024) 990.95us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e73f010,1048576,0xb3883a0,131072,0x7ffc3fd2dde0,0x7f5915ff5010,1024) 953.31us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e743010,1048576,0xb388ba0,131072,0x7ffc3fd2dde0,0x7f59161f5010,1024) 948.35us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e747010,1048576,0xb3893a0,131072,0x7ffc3fd2dde0,0x7f59163f5010,1024) 991.57us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e74b010,1048576,0xb389ba0,131072,0x7ffc3fd2dde0,0x7f59165f5010,1024) 983.05us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e74f010,1048576,0xb38a3a0,131072,0x7ffc3fd2dde0,0x7f59167f5010,1024) 949.39us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e753010,1048576,0xb38aba0,131072,0x7ffc3fd2dde0,0x7f59169f5010,1024) 904.40us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e757010,1048576,0xb38b3a0,131072,0x7ffc3fd2dde0,0x7f5916bf5010,1024) 907.30us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5e75b010,1048576,0xb38bba0,131072,0x7ffc3fd2dde0,0x7f5916df5010,1024) 924.83us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... and 550 lines like above...]
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5eff7010,1048576,0xb49f3a0,131072,0x7ffc3fd2dde0,0x7f595bbf5010,1024) 960.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,128,2,0x7ffc3fd2ddd0,0x7f5a5effb010,1048576,0xb49fba0,131072,0x7ffc3fd2dde0,0x7f595bdf5010,1024) 341.73us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ * Duration: 1003 milliseconds
+* After debug_mkl_contract_sum, duration: 1.003293752670288
+PROF:: perf data process bucket time: 1.0074033737182617
+==
+
+PROF:: Bucket contains: [XPhase+(v_1828,v_1829), E1827(v_1828,v_1829,v_1830,v_1831,v_1832,v_1833,v_1834,v_1835,v_1836,v_1837,v_1839,v_1840,v_1842,v_1843,v_1844,v_1847,v_1849,v_1851,v_1852,v_1855,v_1856,v_1859,v_1860,v_1861,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 4 134217728 output: 67108864
+ Dimensions: f:2 k:2 n:33554432 m:1
+ MKL_VERBOSE ZGEMM(N,T,33554432,1,2,0x7ffc3fd2ddd0,0x7f585bff4010,67108864,0x4d8ff30,2,0x7ffc3fd2dde0,0x7f581bff3010,33554432) 32.40ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,33554432,1,2,0x7ffc3fd2ddd0,0x7f587bff4010,67108864,0x4d8ff40,2,0x7ffc3fd2dde0,0x7f583bff3010,33554432) 32.17ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 64 milliseconds
+ After debug_mkl_contract_sum, duration: 0.06471776962280273
+PROF:: perf data process bucket time: 0.7571535110473633
+==
+
+PROF:: Bucket contains: [E1616(v_1839,v_1843,v_1848,v_1852,v_1861,v_1866), E1837(v_1839,v_1840,v_1842,v_1843,v_1844,v_1847,v_1848,v_1849,v_1851,v_1852,v_1855,v_1856,v_1859,v_1860,v_1861,v_1864,v_1865,v_1866), E1838(v_1839,v_1840,v_1841,v_1842,v_1843,v_1844,v_1845,v_1846,v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract, input sizes: 64 262144 output: 262144
+ After debug_mkl_contract, duration: 0.0024564266204833984
+ Starting debug_mkl_contract_sum, input sizes: 262144 268435456 output: 134217728
+ Dimensions: f:131072 k:2 n:1024 m:1
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561bff1010,134217728,0xcaa03c0,131072,0x7ffc3fd2dde0,0x7f559bff0010,1024) 25.37us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561bff5010,134217728,0xcaa03d0,131072,0x7ffc3fd2dde0,0x7f559bff4010,1024) 13.76us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561bff9010,134217728,0xcaa03e0,131072,0x7ffc3fd2dde0,0x7f559bff8010,1024) 11.35us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561bffd010,134217728,0xcaa03f0,131072,0x7ffc3fd2dde0,0x7f559bffc010,1024) 429.51us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c001010,134217728,0xcaa0400,131072,0x7ffc3fd2dde0,0x7f559c000010,1024) 5.36us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c005010,134217728,0xcaa0410,131072,0x7ffc3fd2dde0,0x7f559c004010,1024) 5.43us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c009010,134217728,0xcaa0420,131072,0x7ffc3fd2dde0,0x7f559c008010,1024) 5.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c00d010,134217728,0xcaa0430,131072,0x7ffc3fd2dde0,0x7f559c00c010,1024) 4.99us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c011010,134217728,0xcaa0440,131072,0x7ffc3fd2dde0,0x7f559c010010,1024) 5.47us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c015010,134217728,0xcaa0450,131072,0x7ffc3fd2dde0,0x7f559c014010,1024) 4.96us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c019010,134217728,0xcaa0460,131072,0x7ffc3fd2dde0,0x7f559c018010,1024) 5.38us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c01d010,134217728,0xcaa0470,131072,0x7ffc3fd2dde0,0x7f559c01c010,1024) 4.95us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c021010,134217728,0xcaa0480,131072,0x7ffc3fd2dde0,0x7f559c020010,1024) 4.71us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c025010,134217728,0xcaa0490,131072,0x7ffc3fd2dde0,0x7f559c024010,1024) 5.05us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c029010,134217728,0xcaa04a0,131072,0x7ffc3fd2dde0,0x7f559c028010,1024) 4.98us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c02d010,134217728,0xcaa04b0,131072,0x7ffc3fd2dde0,0x7f559c02c010,1024) 4.56us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c031010,134217728,0xcaa04c0,131072,0x7ffc3fd2dde0,0x7f559c030010,1024) 5.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c035010,134217728,0xcaa04d0,131072,0x7ffc3fd2dde0,0x7f559c034010,1024) 5.31us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c039010,134217728,0xcaa04e0,131072,0x7ffc3fd2dde0,0x7f559c038010,1024) 4.87us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c03d010,134217728,0xcaa04f0,131072,0x7ffc3fd2dde0,0x7f559c03c010,1024) 5.14us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c041010,134217728,0xcaa0500,131072,0x7ffc3fd2dde0,0x7f559c040010,1024) 5.08us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c045010,134217728,0xcaa0510,131072,0x7ffc3fd2dde0,0x7f559c044010,1024) 4.73us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c049010,134217728,0xcaa0520,131072,0x7ffc3fd2dde0,0x7f559c048010,1024) 5.00us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c04d010,134217728,0xcaa0530,131072,0x7ffc3fd2dde0,0x7f559c04c010,1024) 5.07us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c051010,134217728,0xcaa0540,131072,0x7ffc3fd2dde0,0x7f559c050010,1024) 5.22us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c055010,134217728,0xcaa0550,131072,0x7ffc3fd2dde0,0x7f559c054010,1024) 4.75us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c059010,134217728,0xcaa0560,131072,0x7ffc3fd2dde0,0x7f559c058010,1024) 5.33us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c05d010,134217728,0xcaa0570,131072,0x7ffc3fd2dde0,0x7f559c05c010,1024) 5.63us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c061010,134217728,0xcaa0580,131072,0x7ffc3fd2dde0,0x7f559c060010,1024) 4.79us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c065010,134217728,0xcaa0590,131072,0x7ffc3fd2dde0,0x7f559c064010,1024) 5.64us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c069010,134217728,0xcaa05a0,131072,0x7ffc3fd2dde0,0x7f559c068010,1024) 5.12us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c06d010,134217728,0xcaa05b0,131072,0x7ffc3fd2dde0,0x7f559c06c010,1024) 5.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c071010,134217728,0xcaa05c0,131072,0x7ffc3fd2dde0,0x7f559c070010,1024) 5.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c075010,134217728,0xcaa05d0,131072,0x7ffc3fd2dde0,0x7f559c074010,1024) 5.19us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c079010,134217728,0xcaa05e0,131072,0x7ffc3fd2dde0,0x7f559c078010,1024) 4.71us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c07d010,134217728,0xcaa05f0,131072,0x7ffc3fd2dde0,0x7f559c07c010,1024) 4.92us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c081010,134217728,0xcaa0600,131072,0x7ffc3fd2dde0,0x7f559c080010,1024) 5.81us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c085010,134217728,0xcaa0610,131072,0x7ffc3fd2dde0,0x7f559c084010,1024) 5.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c089010,134217728,0xcaa0620,131072,0x7ffc3fd2dde0,0x7f559c088010,1024) 4.99us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c08d010,134217728,0xcaa0630,131072,0x7ffc3fd2dde0,0x7f559c08c010,1024) 5.61us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f561c091010,134217728,0xcaa0640,131072,0x7ffc3fd2dde0,0x7f559c090010,1024) 4.99us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... and 131 000 lines more like that...]
+ [... occasionally turning to something strange: ...]
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f5693171010,134217728,0xcc7c9c0,131072,0x7ffc3fd2dde0,0x7f5613170010,1024) 4.79us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f5693175010,134217728,0xcc7c9d0,131072,0x7ffc3fd2dde0,0x7f5613174010,1024) 17.81us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f5693179010,134217728,0xcc7c9e0,131072,0x7ffc3fd2dde0,0x7f5613178010,1024) 9.91us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f569317d010,134217728,0xcc7c9f0,131072,0x7ffc3fd2dde0,0x7f561317c010,1024) 29.51us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f5693181010,134217728,0xcc7ca00,131072,0x7ffc3fd2dde0,0x7f5613180010,1024) 9.49us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... 8 lines like above...]
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931a9010,134217728,0xcc7caa0,131072,0x7ffc3fd2dde0,0x7f56131a8010,1024) 9.56us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931ad010,134217728,0xcc7cab0,131072,0x7ffc3fd2dde0,0x7f56131ac010,1024) 9.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931b1010,134217728,0xcc7cac0,131072,0x7ffc3fd2dde0,0x7f56131b0010,1024) 10.44us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931b5010,134217728,0xcc7cad0,131072,0x7ffc3fd2dde0,0x7f56131b4010,1024) 9.40us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931b9010,134217728,0xcc7cae0,131072,0x7ffc3fd2dde0,0x7f56131b8010,1024) 9.40us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... 10 lines like above...]
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931e5010,134217728,0xcc7cb90,131072,0x7ffc3fd2dde0,0x7f56131e4010,1024) 9.61us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931e9010,134217728,0xcc7cba0,131072,0x7ffc3fd2dde0,0x7f56131e8010,1024) 9.67us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931ed010,134217728,0xcc7cbb0,131072,0x7ffc3fd2dde0,0x7f56131ec010,1024) 9.89us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931f1010,134217728,0xcc7cbc0,131072,0x7ffc3fd2dde0,0x7f56131f0010,1024) 9.83us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931f5010,134217728,0xcc7cbd0,131072,0x7ffc3fd2dde0,0x7f56131f4010,1024) 9.80us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931f9010,134217728,0xcc7cbe0,131072,0x7ffc3fd2dde0,0x7f56131f8010,1024) 4.42us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ ! MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f56931fd010,134217728,0xcc7cbf0,131072,0x7ffc3fd2dde0,0x7f56131fc010,1024) 392.54us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,1024,1,2,0x7ffc3fd2ddd0,0x7f5693201010,134217728,0xcc7cc00,131072,0x7ffc3fd2dde0,0x7f5613200010,1024) 5.01us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... 2362000 us / 131072 calls = 18 us per call on average, but in the logs most calls take about 5 us. Why?... ]
+ Duration: 2362 milliseconds
+ * After debug_mkl_contract_sum, duration: 2.362922191619873
+PROF:: perf data process bucket time: 6.452457666397095
+==
+
+PROF:: Bucket contains: [E1697(v_1840,v_1844,v_1849,v_1856,v_1860,v_1865), E1839(v_1840,v_1841,v_1842,v_1843,v_1844,v_1845,v_1846,v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 64 134217728 output: 67108864
+ Dimensions: f:32 k:2 n:2097152 m:1
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f569bff1010,67108864,0x4d808c0,32,0x7ffc3fd2dde0,0x7f586fff1010,2097152) 2.97ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f569dff1010,67108864,0x4d808d0,32,0x7ffc3fd2dde0,0x7f5871ff1010,2097152) 2.58ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f569fff1010,67108864,0x4d808e0,32,0x7ffc3fd2dde0,0x7f5873ff1010,2097152) 2.65ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56a1ff1010,67108864,0x4d808f0,32,0x7ffc3fd2dde0,0x7f5875ff1010,2097152) 2.61ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56a3ff1010,67108864,0x4d80900,32,0x7ffc3fd2dde0,0x7f5877ff1010,2097152) 2.54ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56a5ff1010,67108864,0x4d80910,32,0x7ffc3fd2dde0,0x7f5879ff1010,2097152) 2.55ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56a7ff1010,67108864,0x4d80920,32,0x7ffc3fd2dde0,0x7f587bff1010,2097152) 2.49ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56a9ff1010,67108864,0x4d80930,32,0x7ffc3fd2dde0,0x7f587dff1010,2097152) 2.45ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56abff1010,67108864,0x4d80940,32,0x7ffc3fd2dde0,0x7f587fff1010,2097152) 2.39ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56adff1010,67108864,0x4d80950,32,0x7ffc3fd2dde0,0x7f5881ff1010,2097152) 2.56ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56afff1010,67108864,0x4d80960,32,0x7ffc3fd2dde0,0x7f5883ff1010,2097152) 2.52ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56b1ff1010,67108864,0x4d80970,32,0x7ffc3fd2dde0,0x7f5885ff1010,2097152) 2.43ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56b3ff1010,67108864,0x4d80980,32,0x7ffc3fd2dde0,0x7f5887ff1010,2097152) 2.46ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56b5ff1010,67108864,0x4d80990,32,0x7ffc3fd2dde0,0x7f5889ff1010,2097152) 2.45ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56b7ff1010,67108864,0x4d809a0,32,0x7ffc3fd2dde0,0x7f588bff1010,2097152) 2.60ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56b9ff1010,67108864,0x4d809b0,32,0x7ffc3fd2dde0,0x7f588dff1010,2097152) 2.58ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56bbff1010,67108864,0x4d809c0,32,0x7ffc3fd2dde0,0x7f588fff1010,2097152) 2.49ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56bdff1010,67108864,0x4d809d0,32,0x7ffc3fd2dde0,0x7f5891ff1010,2097152) 2.48ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56bfff1010,67108864,0x4d809e0,32,0x7ffc3fd2dde0,0x7f5893ff1010,2097152) 2.52ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56c1ff1010,67108864,0x4d809f0,32,0x7ffc3fd2dde0,0x7f5895ff1010,2097152) 2.60ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56c3ff1010,67108864,0x4d80a00,32,0x7ffc3fd2dde0,0x7f5897ff1010,2097152) 2.57ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56c5ff1010,67108864,0x4d80a10,32,0x7ffc3fd2dde0,0x7f5899ff1010,2097152) 2.54ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56c7ff1010,67108864,0x4d80a20,32,0x7ffc3fd2dde0,0x7f589bff1010,2097152) 2.57ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56c9ff1010,67108864,0x4d80a30,32,0x7ffc3fd2dde0,0x7f589dff1010,2097152) 2.61ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56cbff1010,67108864,0x4d80a40,32,0x7ffc3fd2dde0,0x7f589fff1010,2097152) 2.68ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56cdff1010,67108864,0x4d80a50,32,0x7ffc3fd2dde0,0x7f58a1ff1010,2097152) 2.51ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56cfff1010,67108864,0x4d80a60,32,0x7ffc3fd2dde0,0x7f58a3ff1010,2097152) 2.45ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56d1ff1010,67108864,0x4d80a70,32,0x7ffc3fd2dde0,0x7f58a5ff1010,2097152) 2.41ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56d3ff1010,67108864,0x4d80a80,32,0x7ffc3fd2dde0,0x7f58a7ff1010,2097152) 2.53ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56d5ff1010,67108864,0x4d80a90,32,0x7ffc3fd2dde0,0x7f58a9ff1010,2097152) 2.59ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56d7ff1010,67108864,0x4d80aa0,32,0x7ffc3fd2dde0,0x7f58abff1010,2097152) 2.63ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,2097152,1,2,0x7ffc3fd2ddd0,0x7f56d9ff1010,67108864,0x4d80ab0,32,0x7ffc3fd2dde0,0x7f58adff1010,2097152) 2.54ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 82 milliseconds
+ After debug_mkl_contract_sum, duration: 0.08215188980102539
+PROF:: perf data process bucket time: 1.5319533348083496
+==
+
+PROF:: Bucket contains: [XPhase+(v_1841,v_1846), E1840(v_1841,v_1842,v_1843,v_1844,v_1845,v_1846,v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 4 67108864 output: 33554432
+ Dimensions: f:2 k:2 n:16777216 m:1
+ MKL_VERBOSE ZGEMM(N,T,16777216,1,2,0x7ffc3fd2ddd0,0x7f56dbff1010,33554432,0x4d8ff30,2,0x7ffc3fd2dde0,0x7f56bbff0010,16777216) 16.61ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,16777216,1,2,0x7ffc3fd2ddd0,0x7f56ebff1010,33554432,0x4d8ff40,2,0x7ffc3fd2dde0,0x7f56cbff0010,16777216) 16.17ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 32 milliseconds
+ After debug_mkl_contract_sum, duration: 0.03291201591491699
+PROF:: perf data process bucket time: 0.46090030670166016
+==
+
+PROF:: Bucket contains: [E1841(v_1842,v_1843,v_1844,v_1845,v_1846,v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+PROF:: perf data process bucket time: 0.17297935485839844
+PROF:: Bucket contains: [E1842(v_1843,v_1844,v_1845,v_1846,v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+PROF:: perf data process bucket time: 0.0806725025177002
+PROF:: Bucket contains: [E1843(v_1844,v_1845,v_1846,v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+PROF:: perf data process bucket time: 0.039273977279663086
+==
+
+PROF:: Bucket contains: [E453(v_1845,v_1853), E1776(v_1845,v_1846,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1857,v_1858,v_1859,v_1861,v_1862,v_1863,v_1864,v_1866), E1844(v_1845,v_1846,v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract, input sizes: 4 65536 output: 65536
+ Dimensions: C[0]:4 C[1]:1 C[2]:16384
+ MKL_VERBOSE ZGEMM(N,T,16384,1,1,0x7ffc3fd2db50,0xcaa03c0,16384,0x4dfb050,1,0x7ffc3fd2db60,0xcba03d0,16384) 1.72ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,16384,1,1,0x7ffc3fd2db50,0xcae03c0,16384,0x4dfb060,1,0x7ffc3fd2db60,0xcbe03d0,16384) 379.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,16384,1,1,0x7ffc3fd2db50,0xcb203c0,16384,0x4dfb070,1,0x7ffc3fd2db60,0xcc203d0,16384) 23.91us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,16384,1,1,0x7ffc3fd2db50,0xcb603c0,16384,0x4dfb080,1,0x7ffc3fd2db60,0xcc603d0,16384) 19.53us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 2 milliseconds
+ After debug_mkl_contract, duration: 0.002335786819458008
+ Starting debug_mkl_contract_sum, input sizes: 65536 4194304 output: 2097152
+ Dimensions: f:32768 k:2 n:64 m:1
+ *
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f1010,2097152,0xcaa03c0,32768,0x7ffc3fd2dde0,0x7f5b041e6010,64) 21.78us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f1410,2097152,0xcaa03d0,32768,0x7ffc3fd2dde0,0x7f5b041e6410,64) 410ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f1810,2097152,0xcaa03e0,32768,0x7ffc3fd2dde0,0x7f5b041e6810,64) 529ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f1c10,2097152,0xcaa03f0,32768,0x7ffc3fd2dde0,0x7f5b041e6c10,64) 2.78us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f2010,2097152,0xcaa0400,32768,0x7ffc3fd2dde0,0x7f5b041e7010,64) 613ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f2410,2097152,0xcaa0410,32768,0x7ffc3fd2dde0,0x7f5b041e7410,64) 590ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f2810,2097152,0xcaa0420,32768,0x7ffc3fd2dde0,0x7f5b041e7810,64) 251ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f2c10,2097152,0xcaa0430,32768,0x7ffc3fd2dde0,0x7f5b041e7c10,64) 2.52us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f3010,2097152,0xcaa0440,32768,0x7ffc3fd2dde0,0x7f5b041e8010,64) 520ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f3410,2097152,0xcaa0450,32768,0x7ffc3fd2dde0,0x7f5b041e8410,64) 680ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f3810,2097152,0xcaa0460,32768,0x7ffc3fd2dde0,0x7f5b041e8810,64) 315ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f3c10,2097152,0xcaa0470,32768,0x7ffc3fd2dde0,0x7f5b041e8c10,64) 2.31us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f4010,2097152,0xcaa0480,32768,0x7ffc3fd2dde0,0x7f5b041e9010,64) 596ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f4410,2097152,0xcaa0490,32768,0x7ffc3fd2dde0,0x7f5b041e9410,64) 417ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f4810,2097152,0xcaa04a0,32768,0x7ffc3fd2dde0,0x7f5b041e9810,64) 281ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f4c10,2097152,0xcaa04b0,32768,0x7ffc3fd2dde0,0x7f5b041e9c10,64) 2.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f5010,2097152,0xcaa04c0,32768,0x7ffc3fd2dde0,0x7f5b041ea010,64) 501ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f5410,2097152,0xcaa04d0,32768,0x7ffc3fd2dde0,0x7f5b041ea410,64) 542ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f5810,2097152,0xcaa04e0,32768,0x7ffc3fd2dde0,0x7f5b041ea810,64) 268ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f5c10,2097152,0xcaa04f0,32768,0x7ffc3fd2dde0,0x7f5b041eac10,64) 3.63us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f6010,2097152,0xcaa0500,32768,0x7ffc3fd2dde0,0x7f5b041eb010,64) 495ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f6410,2097152,0xcaa0510,32768,0x7ffc3fd2dde0,0x7f5b041eb410,64) 654ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f6810,2097152,0xcaa0520,32768,0x7ffc3fd2dde0,0x7f5b041eb810,64) 244ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f6c10,2097152,0xcaa0530,32768,0x7ffc3fd2dde0,0x7f5b041ebc10,64) 2.10us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f7010,2097152,0xcaa0540,32768,0x7ffc3fd2dde0,0x7f5b041ec010,64) 458ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f7410,2097152,0xcaa0550,32768,0x7ffc3fd2dde0,0x7f5b041ec410,64) 348ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f7810,2097152,0xcaa0560,32768,0x7ffc3fd2dde0,0x7f5b041ec810,64) 307ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f7c10,2097152,0xcaa0570,32768,0x7ffc3fd2dde0,0x7f5b041ecc10,64) 2.38us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f8010,2097152,0xcaa0580,32768,0x7ffc3fd2dde0,0x7f5b041ed010,64) 501ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f8410,2097152,0xcaa0590,32768,0x7ffc3fd2dde0,0x7f5b041ed410,64) 430ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f8810,2097152,0xcaa05a0,32768,0x7ffc3fd2dde0,0x7f5b041ed810,64) 268ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f8c10,2097152,0xcaa05b0,32768,0x7ffc3fd2dde0,0x7f5b041edc10,64) 6.41us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f9010,2097152,0xcaa05c0,32768,0x7ffc3fd2dde0,0x7f5b041ee010,64) 494ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f9410,2097152,0xcaa05d0,32768,0x7ffc3fd2dde0,0x7f5b041ee410,64) 542ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f9810,2097152,0xcaa05e0,32768,0x7ffc3fd2dde0,0x7f5b041ee810,64) 307ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079f9c10,2097152,0xcaa05f0,32768,0x7ffc3fd2dde0,0x7f5b041eec10,64) 2.16us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fa010,2097152,0xcaa0600,32768,0x7ffc3fd2dde0,0x7f5b041ef010,64) 505ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fa410,2097152,0xcaa0610,32768,0x7ffc3fd2dde0,0x7f5b041ef410,64) 462ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fa810,2097152,0xcaa0620,32768,0x7ffc3fd2dde0,0x7f5b041ef810,64) 596ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fac10,2097152,0xcaa0630,32768,0x7ffc3fd2dde0,0x7f5b041efc10,64) 2.15us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fb010,2097152,0xcaa0640,32768,0x7ffc3fd2dde0,0x7f5b041f0010,64) 862ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fb410,2097152,0xcaa0650,32768,0x7ffc3fd2dde0,0x7f5b041f0410,64) 507ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fb810,2097152,0xcaa0660,32768,0x7ffc3fd2dde0,0x7f5b041f0810,64) 365ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fbc10,2097152,0xcaa0670,32768,0x7ffc3fd2dde0,0x7f5b041f0c10,64) 2.21us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fc010,2097152,0xcaa0680,32768,0x7ffc3fd2dde0,0x7f5b041f1010,64) 587ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fc410,2097152,0xcaa0690,32768,0x7ffc3fd2dde0,0x7f5b041f1410,64) 613ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fc810,2097152,0xcaa06a0,32768,0x7ffc3fd2dde0,0x7f5b041f1810,64) 386ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fcc10,2097152,0xcaa06b0,32768,0x7ffc3fd2dde0,0x7f5b041f1c10,64) 2.76us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fd010,2097152,0xcaa06c0,32768,0x7ffc3fd2dde0,0x7f5b041f2010,64) 509ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fd410,2097152,0xcaa06d0,32768,0x7ffc3fd2dde0,0x7f5b041f2410,64) 427ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fd810,2097152,0xcaa06e0,32768,0x7ffc3fd2dde0,0x7f5b041f2810,64) 296ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fdc10,2097152,0xcaa06f0,32768,0x7ffc3fd2dde0,0x7f5b041f2c10,64) 2.35us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fe010,2097152,0xcaa0700,32768,0x7ffc3fd2dde0,0x7f5b041f3010,64) 520ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fe410,2097152,0xcaa0710,32768,0x7ffc3fd2dde0,0x7f5b041f3410,64) 432ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fe810,2097152,0xcaa0720,32768,0x7ffc3fd2dde0,0x7f5b041f3810,64) 298ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079fec10,2097152,0xcaa0730,32768,0x7ffc3fd2dde0,0x7f5b041f3c10,64) 2.18us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079ff010,2097152,0xcaa0740,32768,0x7ffc3fd2dde0,0x7f5b041f4010,64) 540ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079ff410,2097152,0xcaa0750,32768,0x7ffc3fd2dde0,0x7f5b041f4410,64) 510ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079ff810,2097152,0xcaa0760,32768,0x7ffc3fd2dde0,0x7f5b041f4810,64) 481ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b079ffc10,2097152,0xcaa0770,32768,0x7ffc3fd2dde0,0x7f5b041f4c10,64) 2.45us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a00010,2097152,0xcaa0780,32768,0x7ffc3fd2dde0,0x7f5b041f5010,64) 419ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a00410,2097152,0xcaa0790,32768,0x7ffc3fd2dde0,0x7f5b041f5410,64) 581ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a00810,2097152,0xcaa07a0,32768,0x7ffc3fd2dde0,0x7f5b041f5810,64) 276ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a00c10,2097152,0xcaa07b0,32768,0x7ffc3fd2dde0,0x7f5b041f5c10,64) 2.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a01010,2097152,0xcaa07c0,32768,0x7ffc3fd2dde0,0x7f5b041f6010,64) 443ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a01410,2097152,0xcaa07d0,32768,0x7ffc3fd2dde0,0x7f5b041f6410,64) 399ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a01810,2097152,0xcaa07e0,32768,0x7ffc3fd2dde0,0x7f5b041f6810,64) 315ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a01c10,2097152,0xcaa07f0,32768,0x7ffc3fd2dde0,0x7f5b041f6c10,64) 2.30us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a02010,2097152,0xcaa0800,32768,0x7ffc3fd2dde0,0x7f5b041f7010,64) 434ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a02410,2097152,0xcaa0810,32768,0x7ffc3fd2dde0,0x7f5b041f7410,64) 643ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a02810,2097152,0xcaa0820,32768,0x7ffc3fd2dde0,0x7f5b041f7810,64) 307ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a02c10,2097152,0xcaa0830,32768,0x7ffc3fd2dde0,0x7f5b041f7c10,64) 2.12us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a03010,2097152,0xcaa0840,32768,0x7ffc3fd2dde0,0x7f5b041f8010,64) 449ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a03410,2097152,0xcaa0850,32768,0x7ffc3fd2dde0,0x7f5b041f8410,64) 389ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a03810,2097152,0xcaa0860,32768,0x7ffc3fd2dde0,0x7f5b041f8810,64) 300ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a03c10,2097152,0xcaa0870,32768,0x7ffc3fd2dde0,0x7f5b041f8c10,64) 2.85us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a04010,2097152,0xcaa0880,32768,0x7ffc3fd2dde0,0x7f5b041f9010,64) 441ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a04410,2097152,0xcaa0890,32768,0x7ffc3fd2dde0,0x7f5b041f9410,64) 374ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a04810,2097152,0xcaa08a0,32768,0x7ffc3fd2dde0,0x7f5b041f9810,64) 298ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a04c10,2097152,0xcaa08b0,32768,0x7ffc3fd2dde0,0x7f5b041f9c10,64) 2.23us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a05010,2097152,0xcaa08c0,32768,0x7ffc3fd2dde0,0x7f5b041fa010,64) 471ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a05410,2097152,0xcaa08d0,32768,0x7ffc3fd2dde0,0x7f5b041fa410,64) 529ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a05810,2097152,0xcaa08e0,32768,0x7ffc3fd2dde0,0x7f5b041fa810,64) 453ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a05c10,2097152,0xcaa08f0,32768,0x7ffc3fd2dde0,0x7f5b041fac10,64) 2.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a06010,2097152,0xcaa0900,32768,0x7ffc3fd2dde0,0x7f5b041fb010,64) 555ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a06410,2097152,0xcaa0910,32768,0x7ffc3fd2dde0,0x7f5b041fb410,64) 440ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a06810,2097152,0xcaa0920,32768,0x7ffc3fd2dde0,0x7f5b041fb810,64) 266ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a06c10,2097152,0xcaa0930,32768,0x7ffc3fd2dde0,0x7f5b041fbc10,64) 2.07us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a07010,2097152,0xcaa0940,32768,0x7ffc3fd2dde0,0x7f5b041fc010,64) 460ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a07410,2097152,0xcaa0950,32768,0x7ffc3fd2dde0,0x7f5b041fc410,64) 412ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a07810,2097152,0xcaa0960,32768,0x7ffc3fd2dde0,0x7f5b041fc810,64) 298ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a07c10,2097152,0xcaa0970,32768,0x7ffc3fd2dde0,0x7f5b041fcc10,64) 2.12us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a08010,2097152,0xcaa0980,32768,0x7ffc3fd2dde0,0x7f5b041fd010,64) 549ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a08410,2097152,0xcaa0990,32768,0x7ffc3fd2dde0,0x7f5b041fd410,64) 453ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a08810,2097152,0xcaa09a0,32768,0x7ffc3fd2dde0,0x7f5b041fd810,64) 505ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a08c10,2097152,0xcaa09b0,32768,0x7ffc3fd2dde0,0x7f5b041fdc10,64) 2.30us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a09010,2097152,0xcaa09c0,32768,0x7ffc3fd2dde0,0x7f5b041fe010,64) 527ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a09410,2097152,0xcaa09d0,32768,0x7ffc3fd2dde0,0x7f5b041fe410,64) 391ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a09810,2097152,0xcaa09e0,32768,0x7ffc3fd2dde0,0x7f5b041fe810,64) 283ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a09c10,2097152,0xcaa09f0,32768,0x7ffc3fd2dde0,0x7f5b041fec10,64) 2.10us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0a010,2097152,0xcaa0a00,32768,0x7ffc3fd2dde0,0x7f5b041ff010,64) 454ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0a410,2097152,0xcaa0a10,32768,0x7ffc3fd2dde0,0x7f5b041ff410,64) 380ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0a810,2097152,0xcaa0a20,32768,0x7ffc3fd2dde0,0x7f5b041ff810,64) 449ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0ac10,2097152,0xcaa0a30,32768,0x7ffc3fd2dde0,0x7f5b041ffc10,64) 393.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0b010,2097152,0xcaa0a40,32768,0x7ffc3fd2dde0,0x7f5b04200010,64) 840ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0b410,2097152,0xcaa0a50,32768,0x7ffc3fd2dde0,0x7f5b04200410,64) 421ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0b810,2097152,0xcaa0a60,32768,0x7ffc3fd2dde0,0x7f5b04200810,64) 324ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0bc10,2097152,0xcaa0a70,32768,0x7ffc3fd2dde0,0x7f5b04200c10,64) 430ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0c010,2097152,0xcaa0a80,32768,0x7ffc3fd2dde0,0x7f5b04201010,64) 538ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0c410,2097152,0xcaa0a90,32768,0x7ffc3fd2dde0,0x7f5b04201410,64) 427ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a0c810,2097152,0xcaa0aa0,32768,0x7ffc3fd2dde0,0x7f5b04201810,64) 296ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... Some 500 similar lines with occasional spikes ...]
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a89c10,2097152,0xcaa29f0,32768,0x7ffc3fd2dde0,0x7f5b0427ec10,64) 685ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8a010,2097152,0xcaa2a00,32768,0x7ffc3fd2dde0,0x7f5b0427f010,64) 522ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8a410,2097152,0xcaa2a10,32768,0x7ffc3fd2dde0,0x7f5b0427f410,64) 641ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8a810,2097152,0xcaa2a20,32768,0x7ffc3fd2dde0,0x7f5b0427f810,64) 520ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8ac10,2097152,0xcaa2a30,32768,0x7ffc3fd2dde0,0x7f5b0427fc10,64) 404ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8b010,2097152,0xcaa2a40,32768,0x7ffc3fd2dde0,0x7f5b04280010,64) 484ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8b410,2097152,0xcaa2a50,32768,0x7ffc3fd2dde0,0x7f5b04280410,64) 626ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8b810,2097152,0xcaa2a60,32768,0x7ffc3fd2dde0,0x7f5b04280810,64) 365ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8bc10,2097152,0xcaa2a70,32768,0x7ffc3fd2dde0,0x7f5b04280c10,64) 475ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8c010,2097152,0xcaa2a80,32768,0x7ffc3fd2dde0,0x7f5b04281010,64) 615ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8c410,2097152,0xcaa2a90,32768,0x7ffc3fd2dde0,0x7f5b04281410,64) 507ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8c810,2097152,0xcaa2aa0,32768,0x7ffc3fd2dde0,0x7f5b04281810,64) 356ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8cc10,2097152,0xcaa2ab0,32768,0x7ffc3fd2dde0,0x7f5b04281c10,64) 523ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8d010,2097152,0xcaa2ac0,32768,0x7ffc3fd2dde0,0x7f5b04282010,64) 959ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8d410,2097152,0xcaa2ad0,32768,0x7ffc3fd2dde0,0x7f5b04282410,64) 451ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8d810,2097152,0xcaa2ae0,32768,0x7ffc3fd2dde0,0x7f5b04282810,64) 319ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8dc10,2097152,0xcaa2af0,32768,0x7ffc3fd2dde0,0x7f5b04282c10,64) 466ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8e010,2097152,0xcaa2b00,32768,0x7ffc3fd2dde0,0x7f5b04283010,64) 1.04us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8e410,2097152,0xcaa2b10,32768,0x7ffc3fd2dde0,0x7f5b04283410,64) 456ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8e810,2097152,0xcaa2b20,32768,0x7ffc3fd2dde0,0x7f5b04283810,64) 337ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8ec10,2097152,0xcaa2b30,32768,0x7ffc3fd2dde0,0x7f5b04283c10,64) 462ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8f010,2097152,0xcaa2b40,32768,0x7ffc3fd2dde0,0x7f5b04284010,64) 782ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8f410,2097152,0xcaa2b50,32768,0x7ffc3fd2dde0,0x7f5b04284410,64) 684ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8f810,2097152,0xcaa2b60,32768,0x7ffc3fd2dde0,0x7f5b04284810,64) 587ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a8fc10,2097152,0xcaa2b70,32768,0x7ffc3fd2dde0,0x7f5b04284c10,64) 458ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a90010,2097152,0xcaa2b80,32768,0x7ffc3fd2dde0,0x7f5b04285010,64) 482ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a90410,2097152,0xcaa2b90,32768,0x7ffc3fd2dde0,0x7f5b04285410,64) 434ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a90810,2097152,0xcaa2ba0,32768,0x7ffc3fd2dde0,0x7f5b04285810,64) 322ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a90c10,2097152,0xcaa2bb0,32768,0x7ffc3fd2dde0,0x7f5b04285c10,64) 466ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a91010,2097152,0xcaa2bc0,32768,0x7ffc3fd2dde0,0x7f5b04286010,64) 654ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a91410,2097152,0xcaa2bd0,32768,0x7ffc3fd2dde0,0x7f5b04286410,64) 412ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a91810,2097152,0xcaa2be0,32768,0x7ffc3fd2dde0,0x7f5b04286810,64) 315ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a91c10,2097152,0xcaa2bf0,32768,0x7ffc3fd2dde0,0x7f5b04286c10,64) 466ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a92010,2097152,0xcaa2c00,32768,0x7ffc3fd2dde0,0x7f5b04287010,64) 538ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a92410,2097152,0xcaa2c10,32768,0x7ffc3fd2dde0,0x7f5b04287410,64) 451ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a92810,2097152,0xcaa2c20,32768,0x7ffc3fd2dde0,0x7f5b04287810,64) 359ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a92c10,2097152,0xcaa2c30,32768,0x7ffc3fd2dde0,0x7f5b04287c10,64) 454ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a93010,2097152,0xcaa2c40,32768,0x7ffc3fd2dde0,0x7f5b04288010,64) 939ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a93410,2097152,0xcaa2c50,32768,0x7ffc3fd2dde0,0x7f5b04288410,64) 484ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a93810,2097152,0xcaa2c60,32768,0x7ffc3fd2dde0,0x7f5b04288810,64) 309ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a93c10,2097152,0xcaa2c70,32768,0x7ffc3fd2dde0,0x7f5b04288c10,64) 440ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a94010,2097152,0xcaa2c80,32768,0x7ffc3fd2dde0,0x7f5b04289010,64) 691ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a94410,2097152,0xcaa2c90,32768,0x7ffc3fd2dde0,0x7f5b04289410,64) 419ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a94810,2097152,0xcaa2ca0,32768,0x7ffc3fd2dde0,0x7f5b04289810,64) 330ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a94c10,2097152,0xcaa2cb0,32768,0x7ffc3fd2dde0,0x7f5b04289c10,64) 404ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a95010,2097152,0xcaa2cc0,32768,0x7ffc3fd2dde0,0x7f5b0428a010,64) 747ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a95410,2097152,0xcaa2cd0,32768,0x7ffc3fd2dde0,0x7f5b0428a410,64) 738ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a95810,2097152,0xcaa2ce0,32768,0x7ffc3fd2dde0,0x7f5b0428a810,64) 317ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a95c10,2097152,0xcaa2cf0,32768,0x7ffc3fd2dde0,0x7f5b0428ac10,64) 509ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a96010,2097152,0xcaa2d00,32768,0x7ffc3fd2dde0,0x7f5b0428b010,64) 834ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a96410,2097152,0xcaa2d10,32768,0x7ffc3fd2dde0,0x7f5b0428b410,64) 417ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a96810,2097152,0xcaa2d20,32768,0x7ffc3fd2dde0,0x7f5b0428b810,64) 441ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a96c10,2097152,0xcaa2d30,32768,0x7ffc3fd2dde0,0x7f5b0428bc10,64) 453ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a97010,2097152,0xcaa2d40,32768,0x7ffc3fd2dde0,0x7f5b0428c010,64) 779ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a97410,2097152,0xcaa2d50,32768,0x7ffc3fd2dde0,0x7f5b0428c410,64) 514ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a97810,2097152,0xcaa2d60,32768,0x7ffc3fd2dde0,0x7f5b0428c810,64) 542ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a97c10,2097152,0xcaa2d70,32768,0x7ffc3fd2dde0,0x7f5b0428cc10,64) 462ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a98010,2097152,0xcaa2d80,32768,0x7ffc3fd2dde0,0x7f5b0428d010,64) 672ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a98410,2097152,0xcaa2d90,32768,0x7ffc3fd2dde0,0x7f5b0428d410,64) 458ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a98810,2097152,0xcaa2da0,32768,0x7ffc3fd2dde0,0x7f5b0428d810,64) 311ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a98c10,2097152,0xcaa2db0,32768,0x7ffc3fd2dde0,0x7f5b0428dc10,64) 462ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a99010,2097152,0xcaa2dc0,32768,0x7ffc3fd2dde0,0x7f5b0428e010,64) 667ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a99410,2097152,0xcaa2dd0,32768,0x7ffc3fd2dde0,0x7f5b0428e410,64) 2.99us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a99810,2097152,0xcaa2de0,32768,0x7ffc3fd2dde0,0x7f5b0428e810,64) 561ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a99c10,2097152,0xcaa2df0,32768,0x7ffc3fd2dde0,0x7f5b0428ec10,64) 563ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a9a010,2097152,0xcaa2e00,32768,0x7ffc3fd2dde0,0x7f5b0428f010,64) 551ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a9a410,2097152,0xcaa2e10,32768,0x7ffc3fd2dde0,0x7f5b0428f410,64) 469ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a9a810,2097152,0xcaa2e20,32768,0x7ffc3fd2dde0,0x7f5b0428f810,64) 456ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a9ac10,2097152,0xcaa2e30,32768,0x7ffc3fd2dde0,0x7f5b0428fc10,64) 400ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a9b010,2097152,0xcaa2e40,32768,0x7ffc3fd2dde0,0x7f5b04290010,64) 680ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a9b410,2097152,0xcaa2e50,32768,0x7ffc3fd2dde0,0x7f5b04290410,64) 477ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07a9b810,2097152,0xcaa2e60,32768,0x7ffc3fd2dde0,0x7f5b04290810,64) 292ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... another 100 lines with variations from 200ns to 1us ]
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab4c10,2097152,0xcaa34b0,32768,0x7ffc3fd2dde0,0x7f5b042a9c10,64) 479ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab5010,2097152,0xcaa34c0,32768,0x7ffc3fd2dde0,0x7f5b042aa010,64) 643ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab5410,2097152,0xcaa34d0,32768,0x7ffc3fd2dde0,0x7f5b042aa410,64) 414ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab5810,2097152,0xcaa34e0,32768,0x7ffc3fd2dde0,0x7f5b042aa810,64) 296ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab5c10,2097152,0xcaa34f0,32768,0x7ffc3fd2dde0,0x7f5b042aac10,64) 404ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab6010,2097152,0xcaa3500,32768,0x7ffc3fd2dde0,0x7f5b042ab010,64) 905ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab6410,2097152,0xcaa3510,32768,0x7ffc3fd2dde0,0x7f5b042ab410,64) 635ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab6810,2097152,0xcaa3520,32768,0x7ffc3fd2dde0,0x7f5b042ab810,64) 304ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab6c10,2097152,0xcaa3530,32768,0x7ffc3fd2dde0,0x7f5b042abc10,64) 462ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab7010,2097152,0xcaa3540,32768,0x7ffc3fd2dde0,0x7f5b042ac010,64) 590ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab7410,2097152,0xcaa3550,32768,0x7ffc3fd2dde0,0x7f5b042ac410,64) 395ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab7810,2097152,0xcaa3560,32768,0x7ffc3fd2dde0,0x7f5b042ac810,64) 320ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab7c10,2097152,0xcaa3570,32768,0x7ffc3fd2dde0,0x7f5b042acc10,64) 471ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab8010,2097152,0xcaa3580,32768,0x7ffc3fd2dde0,0x7f5b042ad010,64) 1.12us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab8410,2097152,0xcaa3590,32768,0x7ffc3fd2dde0,0x7f5b042ad410,64) 777ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab8810,2097152,0xcaa35a0,32768,0x7ffc3fd2dde0,0x7f5b042ad810,64) 307ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab8c10,2097152,0xcaa35b0,32768,0x7ffc3fd2dde0,0x7f5b042adc10,64) 572ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab9010,2097152,0xcaa35c0,32768,0x7ffc3fd2dde0,0x7f5b042ae010,64) 799ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab9410,2097152,0xcaa35d0,32768,0x7ffc3fd2dde0,0x7f5b042ae410,64) 391ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab9810,2097152,0xcaa35e0,32768,0x7ffc3fd2dde0,0x7f5b042ae810,64) 307ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ab9c10,2097152,0xcaa35f0,32768,0x7ffc3fd2dde0,0x7f5b042aec10,64) 669ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aba010,2097152,0xcaa3600,32768,0x7ffc3fd2dde0,0x7f5b042af010,64) 492ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aba410,2097152,0xcaa3610,32768,0x7ffc3fd2dde0,0x7f5b042af410,64) 404ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aba810,2097152,0xcaa3620,32768,0x7ffc3fd2dde0,0x7f5b042af810,64) 315ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abac10,2097152,0xcaa3630,32768,0x7ffc3fd2dde0,0x7f5b042afc10,64) 676ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abb010,2097152,0xcaa3640,32768,0x7ffc3fd2dde0,0x7f5b042b0010,64) 495ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abb410,2097152,0xcaa3650,32768,0x7ffc3fd2dde0,0x7f5b042b0410,64) 373ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abb810,2097152,0xcaa3660,32768,0x7ffc3fd2dde0,0x7f5b042b0810,64) 354ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abbc10,2097152,0xcaa3670,32768,0x7ffc3fd2dde0,0x7f5b042b0c10,64) 454ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abc010,2097152,0xcaa3680,32768,0x7ffc3fd2dde0,0x7f5b042b1010,64) 769ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abc410,2097152,0xcaa3690,32768,0x7ffc3fd2dde0,0x7f5b042b1410,64) 404ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abc810,2097152,0xcaa36a0,32768,0x7ffc3fd2dde0,0x7f5b042b1810,64) 481ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abcc10,2097152,0xcaa36b0,32768,0x7ffc3fd2dde0,0x7f5b042b1c10,64) 469ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abd010,2097152,0xcaa36c0,32768,0x7ffc3fd2dde0,0x7f5b042b2010,64) 481ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abd410,2097152,0xcaa36d0,32768,0x7ffc3fd2dde0,0x7f5b042b2410,64) 501ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abd810,2097152,0xcaa36e0,32768,0x7ffc3fd2dde0,0x7f5b042b2810,64) 373ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abdc10,2097152,0xcaa36f0,32768,0x7ffc3fd2dde0,0x7f5b042b2c10,64) 767ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abe010,2097152,0xcaa3700,32768,0x7ffc3fd2dde0,0x7f5b042b3010,64) 471ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abe410,2097152,0xcaa3710,32768,0x7ffc3fd2dde0,0x7f5b042b3410,64) 635ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abe810,2097152,0xcaa3720,32768,0x7ffc3fd2dde0,0x7f5b042b3810,64) 507ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abec10,2097152,0xcaa3730,32768,0x7ffc3fd2dde0,0x7f5b042b3c10,64) 4.44us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abf010,2097152,0xcaa3740,32768,0x7ffc3fd2dde0,0x7f5b042b4010,64) 1.39us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abf410,2097152,0xcaa3750,32768,0x7ffc3fd2dde0,0x7f5b042b4410,64) 944ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abf810,2097152,0xcaa3760,32768,0x7ffc3fd2dde0,0x7f5b042b4810,64) 581ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07abfc10,2097152,0xcaa3770,32768,0x7ffc3fd2dde0,0x7f5b042b4c10,64) 417ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac0010,2097152,0xcaa3780,32768,0x7ffc3fd2dde0,0x7f5b042b5010,64) 654ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac0410,2097152,0xcaa3790,32768,0x7ffc3fd2dde0,0x7f5b042b5410,64) 423ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac0810,2097152,0xcaa37a0,32768,0x7ffc3fd2dde0,0x7f5b042b5810,64) 345ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac0c10,2097152,0xcaa37b0,32768,0x7ffc3fd2dde0,0x7f5b042b5c10,64) 486ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac1010,2097152,0xcaa37c0,32768,0x7ffc3fd2dde0,0x7f5b042b6010,64) 1.22us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac1410,2097152,0xcaa37d0,32768,0x7ffc3fd2dde0,0x7f5b042b6410,64) 447ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac1810,2097152,0xcaa37e0,32768,0x7ffc3fd2dde0,0x7f5b042b6810,64) 339ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac1c10,2097152,0xcaa37f0,32768,0x7ffc3fd2dde0,0x7f5b042b6c10,64) 497ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac2010,2097152,0xcaa3800,32768,0x7ffc3fd2dde0,0x7f5b042b7010,64) 566ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac2410,2097152,0xcaa3810,32768,0x7ffc3fd2dde0,0x7f5b042b7410,64) 445ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac2810,2097152,0xcaa3820,32768,0x7ffc3fd2dde0,0x7f5b042b7810,64) 549ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac2c10,2097152,0xcaa3830,32768,0x7ffc3fd2dde0,0x7f5b042b7c10,64) 468ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac3010,2097152,0xcaa3840,32768,0x7ffc3fd2dde0,0x7f5b042b8010,64) 585ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac3410,2097152,0xcaa3850,32768,0x7ffc3fd2dde0,0x7f5b042b8410,64) 434ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac3810,2097152,0xcaa3860,32768,0x7ffc3fd2dde0,0x7f5b042b8810,64) 594ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac3c10,2097152,0xcaa3870,32768,0x7ffc3fd2dde0,0x7f5b042b8c10,64) 488ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac4010,2097152,0xcaa3880,32768,0x7ffc3fd2dde0,0x7f5b042b9010,64) 514ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac4410,2097152,0xcaa3890,32768,0x7ffc3fd2dde0,0x7f5b042b9410,64) 404ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac4810,2097152,0xcaa38a0,32768,0x7ffc3fd2dde0,0x7f5b042b9810,64) 400ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac4c10,2097152,0xcaa38b0,32768,0x7ffc3fd2dde0,0x7f5b042b9c10,64) 760ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac5010,2097152,0xcaa38c0,32768,0x7ffc3fd2dde0,0x7f5b042ba010,64) 572ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac5410,2097152,0xcaa38d0,32768,0x7ffc3fd2dde0,0x7f5b042ba410,64) 818ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac5810,2097152,0xcaa38e0,32768,0x7ffc3fd2dde0,0x7f5b042ba810,64) 607ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac5c10,2097152,0xcaa38f0,32768,0x7ffc3fd2dde0,0x7f5b042bac10,64) 931ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac6010,2097152,0xcaa3900,32768,0x7ffc3fd2dde0,0x7f5b042bb010,64) 648ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac6410,2097152,0xcaa3910,32768,0x7ffc3fd2dde0,0x7f5b042bb410,64) 414ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac6810,2097152,0xcaa3920,32768,0x7ffc3fd2dde0,0x7f5b042bb810,64) 488ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac6c10,2097152,0xcaa3930,32768,0x7ffc3fd2dde0,0x7f5b042bbc10,64) 732ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac7010,2097152,0xcaa3940,32768,0x7ffc3fd2dde0,0x7f5b042bc010,64) 684ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac7410,2097152,0xcaa3950,32768,0x7ffc3fd2dde0,0x7f5b042bc410,64) 471ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac7810,2097152,0xcaa3960,32768,0x7ffc3fd2dde0,0x7f5b042bc810,64) 302ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac7c10,2097152,0xcaa3970,32768,0x7ffc3fd2dde0,0x7f5b042bcc10,64) 466ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac8010,2097152,0xcaa3980,32768,0x7ffc3fd2dde0,0x7f5b042bd010,64) 698ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac8410,2097152,0xcaa3990,32768,0x7ffc3fd2dde0,0x7f5b042bd410,64) 732ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac8810,2097152,0xcaa39a0,32768,0x7ffc3fd2dde0,0x7f5b042bd810,64) 462ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac8c10,2097152,0xcaa39b0,32768,0x7ffc3fd2dde0,0x7f5b042bdc10,64) 386ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac9010,2097152,0xcaa39c0,32768,0x7ffc3fd2dde0,0x7f5b042be010,64) 818ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac9410,2097152,0xcaa39d0,32768,0x7ffc3fd2dde0,0x7f5b042be410,64) 732ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac9810,2097152,0xcaa39e0,32768,0x7ffc3fd2dde0,0x7f5b042be810,64) 305ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ac9c10,2097152,0xcaa39f0,32768,0x7ffc3fd2dde0,0x7f5b042bec10,64) 479ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aca010,2097152,0xcaa3a00,32768,0x7ffc3fd2dde0,0x7f5b042bf010,64) 885ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aca410,2097152,0xcaa3a10,32768,0x7ffc3fd2dde0,0x7f5b042bf410,64) 706ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aca810,2097152,0xcaa3a20,32768,0x7ffc3fd2dde0,0x7f5b042bf810,64) 307ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acac10,2097152,0xcaa3a30,32768,0x7ffc3fd2dde0,0x7f5b042bfc10,64) 395ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acb010,2097152,0xcaa3a40,32768,0x7ffc3fd2dde0,0x7f5b042c0010,64) 870ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acb410,2097152,0xcaa3a50,32768,0x7ffc3fd2dde0,0x7f5b042c0410,64) 352ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acb810,2097152,0xcaa3a60,32768,0x7ffc3fd2dde0,0x7f5b042c0810,64) 792ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acbc10,2097152,0xcaa3a70,32768,0x7ffc3fd2dde0,0x7f5b042c0c10,64) 449ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acc010,2097152,0xcaa3a80,32768,0x7ffc3fd2dde0,0x7f5b042c1010,64) 821ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acc410,2097152,0xcaa3a90,32768,0x7ffc3fd2dde0,0x7f5b042c1410,64) 382ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acc810,2097152,0xcaa3aa0,32768,0x7ffc3fd2dde0,0x7f5b042c1810,64) 292ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07accc10,2097152,0xcaa3ab0,32768,0x7ffc3fd2dde0,0x7f5b042c1c10,64) 454ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acd010,2097152,0xcaa3ac0,32768,0x7ffc3fd2dde0,0x7f5b042c2010,64) 564ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acd410,2097152,0xcaa3ad0,32768,0x7ffc3fd2dde0,0x7f5b042c2410,64) 373ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acd810,2097152,0xcaa3ae0,32768,0x7ffc3fd2dde0,0x7f5b042c2810,64) 292ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acdc10,2097152,0xcaa3af0,32768,0x7ffc3fd2dde0,0x7f5b042c2c10,64) 458ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ace010,2097152,0xcaa3b00,32768,0x7ffc3fd2dde0,0x7f5b042c3010,64) 581ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ace410,2097152,0xcaa3b10,32768,0x7ffc3fd2dde0,0x7f5b042c3410,64) 412ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ace810,2097152,0xcaa3b20,32768,0x7ffc3fd2dde0,0x7f5b042c3810,64) 302ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acec10,2097152,0xcaa3b30,32768,0x7ffc3fd2dde0,0x7f5b042c3c10,64) 453ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acf010,2097152,0xcaa3b40,32768,0x7ffc3fd2dde0,0x7f5b042c4010,64) 715ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acf410,2097152,0xcaa3b50,32768,0x7ffc3fd2dde0,0x7f5b042c4410,64) 399ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acf810,2097152,0xcaa3b60,32768,0x7ffc3fd2dde0,0x7f5b042c4810,64) 486ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07acfc10,2097152,0xcaa3b70,32768,0x7ffc3fd2dde0,0x7f5b042c4c10,64) 458ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad0010,2097152,0xcaa3b80,32768,0x7ffc3fd2dde0,0x7f5b042c5010,64) 719ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad0410,2097152,0xcaa3b90,32768,0x7ffc3fd2dde0,0x7f5b042c5410,64) 427ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad0810,2097152,0xcaa3ba0,32768,0x7ffc3fd2dde0,0x7f5b042c5810,64) 520ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad0c10,2097152,0xcaa3bb0,32768,0x7ffc3fd2dde0,0x7f5b042c5c10,64) 456ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad1010,2097152,0xcaa3bc0,32768,0x7ffc3fd2dde0,0x7f5b042c6010,64) 812ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad1410,2097152,0xcaa3bd0,32768,0x7ffc3fd2dde0,0x7f5b042c6410,64) 387ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad1810,2097152,0xcaa3be0,32768,0x7ffc3fd2dde0,0x7f5b042c6810,64) 311ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad1c10,2097152,0xcaa3bf0,32768,0x7ffc3fd2dde0,0x7f5b042c6c10,64) 453ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad2010,2097152,0xcaa3c00,32768,0x7ffc3fd2dde0,0x7f5b042c7010,64) 494ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad2410,2097152,0xcaa3c10,32768,0x7ffc3fd2dde0,0x7f5b042c7410,64) 373ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad2810,2097152,0xcaa3c20,32768,0x7ffc3fd2dde0,0x7f5b042c7810,64) 291ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad2c10,2097152,0xcaa3c30,32768,0x7ffc3fd2dde0,0x7f5b042c7c10,64) 719ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad3010,2097152,0xcaa3c40,32768,0x7ffc3fd2dde0,0x7f5b042c8010,64) 490ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad3410,2097152,0xcaa3c50,32768,0x7ffc3fd2dde0,0x7f5b042c8410,64) 622ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad3810,2097152,0xcaa3c60,32768,0x7ffc3fd2dde0,0x7f5b042c8810,64) 298ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad3c10,2097152,0xcaa3c70,32768,0x7ffc3fd2dde0,0x7f5b042c8c10,64) 475ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad4010,2097152,0xcaa3c80,32768,0x7ffc3fd2dde0,0x7f5b042c9010,64) 490ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad4410,2097152,0xcaa3c90,32768,0x7ffc3fd2dde0,0x7f5b042c9410,64) 557ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad4810,2097152,0xcaa3ca0,32768,0x7ffc3fd2dde0,0x7f5b042c9810,64) 311ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad4c10,2097152,0xcaa3cb0,32768,0x7ffc3fd2dde0,0x7f5b042c9c10,64) 475ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad5010,2097152,0xcaa3cc0,32768,0x7ffc3fd2dde0,0x7f5b042ca010,64) 827ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad5410,2097152,0xcaa3cd0,32768,0x7ffc3fd2dde0,0x7f5b042ca410,64) 384ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad5810,2097152,0xcaa3ce0,32768,0x7ffc3fd2dde0,0x7f5b042ca810,64) 548ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad5c10,2097152,0xcaa3cf0,32768,0x7ffc3fd2dde0,0x7f5b042cac10,64) 456ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad6010,2097152,0xcaa3d00,32768,0x7ffc3fd2dde0,0x7f5b042cb010,64) 630ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad6410,2097152,0xcaa3d10,32768,0x7ffc3fd2dde0,0x7f5b042cb410,64) 387ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad6810,2097152,0xcaa3d20,32768,0x7ffc3fd2dde0,0x7f5b042cb810,64) 305ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad6c10,2097152,0xcaa3d30,32768,0x7ffc3fd2dde0,0x7f5b042cbc10,64) 458ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad7010,2097152,0xcaa3d40,32768,0x7ffc3fd2dde0,0x7f5b042cc010,64) 700ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad7410,2097152,0xcaa3d50,32768,0x7ffc3fd2dde0,0x7f5b042cc410,64) 747ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad7810,2097152,0xcaa3d60,32768,0x7ffc3fd2dde0,0x7f5b042cc810,64) 350ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad7c10,2097152,0xcaa3d70,32768,0x7ffc3fd2dde0,0x7f5b042ccc10,64) 4.41us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad8010,2097152,0xcaa3d80,32768,0x7ffc3fd2dde0,0x7f5b042cd010,64) 1.27us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad8410,2097152,0xcaa3d90,32768,0x7ffc3fd2dde0,0x7f5b042cd410,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad8810,2097152,0xcaa3da0,32768,0x7ffc3fd2dde0,0x7f5b042cd810,64) 991ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad8c10,2097152,0xcaa3db0,32768,0x7ffc3fd2dde0,0x7f5b042cdc10,64) 1.07us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad9010,2097152,0xcaa3dc0,32768,0x7ffc3fd2dde0,0x7f5b042ce010,64) 1.27us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad9410,2097152,0xcaa3dd0,32768,0x7ffc3fd2dde0,0x7f5b042ce410,64) 1.01us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad9810,2097152,0xcaa3de0,32768,0x7ffc3fd2dde0,0x7f5b042ce810,64) 952ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ad9c10,2097152,0xcaa3df0,32768,0x7ffc3fd2dde0,0x7f5b042cec10,64) 1.04us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ada010,2097152,0xcaa3e00,32768,0x7ffc3fd2dde0,0x7f5b042cf010,64) 1.26us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ada410,2097152,0xcaa3e10,32768,0x7ffc3fd2dde0,0x7f5b042cf410,64) 1.14us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ada810,2097152,0xcaa3e20,32768,0x7ffc3fd2dde0,0x7f5b042cf810,64) 827ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adac10,2097152,0xcaa3e30,32768,0x7ffc3fd2dde0,0x7f5b042cfc10,64) 939ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adb010,2097152,0xcaa3e40,32768,0x7ffc3fd2dde0,0x7f5b042d0010,64) 1.46us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adb410,2097152,0xcaa3e50,32768,0x7ffc3fd2dde0,0x7f5b042d0410,64) 957ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adb810,2097152,0xcaa3e60,32768,0x7ffc3fd2dde0,0x7f5b042d0810,64) 827ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adbc10,2097152,0xcaa3e70,32768,0x7ffc3fd2dde0,0x7f5b042d0c10,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adc010,2097152,0xcaa3e80,32768,0x7ffc3fd2dde0,0x7f5b042d1010,64) 1.34us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adc410,2097152,0xcaa3e90,32768,0x7ffc3fd2dde0,0x7f5b042d1410,64) 888ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adc810,2097152,0xcaa3ea0,32768,0x7ffc3fd2dde0,0x7f5b042d1810,64) 814ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adcc10,2097152,0xcaa3eb0,32768,0x7ffc3fd2dde0,0x7f5b042d1c10,64) 1.21us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07add010,2097152,0xcaa3ec0,32768,0x7ffc3fd2dde0,0x7f5b042d2010,64) 1.29us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07add410,2097152,0xcaa3ed0,32768,0x7ffc3fd2dde0,0x7f5b042d2410,64) 888ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07add810,2097152,0xcaa3ee0,32768,0x7ffc3fd2dde0,0x7f5b042d2810,64) 831ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07addc10,2097152,0xcaa3ef0,32768,0x7ffc3fd2dde0,0x7f5b042d2c10,64) 1.03us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ade010,2097152,0xcaa3f00,32768,0x7ffc3fd2dde0,0x7f5b042d3010,64) 1.14us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ade410,2097152,0xcaa3f10,32768,0x7ffc3fd2dde0,0x7f5b042d3410,64) 1.25us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ade810,2097152,0xcaa3f20,32768,0x7ffc3fd2dde0,0x7f5b042d3810,64) 956ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adec10,2097152,0xcaa3f30,32768,0x7ffc3fd2dde0,0x7f5b042d3c10,64) 1.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adf010,2097152,0xcaa3f40,32768,0x7ffc3fd2dde0,0x7f5b042d4010,64) 1.29us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adf410,2097152,0xcaa3f50,32768,0x7ffc3fd2dde0,0x7f5b042d4410,64) 902ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adf810,2097152,0xcaa3f60,32768,0x7ffc3fd2dde0,0x7f5b042d4810,64) 885ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07adfc10,2097152,0xcaa3f70,32768,0x7ffc3fd2dde0,0x7f5b042d4c10,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae0010,2097152,0xcaa3f80,32768,0x7ffc3fd2dde0,0x7f5b042d5010,64) 1.17us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae0410,2097152,0xcaa3f90,32768,0x7ffc3fd2dde0,0x7f5b042d5410,64) 1.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae0810,2097152,0xcaa3fa0,32768,0x7ffc3fd2dde0,0x7f5b042d5810,64) 844ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae0c10,2097152,0xcaa3fb0,32768,0x7ffc3fd2dde0,0x7f5b042d5c10,64) 1.05us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae1010,2097152,0xcaa3fc0,32768,0x7ffc3fd2dde0,0x7f5b042d6010,64) 1.52us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae1410,2097152,0xcaa3fd0,32768,0x7ffc3fd2dde0,0x7f5b042d6410,64) 1.06us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae1810,2097152,0xcaa3fe0,32768,0x7ffc3fd2dde0,0x7f5b042d6810,64) 833ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae1c10,2097152,0xcaa3ff0,32768,0x7ffc3fd2dde0,0x7f5b042d6c10,64) 1.47us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae2010,2097152,0xcaa4000,32768,0x7ffc3fd2dde0,0x7f5b042d7010,64) 1.34us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae2410,2097152,0xcaa4010,32768,0x7ffc3fd2dde0,0x7f5b042d7410,64) 834ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae2810,2097152,0xcaa4020,32768,0x7ffc3fd2dde0,0x7f5b042d7810,64) 834ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae2c10,2097152,0xcaa4030,32768,0x7ffc3fd2dde0,0x7f5b042d7c10,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae3010,2097152,0xcaa4040,32768,0x7ffc3fd2dde0,0x7f5b042d8010,64) 900ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae3410,2097152,0xcaa4050,32768,0x7ffc3fd2dde0,0x7f5b042d8410,64) 557ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae3810,2097152,0xcaa4060,32768,0x7ffc3fd2dde0,0x7f5b042d8810,64) 298ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae3c10,2097152,0xcaa4070,32768,0x7ffc3fd2dde0,0x7f5b042d8c10,64) 766ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae4010,2097152,0xcaa4080,32768,0x7ffc3fd2dde0,0x7f5b042d9010,64) 719ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae4410,2097152,0xcaa4090,32768,0x7ffc3fd2dde0,0x7f5b042d9410,64) 4.42us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae4810,2097152,0xcaa40a0,32768,0x7ffc3fd2dde0,0x7f5b042d9810,64) 1.54us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae4c10,2097152,0xcaa40b0,32768,0x7ffc3fd2dde0,0x7f5b042d9c10,64) 1.08us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae5010,2097152,0xcaa40c0,32768,0x7ffc3fd2dde0,0x7f5b042da010,64) 1.22us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae5410,2097152,0xcaa40d0,32768,0x7ffc3fd2dde0,0x7f5b042da410,64) 1.14us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae5810,2097152,0xcaa40e0,32768,0x7ffc3fd2dde0,0x7f5b042da810,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae5c10,2097152,0xcaa40f0,32768,0x7ffc3fd2dde0,0x7f5b042dac10,64) 1.06us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae6010,2097152,0xcaa4100,32768,0x7ffc3fd2dde0,0x7f5b042db010,64) 1.09us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae6410,2097152,0xcaa4110,32768,0x7ffc3fd2dde0,0x7f5b042db410,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae6810,2097152,0xcaa4120,32768,0x7ffc3fd2dde0,0x7f5b042db810,64) 838ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae6c10,2097152,0xcaa4130,32768,0x7ffc3fd2dde0,0x7f5b042dbc10,64) 1.08us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae7010,2097152,0xcaa4140,32768,0x7ffc3fd2dde0,0x7f5b042dc010,64) 1.36us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae7410,2097152,0xcaa4150,32768,0x7ffc3fd2dde0,0x7f5b042dc410,64) 1.00us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae7810,2097152,0xcaa4160,32768,0x7ffc3fd2dde0,0x7f5b042dc810,64) 846ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae7c10,2097152,0xcaa4170,32768,0x7ffc3fd2dde0,0x7f5b042dcc10,64) 985ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae8010,2097152,0xcaa4180,32768,0x7ffc3fd2dde0,0x7f5b042dd010,64) 911ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae8410,2097152,0xcaa4190,32768,0x7ffc3fd2dde0,0x7f5b042dd410,64) 836ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae8810,2097152,0xcaa41a0,32768,0x7ffc3fd2dde0,0x7f5b042dd810,64) 836ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae8c10,2097152,0xcaa41b0,32768,0x7ffc3fd2dde0,0x7f5b042ddc10,64) 1.49us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae9010,2097152,0xcaa41c0,32768,0x7ffc3fd2dde0,0x7f5b042de010,64) 1.06us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae9410,2097152,0xcaa41d0,32768,0x7ffc3fd2dde0,0x7f5b042de410,64) 933ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae9810,2097152,0xcaa41e0,32768,0x7ffc3fd2dde0,0x7f5b042de810,64) 840ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ae9c10,2097152,0xcaa41f0,32768,0x7ffc3fd2dde0,0x7f5b042dec10,64) 1.05us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aea010,2097152,0xcaa4200,32768,0x7ffc3fd2dde0,0x7f5b042df010,64) 933ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aea410,2097152,0xcaa4210,32768,0x7ffc3fd2dde0,0x7f5b042df410,64) 851ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aea810,2097152,0xcaa4220,32768,0x7ffc3fd2dde0,0x7f5b042df810,64) 356ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aeac10,2097152,0xcaa4230,32768,0x7ffc3fd2dde0,0x7f5b042dfc10,64) 535ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aeb010,2097152,0xcaa4240,32768,0x7ffc3fd2dde0,0x7f5b042e0010,64) 702ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07aeb410,2097152,0xcaa4250,32768,0x7ffc3fd2dde0,0x7f5b042e0410,64) 443ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b04810,2097152,0xcaa48a0,32768,0x7ffc3fd2dde0,0x7f5b042f9810,64) 324ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... some 100 stable lines... ]
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b04c10,2097152,0xcaa48b0,32768,0x7ffc3fd2dde0,0x7f5b042f9c10,64) 559ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b05010,2097152,0xcaa48c0,32768,0x7ffc3fd2dde0,0x7f5b042fa010,64) 510ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b05410,2097152,0xcaa48d0,32768,0x7ffc3fd2dde0,0x7f5b042fa410,64) 421ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b05810,2097152,0xcaa48e0,32768,0x7ffc3fd2dde0,0x7f5b042fa810,64) 568ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b05c10,2097152,0xcaa48f0,32768,0x7ffc3fd2dde0,0x7f5b042fac10,64) 799ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b06010,2097152,0xcaa4900,32768,0x7ffc3fd2dde0,0x7f5b042fb010,64) 885ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b06410,2097152,0xcaa4910,32768,0x7ffc3fd2dde0,0x7f5b042fb410,64) 400ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b06810,2097152,0xcaa4920,32768,0x7ffc3fd2dde0,0x7f5b042fb810,64) 328ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b06c10,2097152,0xcaa4930,32768,0x7ffc3fd2dde0,0x7f5b042fbc10,64) 479ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b07010,2097152,0xcaa4940,32768,0x7ffc3fd2dde0,0x7f5b042fc010,64) 497ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b07410,2097152,0xcaa4950,32768,0x7ffc3fd2dde0,0x7f5b042fc410,64) 561ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b07810,2097152,0xcaa4960,32768,0x7ffc3fd2dde0,0x7f5b042fc810,64) 339ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b07c10,2097152,0xcaa4970,32768,0x7ffc3fd2dde0,0x7f5b042fcc10,64) 481ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b08010,2097152,0xcaa4980,32768,0x7ffc3fd2dde0,0x7f5b042fd010,64) 454ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b08410,2097152,0xcaa4990,32768,0x7ffc3fd2dde0,0x7f5b042fd410,64) 425ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b08810,2097152,0xcaa49a0,32768,0x7ffc3fd2dde0,0x7f5b042fd810,64) 462ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b08c10,2097152,0xcaa49b0,32768,0x7ffc3fd2dde0,0x7f5b042fdc10,64) 460ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b09010,2097152,0xcaa49c0,32768,0x7ffc3fd2dde0,0x7f5b042fe010,64) 697ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b09410,2097152,0xcaa49d0,32768,0x7ffc3fd2dde0,0x7f5b042fe410,64) 423ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b09810,2097152,0xcaa49e0,32768,0x7ffc3fd2dde0,0x7f5b042fe810,64) 330ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b09c10,2097152,0xcaa49f0,32768,0x7ffc3fd2dde0,0x7f5b042fec10,64) 4.36us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0a010,2097152,0xcaa4a00,32768,0x7ffc3fd2dde0,0x7f5b042ff010,64) 1.66us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0a410,2097152,0xcaa4a10,32768,0x7ffc3fd2dde0,0x7f5b042ff410,64) 1.19us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0a810,2097152,0xcaa4a20,32768,0x7ffc3fd2dde0,0x7f5b042ff810,64) 883ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0ac10,2097152,0xcaa4a30,32768,0x7ffc3fd2dde0,0x7f5b042ffc10,64) 1.18us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0b010,2097152,0xcaa4a40,32768,0x7ffc3fd2dde0,0x7f5b04300010,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0b410,2097152,0xcaa4a50,32768,0x7ffc3fd2dde0,0x7f5b04300410,64) 874ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0b810,2097152,0xcaa4a60,32768,0x7ffc3fd2dde0,0x7f5b04300810,64) 825ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0bc10,2097152,0xcaa4a70,32768,0x7ffc3fd2dde0,0x7f5b04300c10,64) 995ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0c010,2097152,0xcaa4a80,32768,0x7ffc3fd2dde0,0x7f5b04301010,64) 1.09us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0c410,2097152,0xcaa4a90,32768,0x7ffc3fd2dde0,0x7f5b04301410,64) 844ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0c810,2097152,0xcaa4aa0,32768,0x7ffc3fd2dde0,0x7f5b04301810,64) 810ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0cc10,2097152,0xcaa4ab0,32768,0x7ffc3fd2dde0,0x7f5b04301c10,64) 987ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0d010,2097152,0xcaa4ac0,32768,0x7ffc3fd2dde0,0x7f5b04302010,64) 950ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0d410,2097152,0xcaa4ad0,32768,0x7ffc3fd2dde0,0x7f5b04302410,64) 894ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0d810,2097152,0xcaa4ae0,32768,0x7ffc3fd2dde0,0x7f5b04302810,64) 857ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0dc10,2097152,0xcaa4af0,32768,0x7ffc3fd2dde0,0x7f5b04302c10,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0e010,2097152,0xcaa4b00,32768,0x7ffc3fd2dde0,0x7f5b04303010,64) 1.00us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0e410,2097152,0xcaa4b10,32768,0x7ffc3fd2dde0,0x7f5b04303410,64) 844ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0e810,2097152,0xcaa4b20,32768,0x7ffc3fd2dde0,0x7f5b04303810,64) 792ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0ec10,2097152,0xcaa4b30,32768,0x7ffc3fd2dde0,0x7f5b04303c10,64) 1.01us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0f010,2097152,0xcaa4b40,32768,0x7ffc3fd2dde0,0x7f5b04304010,64) 1.10us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0f410,2097152,0xcaa4b50,32768,0x7ffc3fd2dde0,0x7f5b04304410,64) 672ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0f810,2097152,0xcaa4b60,32768,0x7ffc3fd2dde0,0x7f5b04304810,64) 387ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b0fc10,2097152,0xcaa4b70,32768,0x7ffc3fd2dde0,0x7f5b04304c10,64) 473ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b10010,2097152,0xcaa4b80,32768,0x7ffc3fd2dde0,0x7f5b04305010,64) 594ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b10410,2097152,0xcaa4b90,32768,0x7ffc3fd2dde0,0x7f5b04305410,64) 631ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b10810,2097152,0xcaa4ba0,32768,0x7ffc3fd2dde0,0x7f5b04305810,64) 363ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b10c10,2097152,0xcaa4bb0,32768,0x7ffc3fd2dde0,0x7f5b04305c10,64) 687ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b11010,2097152,0xcaa4bc0,32768,0x7ffc3fd2dde0,0x7f5b04306010,64) 773ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b11410,2097152,0xcaa4bd0,32768,0x7ffc3fd2dde0,0x7f5b04306410,64) 617ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b11810,2097152,0xcaa4be0,32768,0x7ffc3fd2dde0,0x7f5b04306810,64) 346ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b11c10,2097152,0xcaa4bf0,32768,0x7ffc3fd2dde0,0x7f5b04306c10,64) 481ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b12010,2097152,0xcaa4c00,32768,0x7ffc3fd2dde0,0x7f5b04307010,64) 527ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b12410,2097152,0xcaa4c10,32768,0x7ffc3fd2dde0,0x7f5b04307410,64) 400ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b12810,2097152,0xcaa4c20,32768,0x7ffc3fd2dde0,0x7f5b04307810,64) 333ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b12c10,2097152,0xcaa4c30,32768,0x7ffc3fd2dde0,0x7f5b04307c10,64) 510ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b13010,2097152,0xcaa4c40,32768,0x7ffc3fd2dde0,0x7f5b04308010,64) 754ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b13410,2097152,0xcaa4c50,32768,0x7ffc3fd2dde0,0x7f5b04308410,64) 609ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b13810,2097152,0xcaa4c60,32768,0x7ffc3fd2dde0,0x7f5b04308810,64) 369ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b13c10,2097152,0xcaa4c70,32768,0x7ffc3fd2dde0,0x7f5b04308c10,64) 468ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b14010,2097152,0xcaa4c80,32768,0x7ffc3fd2dde0,0x7f5b04309010,64) 924ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b14410,2097152,0xcaa4c90,32768,0x7ffc3fd2dde0,0x7f5b04309410,64) 412ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b14810,2097152,0xcaa4ca0,32768,0x7ffc3fd2dde0,0x7f5b04309810,64) 503ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b14c10,2097152,0xcaa4cb0,32768,0x7ffc3fd2dde0,0x7f5b04309c10,64) 505ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b15010,2097152,0xcaa4cc0,32768,0x7ffc3fd2dde0,0x7f5b0430a010,64) 760ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b15410,2097152,0xcaa4cd0,32768,0x7ffc3fd2dde0,0x7f5b0430a410,64) 400ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b15810,2097152,0xcaa4ce0,32768,0x7ffc3fd2dde0,0x7f5b0430a810,64) 693ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b15c10,2097152,0xcaa4cf0,32768,0x7ffc3fd2dde0,0x7f5b0430ac10,64) 482ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b16010,2097152,0xcaa4d00,32768,0x7ffc3fd2dde0,0x7f5b0430b010,64) 825ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b16410,2097152,0xcaa4d10,32768,0x7ffc3fd2dde0,0x7f5b0430b410,64) 466ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b16810,2097152,0xcaa4d20,32768,0x7ffc3fd2dde0,0x7f5b0430b810,64) 304ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b16c10,2097152,0xcaa4d30,32768,0x7ffc3fd2dde0,0x7f5b0430bc10,64) 525ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b17010,2097152,0xcaa4d40,32768,0x7ffc3fd2dde0,0x7f5b0430c010,64) 769ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b17410,2097152,0xcaa4d50,32768,0x7ffc3fd2dde0,0x7f5b0430c410,64) 389ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b17810,2097152,0xcaa4d60,32768,0x7ffc3fd2dde0,0x7f5b0430c810,64) 382ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b17c10,2097152,0xcaa4d70,32768,0x7ffc3fd2dde0,0x7f5b0430cc10,64) 747ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b18010,2097152,0xcaa4d80,32768,0x7ffc3fd2dde0,0x7f5b0430d010,64) 406ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b18410,2097152,0xcaa4d90,32768,0x7ffc3fd2dde0,0x7f5b0430d410,64) 371ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b18810,2097152,0xcaa4da0,32768,0x7ffc3fd2dde0,0x7f5b0430d810,64) 363ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b18c10,2097152,0xcaa4db0,32768,0x7ffc3fd2dde0,0x7f5b0430dc10,64) 821ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b19010,2097152,0xcaa4dc0,32768,0x7ffc3fd2dde0,0x7f5b0430e010,64) 481ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b19410,2097152,0xcaa4dd0,32768,0x7ffc3fd2dde0,0x7f5b0430e410,64) 624ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b19810,2097152,0xcaa4de0,32768,0x7ffc3fd2dde0,0x7f5b0430e810,64) 391ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b19c10,2097152,0xcaa4df0,32768,0x7ffc3fd2dde0,0x7f5b0430ec10,64) 481ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1a010,2097152,0xcaa4e00,32768,0x7ffc3fd2dde0,0x7f5b0430f010,64) 529ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1a410,2097152,0xcaa4e10,32768,0x7ffc3fd2dde0,0x7f5b0430f410,64) 715ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1a810,2097152,0xcaa4e20,32768,0x7ffc3fd2dde0,0x7f5b0430f810,64) 307ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1ac10,2097152,0xcaa4e30,32768,0x7ffc3fd2dde0,0x7f5b0430fc10,64) 382ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1b010,2097152,0xcaa4e40,32768,0x7ffc3fd2dde0,0x7f5b04310010,64) 641ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1b410,2097152,0xcaa4e50,32768,0x7ffc3fd2dde0,0x7f5b04310410,64) 378ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1b810,2097152,0xcaa4e60,32768,0x7ffc3fd2dde0,0x7f5b04310810,64) 296ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1bc10,2097152,0xcaa4e70,32768,0x7ffc3fd2dde0,0x7f5b04310c10,64) 404ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1c010,2097152,0xcaa4e80,32768,0x7ffc3fd2dde0,0x7f5b04311010,64) 471ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1c410,2097152,0xcaa4e90,32768,0x7ffc3fd2dde0,0x7f5b04311410,64) 387ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1c810,2097152,0xcaa4ea0,32768,0x7ffc3fd2dde0,0x7f5b04311810,64) 544ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1cc10,2097152,0xcaa4eb0,32768,0x7ffc3fd2dde0,0x7f5b04311c10,64) 397ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1d010,2097152,0xcaa4ec0,32768,0x7ffc3fd2dde0,0x7f5b04312010,64) 674ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1d410,2097152,0xcaa4ed0,32768,0x7ffc3fd2dde0,0x7f5b04312410,64) 415ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1d810,2097152,0xcaa4ee0,32768,0x7ffc3fd2dde0,0x7f5b04312810,64) 369ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1dc10,2097152,0xcaa4ef0,32768,0x7ffc3fd2dde0,0x7f5b04312c10,64) 469ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1e010,2097152,0xcaa4f00,32768,0x7ffc3fd2dde0,0x7f5b04313010,64) 1.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1e410,2097152,0xcaa4f10,32768,0x7ffc3fd2dde0,0x7f5b04313410,64) 466ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1e810,2097152,0xcaa4f20,32768,0x7ffc3fd2dde0,0x7f5b04313810,64) 352ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1ec10,2097152,0xcaa4f30,32768,0x7ffc3fd2dde0,0x7f5b04313c10,64) 466ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1f010,2097152,0xcaa4f40,32768,0x7ffc3fd2dde0,0x7f5b04314010,64) 592ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1f410,2097152,0xcaa4f50,32768,0x7ffc3fd2dde0,0x7f5b04314410,64) 469ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1f810,2097152,0xcaa4f60,32768,0x7ffc3fd2dde0,0x7f5b04314810,64) 494ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b1fc10,2097152,0xcaa4f70,32768,0x7ffc3fd2dde0,0x7f5b04314c10,64) 397ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b20010,2097152,0xcaa4f80,32768,0x7ffc3fd2dde0,0x7f5b04315010,64) 741ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b20410,2097152,0xcaa4f90,32768,0x7ffc3fd2dde0,0x7f5b04315410,64) 423ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b20810,2097152,0xcaa4fa0,32768,0x7ffc3fd2dde0,0x7f5b04315810,64) 417ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b20c10,2097152,0xcaa4fb0,32768,0x7ffc3fd2dde0,0x7f5b04315c10,64) 464ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b21010,2097152,0xcaa4fc0,32768,0x7ffc3fd2dde0,0x7f5b04316010,64) 825ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b21410,2097152,0xcaa4fd0,32768,0x7ffc3fd2dde0,0x7f5b04316410,64) 393ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b21810,2097152,0xcaa4fe0,32768,0x7ffc3fd2dde0,0x7f5b04316810,64) 333ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b21c10,2097152,0xcaa4ff0,32768,0x7ffc3fd2dde0,0x7f5b04316c10,64) 479ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b22010,2097152,0xcaa5000,32768,0x7ffc3fd2dde0,0x7f5b04317010,64) 631ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... Some 500 lines of stuff...]
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b9f410,2097152,0xcaa6f50,32768,0x7ffc3fd2dde0,0x7f5b04394410,64) 352ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b9f810,2097152,0xcaa6f60,32768,0x7ffc3fd2dde0,0x7f5b04394810,64) 523ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07b9fc10,2097152,0xcaa6f70,32768,0x7ffc3fd2dde0,0x7f5b04394c10,64) 4.37us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba0010,2097152,0xcaa6f80,32768,0x7ffc3fd2dde0,0x7f5b04395010,64) 1.48us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba0410,2097152,0xcaa6f90,32768,0x7ffc3fd2dde0,0x7f5b04395410,64) 924ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba0810,2097152,0xcaa6fa0,32768,0x7ffc3fd2dde0,0x7f5b04395810,64) 1.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba0c10,2097152,0xcaa6fb0,32768,0x7ffc3fd2dde0,0x7f5b04395c10,64) 1.02us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba1010,2097152,0xcaa6fc0,32768,0x7ffc3fd2dde0,0x7f5b04396010,64) 1.36us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba1410,2097152,0xcaa6fd0,32768,0x7ffc3fd2dde0,0x7f5b04396410,64) 1.07us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba1810,2097152,0xcaa6fe0,32768,0x7ffc3fd2dde0,0x7f5b04396810,64) 846ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba1c10,2097152,0xcaa6ff0,32768,0x7ffc3fd2dde0,0x7f5b04396c10,64) 1.04us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07ba2010,2097152,0xcaa7000,32768,0x7ffc3fd2dde0,0x7f5b04397010,64) 1.20us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... Some 900 more lines of stuff...]
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c83010,2097152,0xcaaa840,32768,0x7ffc3fd2dde0,0x7f5b04478010,64) 1.07us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c83410,2097152,0xcaaa850,32768,0x7ffc3fd2dde0,0x7f5b04478410,64) 469ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c83810,2097152,0xcaaa860,32768,0x7ffc3fd2dde0,0x7f5b04478810,64) 520ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c83c10,2097152,0xcaaa870,32768,0x7ffc3fd2dde0,0x7f5b04478c10,64) 417ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c84010,2097152,0xcaaa880,32768,0x7ffc3fd2dde0,0x7f5b04479010,64) 563ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c84410,2097152,0xcaaa890,32768,0x7ffc3fd2dde0,0x7f5b04479410,64) 359ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c84810,2097152,0xcaaa8a0,32768,0x7ffc3fd2dde0,0x7f5b04479810,64) 328ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c84c10,2097152,0xcaaa8b0,32768,0x7ffc3fd2dde0,0x7f5b04479c10,64) 432ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c85010,2097152,0xcaaa8c0,32768,0x7ffc3fd2dde0,0x7f5b0447a010,64) 728ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c85410,2097152,0xcaaa8d0,32768,0x7ffc3fd2dde0,0x7f5b0447a410,64) 399ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c85810,2097152,0xcaaa8e0,32768,0x7ffc3fd2dde0,0x7f5b0447a810,64) 488ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c85c10,2097152,0xcaaa8f0,32768,0x7ffc3fd2dde0,0x7f5b0447ac10,64) 427ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c86010,2097152,0xcaaa900,32768,0x7ffc3fd2dde0,0x7f5b0447b010,64) 548ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c86410,2097152,0xcaaa910,32768,0x7ffc3fd2dde0,0x7f5b0447b410,64) 341ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c86810,2097152,0xcaaa920,32768,0x7ffc3fd2dde0,0x7f5b0447b810,64) 436ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c86c10,2097152,0xcaaa930,32768,0x7ffc3fd2dde0,0x7f5b0447bc10,64) 421ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c87010,2097152,0xcaaa940,32768,0x7ffc3fd2dde0,0x7f5b0447c010,64) 4.49us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c87410,2097152,0xcaaa950,32768,0x7ffc3fd2dde0,0x7f5b0447c410,64) 1.16us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c87810,2097152,0xcaaa960,32768,0x7ffc3fd2dde0,0x7f5b0447c810,64) 985ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c87c10,2097152,0xcaaa970,32768,0x7ffc3fd2dde0,0x7f5b0447cc10,64) 1.12us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c88010,2097152,0xcaaa980,32768,0x7ffc3fd2dde0,0x7f5b0447d010,64) 1.10us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c88410,2097152,0xcaaa990,32768,0x7ffc3fd2dde0,0x7f5b0447d410,64) 836ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c88810,2097152,0xcaaa9a0,32768,0x7ffc3fd2dde0,0x7f5b0447d810,64) 842ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c88c10,2097152,0xcaaa9b0,32768,0x7ffc3fd2dde0,0x7f5b0447dc10,64) 1.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c89010,2097152,0xcaaa9c0,32768,0x7ffc3fd2dde0,0x7f5b0447e010,64) 1.07us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c89410,2097152,0xcaaa9d0,32768,0x7ffc3fd2dde0,0x7f5b0447e410,64) 866ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c89810,2097152,0xcaaa9e0,32768,0x7ffc3fd2dde0,0x7f5b0447e810,64) 818ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c89c10,2097152,0xcaaa9f0,32768,0x7ffc3fd2dde0,0x7f5b0447ec10,64) 1.05us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8a010,2097152,0xcaaaa00,32768,0x7ffc3fd2dde0,0x7f5b0447f010,64) 946ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8a410,2097152,0xcaaaa10,32768,0x7ffc3fd2dde0,0x7f5b0447f410,64) 851ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8a810,2097152,0xcaaaa20,32768,0x7ffc3fd2dde0,0x7f5b0447f810,64) 1.03us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8ac10,2097152,0xcaaaa30,32768,0x7ffc3fd2dde0,0x7f5b0447fc10,64) 991ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8b010,2097152,0xcaaaa40,32768,0x7ffc3fd2dde0,0x7f5b04480010,64) 1.13us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8b410,2097152,0xcaaaa50,32768,0x7ffc3fd2dde0,0x7f5b04480410,64) 942ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8b810,2097152,0xcaaaa60,32768,0x7ffc3fd2dde0,0x7f5b04480810,64) 831ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8bc10,2097152,0xcaaaa70,32768,0x7ffc3fd2dde0,0x7f5b04480c10,64) 1.01us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8c010,2097152,0xcaaaa80,32768,0x7ffc3fd2dde0,0x7f5b04481010,64) 993ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8c410,2097152,0xcaaaa90,32768,0x7ffc3fd2dde0,0x7f5b04481410,64) 885ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8c810,2097152,0xcaaaaa0,32768,0x7ffc3fd2dde0,0x7f5b04481810,64) 827ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8cc10,2097152,0xcaaaab0,32768,0x7ffc3fd2dde0,0x7f5b04481c10,64) 1.24us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8d010,2097152,0xcaaaac0,32768,0x7ffc3fd2dde0,0x7f5b04482010,64) 1.15us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8d410,2097152,0xcaaaad0,32768,0x7ffc3fd2dde0,0x7f5b04482410,64) 937ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8d810,2097152,0xcaaaae0,32768,0x7ffc3fd2dde0,0x7f5b04482810,64) 862ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8dc10,2097152,0xcaaaaf0,32768,0x7ffc3fd2dde0,0x7f5b04482c10,64) 1.06us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8e010,2097152,0xcaaab00,32768,0x7ffc3fd2dde0,0x7f5b04483010,64) 1.18us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8e410,2097152,0xcaaab10,32768,0x7ffc3fd2dde0,0x7f5b04483410,64) 853ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8e810,2097152,0xcaaab20,32768,0x7ffc3fd2dde0,0x7f5b04483810,64) 844ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8ec10,2097152,0xcaaab30,32768,0x7ffc3fd2dde0,0x7f5b04483c10,64) 1.01us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8f010,2097152,0xcaaab40,32768,0x7ffc3fd2dde0,0x7f5b04484010,64) 982ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8f410,2097152,0xcaaab50,32768,0x7ffc3fd2dde0,0x7f5b04484410,64) 924ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8f810,2097152,0xcaaab60,32768,0x7ffc3fd2dde0,0x7f5b04484810,64) 911ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c8fc10,2097152,0xcaaab70,32768,0x7ffc3fd2dde0,0x7f5b04484c10,64) 1.31us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c90010,2097152,0xcaaab80,32768,0x7ffc3fd2dde0,0x7f5b04485010,64) 1.31us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c90410,2097152,0xcaaab90,32768,0x7ffc3fd2dde0,0x7f5b04485410,64) 946ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c90810,2097152,0xcaaaba0,32768,0x7ffc3fd2dde0,0x7f5b04485810,64) 834ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c90c10,2097152,0xcaaabb0,32768,0x7ffc3fd2dde0,0x7f5b04485c10,64) 430ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c91010,2097152,0xcaaabc0,32768,0x7ffc3fd2dde0,0x7f5b04486010,64) 821ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c91410,2097152,0xcaaabd0,32768,0x7ffc3fd2dde0,0x7f5b04486410,64) 574ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c91810,2097152,0xcaaabe0,32768,0x7ffc3fd2dde0,0x7f5b04486810,64) 248ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c91c10,2097152,0xcaaabf0,32768,0x7ffc3fd2dde0,0x7f5b04486c10,64) 708ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c92010,2097152,0xcaaac00,32768,0x7ffc3fd2dde0,0x7f5b04487010,64) 706ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c92410,2097152,0xcaaac10,32768,0x7ffc3fd2dde0,0x7f5b04487410,64) 535ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c92810,2097152,0xcaaac20,32768,0x7ffc3fd2dde0,0x7f5b04487810,64) 261ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c92c10,2097152,0xcaaac30,32768,0x7ffc3fd2dde0,0x7f5b04487c10,64) 389ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c93010,2097152,0xcaaac40,32768,0x7ffc3fd2dde0,0x7f5b04488010,64) 848ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c93410,2097152,0xcaaac50,32768,0x7ffc3fd2dde0,0x7f5b04488410,64) 641ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c93810,2097152,0xcaaac60,32768,0x7ffc3fd2dde0,0x7f5b04488810,64) 257ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c93c10,2097152,0xcaaac70,32768,0x7ffc3fd2dde0,0x7f5b04488c10,64) 408ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c94010,2097152,0xcaaac80,32768,0x7ffc3fd2dde0,0x7f5b04489010,64) 421ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c94410,2097152,0xcaaac90,32768,0x7ffc3fd2dde0,0x7f5b04489410,64) 674ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c94810,2097152,0xcaaaca0,32768,0x7ffc3fd2dde0,0x7f5b04489810,64) 270ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c94c10,2097152,0xcaaacb0,32768,0x7ffc3fd2dde0,0x7f5b04489c10,64) 423ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c95010,2097152,0xcaaacc0,32768,0x7ffc3fd2dde0,0x7f5b0448a010,64) 698ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c95410,2097152,0xcaaacd0,32768,0x7ffc3fd2dde0,0x7f5b0448a410,64) 330ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c95810,2097152,0xcaaace0,32768,0x7ffc3fd2dde0,0x7f5b0448a810,64) 574ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b07c95c10,2097152,0xcaaacf0,32768,0x7ffc3fd2dde0,0x7f5b0448ac10,64) 400ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ [... some 30,000 more lines of similar output omitted ...]
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e2410,2097152,0xcb20010,32768,0x7ffc3fd2dde0,0x7f5b061d7410,64) 374ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e2810,2097152,0xcb20020,32768,0x7ffc3fd2dde0,0x7f5b061d7810,64) 577ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e2c10,2097152,0xcb20030,32768,0x7ffc3fd2dde0,0x7f5b061d7c10,64) 2.08us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e3010,2097152,0xcb20040,32768,0x7ffc3fd2dde0,0x7f5b061d8010,64) 587ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e3410,2097152,0xcb20050,32768,0x7ffc3fd2dde0,0x7f5b061d8410,64) 386ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e3810,2097152,0xcb20060,32768,0x7ffc3fd2dde0,0x7f5b061d8810,64) 557ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e3c10,2097152,0xcb20070,32768,0x7ffc3fd2dde0,0x7f5b061d8c10,64) 2.14us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e4010,2097152,0xcb20080,32768,0x7ffc3fd2dde0,0x7f5b061d9010,64) 475ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e4410,2097152,0xcb20090,32768,0x7ffc3fd2dde0,0x7f5b061d9410,64) 745ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e4810,2097152,0xcb200a0,32768,0x7ffc3fd2dde0,0x7f5b061d9810,64) 272ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e4c10,2097152,0xcb200b0,32768,0x7ffc3fd2dde0,0x7f5b061d9c10,64) 2.31us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e5010,2097152,0xcb200c0,32768,0x7ffc3fd2dde0,0x7f5b061da010,64) 419ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e5410,2097152,0xcb200d0,32768,0x7ffc3fd2dde0,0x7f5b061da410,64) 410ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e5810,2097152,0xcb200e0,32768,0x7ffc3fd2dde0,0x7f5b061da810,64) 276ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e5c10,2097152,0xcb200f0,32768,0x7ffc3fd2dde0,0x7f5b061dac10,64) 3.16us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e6010,2097152,0xcb20100,32768,0x7ffc3fd2dde0,0x7f5b061db010,64) 698ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e6410,2097152,0xcb20110,32768,0x7ffc3fd2dde0,0x7f5b061db410,64) 378ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e6810,2097152,0xcb20120,32768,0x7ffc3fd2dde0,0x7f5b061db810,64) 264ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e6c10,2097152,0xcb20130,32768,0x7ffc3fd2dde0,0x7f5b061dbc10,64) 2.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e7010,2097152,0xcb20140,32768,0x7ffc3fd2dde0,0x7f5b061dc010,64) 667ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e7410,2097152,0xcb20150,32768,0x7ffc3fd2dde0,0x7f5b061dc410,64) 514ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e7810,2097152,0xcb20160,32768,0x7ffc3fd2dde0,0x7f5b061dc810,64) 285ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e7c10,2097152,0xcb20170,32768,0x7ffc3fd2dde0,0x7f5b061dcc10,64) 2.16us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e8010,2097152,0xcb20180,32768,0x7ffc3fd2dde0,0x7f5b061dd010,64) 617ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e8410,2097152,0xcb20190,32768,0x7ffc3fd2dde0,0x7f5b061dd410,64) 434ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e8810,2097152,0xcb201a0,32768,0x7ffc3fd2dde0,0x7f5b061dd810,64) 279ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e8c10,2097152,0xcb201b0,32768,0x7ffc3fd2dde0,0x7f5b061ddc10,64) 6.21us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e9010,2097152,0xcb201c0,32768,0x7ffc3fd2dde0,0x7f5b061de010,64) 702ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e9410,2097152,0xcb201d0,32768,0x7ffc3fd2dde0,0x7f5b061de410,64) 590ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e9810,2097152,0xcb201e0,32768,0x7ffc3fd2dde0,0x7f5b061de810,64) 302ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099e9c10,2097152,0xcb201f0,32768,0x7ffc3fd2dde0,0x7f5b061dec10,64) 2.08us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ea010,2097152,0xcb20200,32768,0x7ffc3fd2dde0,0x7f5b061df010,64) 510ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ea410,2097152,0xcb20210,32768,0x7ffc3fd2dde0,0x7f5b061df410,64) 652ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ea810,2097152,0xcb20220,32768,0x7ffc3fd2dde0,0x7f5b061df810,64) 266ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099eac10,2097152,0xcb20230,32768,0x7ffc3fd2dde0,0x7f5b061dfc10,64) 2.21us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099eb010,2097152,0xcb20240,32768,0x7ffc3fd2dde0,0x7f5b061e0010,64) 482ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099eb410,2097152,0xcb20250,32768,0x7ffc3fd2dde0,0x7f5b061e0410,64) 587ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099eb810,2097152,0xcb20260,32768,0x7ffc3fd2dde0,0x7f5b061e0810,64) 274ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ebc10,2097152,0xcb20270,32768,0x7ffc3fd2dde0,0x7f5b061e0c10,64) 2.09us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ec010,2097152,0xcb20280,32768,0x7ffc3fd2dde0,0x7f5b061e1010,64) 652ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ec410,2097152,0xcb20290,32768,0x7ffc3fd2dde0,0x7f5b061e1410,64) 417ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ec810,2097152,0xcb202a0,32768,0x7ffc3fd2dde0,0x7f5b061e1810,64) 311ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ecc10,2097152,0xcb202b0,32768,0x7ffc3fd2dde0,0x7f5b061e1c10,64) 2.21us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ed010,2097152,0xcb202c0,32768,0x7ffc3fd2dde0,0x7f5b061e2010,64) 644ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ed410,2097152,0xcb202d0,32768,0x7ffc3fd2dde0,0x7f5b061e2410,64) 451ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ed810,2097152,0xcb202e0,32768,0x7ffc3fd2dde0,0x7f5b061e2810,64) 315ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099edc10,2097152,0xcb202f0,32768,0x7ffc3fd2dde0,0x7f5b061e2c10,64) 2.05us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ee010,2097152,0xcb20300,32768,0x7ffc3fd2dde0,0x7f5b061e3010,64) 680ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ee410,2097152,0xcb20310,32768,0x7ffc3fd2dde0,0x7f5b061e3410,64) 525ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ee810,2097152,0xcb20320,32768,0x7ffc3fd2dde0,0x7f5b061e3810,64) 661ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099eec10,2097152,0xcb20330,32768,0x7ffc3fd2dde0,0x7f5b061e3c10,64) 2.04us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ef010,2097152,0xcb20340,32768,0x7ffc3fd2dde0,0x7f5b061e4010,64) 704ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ef410,2097152,0xcb20350,32768,0x7ffc3fd2dde0,0x7f5b061e4410,64) 650ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099ef810,2097152,0xcb20360,32768,0x7ffc3fd2dde0,0x7f5b061e4810,64) 281ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099efc10,2097152,0xcb20370,32768,0x7ffc3fd2dde0,0x7f5b061e4c10,64) 2.21us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099f0010,2097152,0xcb20380,32768,0x7ffc3fd2dde0,0x7f5b061e5010,64) 490ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099f0410,2097152,0xcb20390,32768,0x7ffc3fd2dde0,0x7f5b061e5410,64) 471ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099f0810,2097152,0xcb203a0,32768,0x7ffc3fd2dde0,0x7f5b061e5810,64) 268ns CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,64,1,2,0x7ffc3fd2ddd0,0x7f5b099f0c10,2097152,0xcb203b0,32768,0x7ffc3fd2dde0,0x7f5b061e5c10,64) 2.11us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 585 milliseconds
+ [... 585000 / 32768 = 17.85 us, but most of the timings in the log are below 3 us ??...]
+ *
+ After debug_mkl_contract_sum, duration: 0.5853049755096436
+PROF:: perf data process bucket time: 0.6231610774993896
+==
+
+PROF:: Bucket contains: [E460(v_1846,v_1854), E1845(v_1846,v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 4 2097152 output: 1048576
+ Dimensions: f:2 k:2 n:524288 m:1
+ MKL_VERBOSE ZGEMM(N,T,524288,1,2,0x7ffc3fd2ddd0,0x7f5b099f1010,1048576,0x4dfacf0,2,0x7ffc3fd2dde0,0xd2a03e0,524288) 5.36ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,524288,1,2,0x7ffc3fd2ddd0,0x7f5b0a1f1010,1048576,0x4dfad00,2,0x7ffc3fd2dde0,0xdaa03e0,524288) 1.16ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 6 milliseconds
+ After debug_mkl_contract_sum, duration: 0.00663447380065918
+PROF:: perf data process bucket time: 0.028242111206054688
+==
+
+PROF:: Bucket contains: [E1846(v_1847,v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+PROF:: perf data process bucket time: 0.007658958435058594
+==
+
+PROF:: Bucket contains: [E467(v_1848,v_1852), E1847(v_1848,v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 4 524288 output: 262144
+ Dimensions: f:2 k:2 n:131072 m:1
+ MKL_VERBOSE ZGEMM(N,T,131072,1,2,0x7ffc3fd2ddd0,0xeaa0400,262144,0x4d322e0,2,0x7ffc3fd2dde0,0xcaa03c0,131072) 123.42us CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ MKL_VERBOSE ZGEMM(N,T,131072,1,2,0x7ffc3fd2ddd0,0xeca0400,262144,0x4d322f0,2,0x7ffc3fd2dde0,0xcca03c0,131072) 2.38ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:56
+ Duration: 2 milliseconds
+ After debug_mkl_contract_sum, duration: 0.002573728561401367
+PROF:: perf data process bucket time: 0.005625009536743164
+==
+
+PROF:: Bucket contains: [E480(v_1849,v_1856), E1848(v_1849,v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 4 262144 output: 131072
+ After debug_mkl_contract_sum, duration: 0.00018668174743652344
+PROF:: perf data process bucket time: 0.0014035701751708984
+PROF:: Bucket contains: [E1849(v_1850,v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+PROF:: perf data process bucket time: 0.0005922317504882812
+PROF:: Bucket contains: [E497(v_1851,v_1855), E1850(v_1851,v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 4 65536 output: 32768
+ After debug_mkl_contract_sum, duration: 0.00012922286987304688
+PROF:: perf data process bucket time: 0.0007731914520263672
+PROF:: Bucket contains: [E1851(v_1852,v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+PROF:: perf data process bucket time: 0.0008180141448974609
+PROF:: Bucket contains: [XPhase(v_1853,v_1858), E1852(v_1853,v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 4 16384 output: 8192
+ After debug_mkl_contract_sum, duration: 0.0001068115234375
+PROF:: perf data process bucket time: 0.0003962516784667969
+PROF:: Bucket contains: [XPhase(v_1854,v_1857), E1853(v_1854,v_1855,v_1856,v_1857,v_1858,v_1859,v_1860,v_1861,v_1862,v_1863,v_1864,v_1865,v_1866)]
+ Starting debug_mkl_contract_sum, input sizes: 4 8192 output: 4096
+ After debug_mkl_contract_sum, duration: 5.7697296142578125e-05
+
+[... followed by some boring, mostly summation-only operations while we eliminate the final clique ...]
+
+
83%|████████▎ | 5/6 [00:23<00:06, 6.31s/it]PROF:: Bucket contains: [M(v_0,v_277)]
diff --git a/qtensor/DebugFrameworks.py b/qtensor/DebugFrameworks.py
new file mode 100644
index 00000000..c15f2655
--- /dev/null
+++ b/qtensor/DebugFrameworks.py
@@ -0,0 +1,146 @@
+""" Frameworks that print a lot of info.
+This file is meant to be temporary and will not be updated
+"""
+import sys
+import time
+from functools import reduce
+import numpy as np
+from qtree import np_framework
+from qtree import optimizer as opt
+from qtensor.ProcessingFrameworks import BucketBackend, PerfBackend
+
+class MockModule:
+ def __getattribute__(self, attr):
+ # Fail spectacularly
+ raise ImportError(f'Module tcontract is not imported! Please install it and try again.')
+
+tcontract = MockModule()
+try:
+ import tcontract
+except ImportError:
+ pass
+
+class _CMKLExtendedBackend(BucketBackend):
+ def get_sliced_buckets(self, buckets, data_dict, slice_dict):
+ return np_framework.get_sliced_np_buckets(buckets, data_dict, slice_dict)
+
+ def process_bucket(self, bucket, no_sum=False):
+ result_indices = bucket[0].indices
+ result_data = bucket[0].data
+
+ # -- Contract first n-1 tensors of the bucket
+ def merge_with_result(result_data, result_indices, tensor):
+ # ---- Prepare inputs: transpose + reshape
+ ixa, ixb = result_indices, tensor.indices
+ common_ids = sorted(list(set.intersection(set(ixa), set(ixb))), key=int)
+ distinct_a = [x for x in ixa if x not in common_ids]
+ distinct_b = [x for x in ixb if x not in common_ids]
+ transp_a = [ixa.index(x) for x in common_ids+distinct_a]
+ transp_b = [ixb.index(x) for x in common_ids+distinct_b]
+ a = result_data.transpose(transp_a)
+ b = tensor.data.transpose(transp_b)
+ n, m, k = 2**len(common_ids), 2**len(distinct_a), 2**len(distinct_b)
+ a = a.reshape(n, m)
+ b = b.reshape(n, k)
+ # ----
+
+ c = np.empty((n, m, k), dtype=np.complex128)
+ start = time.time()
+ print(f'Starting debug_mkl_contract, input sizes: {a.size} {b.size} output: {c.size}', file=sys.stderr)
+ tcontract.debug_mkl_contract_complex(a, b, c)
+ end = time.time()
+ print(f'After debug_mkl_contract, duration: {end - start}', file=sys.stderr)
+
+ # ---- Post-process output
+ result_indices = tuple(sorted(
+ set(result_indices + tensor.indices),
+ key=int)
+ )
+ ixc = common_ids + distinct_a + distinct_b
+ assert len(result_indices) == len(ixc), 'Wrong transposition, please submit an issue'
+ transp_c = [ixc.index(x) for x in result_indices]
+ result_data = c.reshape(*[2 for _ in result_indices])
+ result_data = result_data.transpose(transp_c)
+ return result_data, result_indices
+ # ----
+
+ for tensor in bucket[1:-1]:
+ result_data, result_indices = merge_with_result(result_data, result_indices, tensor)
+ # --
+
+
+ if len(result_indices) > 0:
+ tag = result_indices[0].identity
+ else:
+ tag = 'f'
+
+ if no_sum:
+ if len(bucket)>1:
+ last_tensor = bucket[-1]
+ result_data, result_indices = merge_with_result(result_data, result_indices, last_tensor)
+
+ result = opt.Tensor(f'E{tag}', result_indices,
+ data=result_data)
+ return result
+
+ if len(bucket)<2:
+ result = opt.Tensor(f'E{tag}', result_indices[1:],
+ data=np.sum(result_data, axis=0))
+ return result
+ last_tensor = bucket[-1]
+
+ # -- Contract with summation
+ ixa, ixb = result_indices, last_tensor.indices
+ # ---- Prepare inputs: transpose + reshape
+ k, fm = result_indices[:1], result_indices[1:]
+ fn = last_tensor.indices[1:]
+
+ f = tuple(sorted(list(set.intersection(set(fm), set(fn))), key=int))
+ # Sets don't store order, so use lists. Do we need order here?
+ m = tuple([x for x in fm if x not in f])
+ n = tuple([x for x in fn if x not in f])
+ transp_a = [ixa.index(x) for x in k+f+m]
+ transp_b = [ixb.index(x) for x in k+f+n]
+ a = result_data.transpose(transp_a)
+ b = last_tensor.data.transpose(transp_b)
+ shapes_a = {i:s for i,s in zip(k+f+m, a.shape)}
+ shapes_b = {i:s for i,s in zip(k+f+n, b.shape)}
+ shapes = {**shapes_b, **shapes_a}
+ K, F, M, N = [reduce(np.multiply, (shapes[i] for i in x), 1) for x in (k, f, m, n)]
+ a = a.reshape(K, F, M)
+ b = b.reshape(K, F, N)
+ # ----
+
+ # \sum_k A_{kfm} * B_{kfn} = C_{fmn}
+ c = np.empty((F, M, N), dtype=np.complex128)
+ start = time.time()
+ print(f'Starting debug_mkl_contract_sum, input sizes: {a.size} {b.size} output: {c.size}', file=sys.stderr)
+ tcontract.debug_mkl_contract_sum(a, b, c)
+ end = time.time()
+ print(f'After debug_mkl_contract_sum, duration: {end - start}', file=sys.stderr)
+
+ # ---- Post-process output
+ result_indices = tuple(sorted(
+ set(result_indices + last_tensor.indices),
+ key=int)
+ )
+ assert result_indices[0] == k[0], 'Broken ordering, please report'
+ result_indices = result_indices[1:]
+ ixc = f + m + n
+ assert len(result_indices) == len(ixc), 'Wrong transposition, please submit an issue'
+ result_data = c.reshape([shapes[i] for i in ixc])
+ transp_c = [ixc.index(x) for x in result_indices]
+ result_data = result_data.transpose(transp_c)
+ # ----
+ # --
+ result = opt.Tensor(f'E{tag}', result_indices, data=result_data)
+ return result
+
+ def get_result_data(self, result):
+ return result.data
+
+class DebugMKLBackend(PerfBackend):
+ Backend = _CMKLExtendedBackend
+ # Just use print by default
+ def __init__(self, *args, print=True, num_lines=20, **kwargs):
+ super().__init__(*args, print=print, num_lines=num_lines, **kwargs)
diff --git a/qtensor/ProcessingFrameworks.py b/qtensor/ProcessingFrameworks.py
index 272d7db4..2e14cdda 100644
--- a/qtensor/ProcessingFrameworks.py
+++ b/qtensor/ProcessingFrameworks.py
@@ -1,8 +1,28 @@
+import sys
+import numpy as np
+from functools import reduce
+import time
+import lazy_import
from qtree import np_framework
+from qtree import optimizer as opt
from pyrofiler import timing
-from qtensor.utils import ReportTable
from tqdm import tqdm
+class MockModule:
+ def __getattribute__(self, attr):
+ # Fail spectacularly
+ raise ImportError(f'Module tcontract is not imported! Please install it and try again.')
+
+tcontract = MockModule()
+try:
+ import tcontract
+except ImportError:
+ pass
+
+
+from qtensor.utils import ReportTable
+from . import exatn_framework
+
class BucketBackend:
def process_bucket(self, bucket, no_sum=False):
raise NotImplementedError
@@ -13,6 +33,7 @@ def get_sliced_buckets(self, buckets, data_dict, slice_dict):
def get_result_data(self, result):
raise NotImplementedError
+
class NumpyBackend(BucketBackend):
def __init__(self):
super().__init__()
@@ -20,15 +41,140 @@ def __init__(self):
#self.status_bar = tqdm(desc='Current status', position=3, bar_format='{desc}')
def process_bucket(self, bucket, no_sum=False):
+ return np_framework.process_bucket_np(bucket, no_sum=no_sum)
+
+ def get_sliced_buckets(self, buckets, data_dict, slice_dict):
+ return np_framework.get_sliced_np_buckets(buckets, data_dict, slice_dict)
+
+ def get_result_data(self, result):
+ return result.data
+
+
+class ExaTnBackend(BucketBackend):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ exatn_framework.import_exatn()
+
+ def process_bucket(self, bucket, no_sum=False):
+ res = exatn_framework.process_bucket_exatn(bucket, no_sum=no_sum)
total_indices = set.union(*[set(t.indices) for t in bucket])
#self.status_bar.set_description_str(f'Current bucker result size: {len(total_indices)}')
res = np_framework.process_bucket_np(bucket, no_sum=no_sum)
#self.pbar.update(1)
return res
+ def get_sliced_buckets(self, buckets, data_dict, slice_dict):
+ return exatn_framework.get_sliced_exatn_buckets(buckets, data_dict, slice_dict)
+
+
+class CMKLExtendedBackend(BucketBackend):
def get_sliced_buckets(self, buckets, data_dict, slice_dict):
return np_framework.get_sliced_np_buckets(buckets, data_dict, slice_dict)
+ def process_bucket(self, bucket, no_sum=False):
+ result_indices = bucket[0].indices
+ result_data = bucket[0].data
+
+ # -- Contract first n-1 tensors of the bucket
+ def merge_with_result(result_data, result_indices, tensor):
+ # ---- Prepare inputs: transpose + reshape
+ ixa, ixb = result_indices, tensor.indices
+ common_ids = sorted(list(set.intersection(set(ixa), set(ixb))), key=int)
+ distinct_a = [x for x in ixa if x not in common_ids]
+ distinct_b = [x for x in ixb if x not in common_ids]
+ transp_a = [ixa.index(x) for x in common_ids+distinct_a]
+ transp_b = [ixb.index(x) for x in common_ids+distinct_b]
+ a = result_data.transpose(transp_a)
+ b = tensor.data.transpose(transp_b)
+ n, m, k = 2**len(common_ids), 2**len(distinct_a), 2**len(distinct_b)
+ a = a.reshape(n, m)
+ b = b.reshape(n, k)
+ # ----
+
+ c = np.empty((n, m, k), dtype=np.complex128)
+ tcontract.mkl_contract_complex(a, b, c)
+
+ # ---- Post-process output
+ result_indices = tuple(sorted(
+ set(result_indices + tensor.indices),
+ key=int)
+ )
+ ixc = common_ids + distinct_a + distinct_b
+ assert len(result_indices) == len(ixc), 'Wrong transposition, please submit an issue'
+ transp_c = [ixc.index(x) for x in result_indices]
+ result_data = c.reshape(*[2 for _ in result_indices])
+ result_data = result_data.transpose(transp_c)
+ return result_data, result_indices
+ # ----
+
+ for tensor in bucket[1:-1]:
+ result_data, result_indices = merge_with_result(result_data, result_indices, tensor)
+ # --
+
+
+ if len(result_indices) > 0:
+ tag = result_indices[0].identity
+ else:
+ tag = 'f'
+
+ if no_sum:
+ if len(bucket)>1:
+ last_tensor = bucket[-1]
+ result_data, result_indices = merge_with_result(result_data, result_indices, last_tensor)
+
+ result = opt.Tensor(f'E{tag}', result_indices,
+ data=result_data)
+ return result
+
+ if len(bucket)<2:
+ result = opt.Tensor(f'E{tag}', result_indices[1:],
+ data=np.sum(result_data, axis=0))
+ return result
+ last_tensor = bucket[-1]
+
+ # -- Contract with summation
+ ixa, ixb = result_indices, last_tensor.indices
+ # ---- Prepare inputs: transpose + reshape
+ k, fm = result_indices[:1], result_indices[1:]
+ fn = last_tensor.indices[1:]
+
+ f = tuple(sorted(list(set.intersection(set(fm), set(fn))), key=int))
+ # Sets don't store order, so use lists. Do we need order here?
+ m = tuple([x for x in fm if x not in f])
+ n = tuple([x for x in fn if x not in f])
+ transp_a = [ixa.index(x) for x in k+f+m]
+ transp_b = [ixb.index(x) for x in k+f+n]
+ a = result_data.transpose(transp_a)
+ b = last_tensor.data.transpose(transp_b)
+ shapes_a = {i:s for i,s in zip(k+f+m, a.shape)}
+ shapes_b = {i:s for i,s in zip(k+f+n, b.shape)}
+ shapes = {**shapes_b, **shapes_a}
+ K, F, M, N = [reduce(np.multiply, (shapes[i] for i in x), 1) for x in (k, f, m, n)]
+ a = a.reshape(K, F, M)
+ b = b.reshape(K, F, N)
+ # ----
+
+ # \sum_k A_{kfm} * B_{kfn} = C_{fmn}
+ c = np.empty((F, M, N), dtype=np.complex128)
+ tcontract.mkl_contract_sum(a, b, c)
+
+ # ---- Post-process output
+ result_indices = tuple(sorted(
+ set(result_indices + last_tensor.indices),
+ key=int)
+ )
+ assert result_indices[0] == k[0], 'Broken ordering, please report'
+ result_indices = result_indices[1:]
+ ixc = f + m + n
+ assert len(result_indices) == len(ixc), 'Wrong transposition, please submit an issue'
+ result_data = c.reshape([shapes[i] for i in ixc])
+ transp_c = [ixc.index(x) for x in result_indices]
+ result_data = result_data.transpose(transp_c)
+ # ----
+ # --
+ result = opt.Tensor(f'E{tag}', result_indices, data=result_data)
+ return result
+
def get_result_data(self, result):
return result.data
@@ -47,13 +193,36 @@ def _profile_callback(self, time, label, indices):
print(f"PROF:: perf data {label}: {time}")
self._profile_results[str(indices)] = indices, time
+    @classmethod
+    def from_backend(cls, backend, *args, **kwargs):
+        """ Dynamically create and instantiate a class with a given backend.
+
+        A throwaway subclass of ``cls`` is defined whose ``Backend`` class
+        attribute is ``backend``; the subclass is then instantiated with
+        ``*args`` and ``**kwargs`` and that instance is returned.
+        """
+        class CustomGeneratedBackend(cls):
+            # Note: the right-hand side must not be spelled `Backend`. A name
+            # that is assigned in a class body is not looked up in the
+            # enclosing function scope, so `Backend = Backend` would fail.
+            Backend = backend
+        return CustomGeneratedBackend(*args, **kwargs)
- def process_bucket(self, bucket, no_sum=False):
+ def process_bucket_pyrofiler(self, bucket, no_sum=False):
+ """ This method was original, but let's try what is with vanilia time.time()
+ Using pyrofiler allows to easily add more profilers like memory or cpu,
+ but adds a couple of function calls.
+ """
indices = [tensor.indices for tensor in bucket]
with timing('process bucket time', indices
, callback=self._profile_callback):
return self.backend.process_bucket(bucket, no_sum=no_sum)
+    def process_bucket(self, bucket, no_sum=False):
+        """ Run the wrapped backend on ``bucket`` and record how long it took.
+
+        The wall-clock duration measured with ``time.time()`` is stored in
+        ``self._profile_results`` under ``str(indices)``, where ``indices`` is
+        the list of ``tensor.indices`` of the bucket. When ``self._print`` is
+        truthy the bucket and the duration are also written to stderr.
+
+        Returns whatever ``self.backend.process_bucket`` returns.
+        """
+        bucket_indices = [t.indices for t in bucket]
+        t0 = time.time()
+        if self._print:
+            print(f"PROF:: Bucket contains: {bucket}", file=sys.stderr)
+        output = self.backend.process_bucket(bucket, no_sum=no_sum)
+        elapsed = time.time() - t0
+        if self._print:
+            print(f"PROF:: perf data process bucket time: {elapsed}", file=sys.stderr)
+        self._profile_results[str(bucket_indices)] = (bucket_indices, elapsed)
+        return output
+
def get_sliced_buckets(self, buckets, data_dict, slice_dict):
return self.backend.get_sliced_buckets(buckets, data_dict, slice_dict)
@@ -87,7 +256,7 @@ def gen_report(self):
# -- report on totals
for indices, time in data[:max_lines]:
- self.report_table.record(
+ kwargs= dict(
bucket_len = len(indices)
, time = time
, flop = self._perfect_bucket_flop(indices)
@@ -96,6 +265,7 @@ def gen_report(self):
, min_size = min([len(ixs) for ixs in indices])
, result_size = len(set.union(*[set(i) for i in indices])) - 1
)
+ self.report_table.record( **kwargs)
print(self.report_table.markdown())
diff --git a/qtensor/QAOASimulator.py b/qtensor/QAOASimulator.py
index 0a27af82..6a74e6b4 100644
--- a/qtensor/QAOASimulator.py
+++ b/qtensor/QAOASimulator.py
@@ -1,16 +1,18 @@
-from qtensor.Simulate import Simulator, QtreeSimulator, CirqSimulator
-from qtensor.utils import get_edge_subgraph
import numpy as np
-import networkx as nx
from tqdm.auto import tqdm
from multiprocessing import Pool
from loguru import logger as log
+from qtensor.Simulate import Simulator, QtreeSimulator, CirqSimulator
+from qtensor.utils import get_edge_subgraph
+from qtensor.lib import graph_hash
+
class QAOASimulator(Simulator):
def __init__(self, composer, profile=False, *args, **kwargs):
super().__init__(*args, **kwargs)
self.composer = composer
self.profile = profile
+ self._subgraph_energy_cache = {}
def _get_edge_energy(self, G, gamma, beta, edge):
circuit = self._edge_energy_circuit(G, gamma, beta, edge)
@@ -86,6 +88,21 @@ def energy_expectation_parallel(self, G, gamma, beta, n_processes=4):
return C
+class CachedQAOASimulator(QAOASimulator):
+    """ QAOA simulator that memoizes per-edge energies.
+
+    An edge energy is cached under the hash of the subgraph returned by
+    ``get_edge_subgraph`` together with the ``gamma`` and ``beta`` values it
+    was computed for, so an equal-hash subgraph is simulated only once per
+    parameter set.
+    """
+    def __init__(self, composer, profile=False, *args, **kwargs):
+        super().__init__(composer, profile=profile, *args, **kwargs)
+        # maps (subgraph hash, tuple(gamma), tuple(beta)) -> edge energy
+        self._subgraph_energy_cache = {}
+
+    def _get_edge_energy(self, G, gamma, beta, edge):
+        """ Return the energy of ``edge``, reusing a cached value if present.
+
+        ``gamma`` and ``beta`` must be iterables of hashable values because
+        they become part of the cache key.
+        """
+        graph = get_edge_subgraph(G, edge, len(gamma))
+        # The energy depends on the angles as well as on the subgraph. Keying
+        # on the hash alone would hand back a stale value as soon as the same
+        # simulator is called again with different gamma/beta.
+        key = (graph_hash(graph), tuple(gamma), tuple(beta))
+        cached_value = self._subgraph_energy_cache.get(key)
+        if cached_value is None:
+            cached_value = super()._get_edge_energy(G, gamma, beta, edge)
+            self._subgraph_energy_cache[key] = cached_value
+        return cached_value
class QAOAQtreeSimulator(QAOASimulator, QtreeSimulator):
pass
diff --git a/qtensor/Simulate.py b/qtensor/Simulate.py
index 83724f3b..2638a55d 100644
--- a/qtensor/Simulate.py
+++ b/qtensor/Simulate.py
@@ -6,10 +6,21 @@
from qtensor.optimisation.Optimizer import DefaultOptimizer
from tqdm.auto import tqdm
+
+
from loguru import logger as log
from qtensor import utils
+def int_slice(value, vars_to_slice):
+    """
+    Creates a slice dict with integers as values.
+
+    ``value`` is unravelled with ``qtree.utils.unravel_index`` against the
+    ``size`` of every variable in ``vars_to_slice``; the components are then
+    paired with the variables in order.
+    """
+    sizes = [v.size for v in vars_to_slice]
+    components = qtree.utils.unravel_index(value, sizes)
+    return dict(zip(vars_to_slice, components))
+
class Simulator:
def __init__(self):
pass
diff --git a/qtensor/__init__.py b/qtensor/__init__.py
index 15922cc3..f23c0a79 100644
--- a/qtensor/__init__.py
+++ b/qtensor/__init__.py
@@ -14,6 +14,8 @@
from qtensor.QAOASimulator import QAOACirqSimulator
from qtensor.FeynmanSimulator import FeynmanSimulator
from qtensor.ProcessingFrameworks import PerfNumpyBackend, NumpyBackend
+from qtensor import DebugFrameworks
+from qtensor import lib
class CirqQAOAComposer(QAOAComposer):
def _get_builder_class(self):
diff --git a/qtensor/cli.py b/qtensor/cli.py
index 180dcb70..0e89c92d 100644
--- a/qtensor/cli.py
+++ b/qtensor/cli.py
@@ -10,17 +10,28 @@
import qtensor.optimisation as qop
from qtensor.FeynmanSimulator import FeynmanSimulator
+from qtensor.ProcessingFrameworks import CMKLExtendedBackend, PerfBackend, ExaTnBackend
from qtensor.ProcessingFrameworks import PerfNumpyBackend
from qtensor.toolbox import qaoa_energy_tw_from_graph
from qtensor.optimisation.TensorNet import QtreeTensorNet
from qtensor.optimisation.Optimizer import OrderingOptimizer, TamakiOptimizer, WithoutOptimizer
from qtensor.optimisation.Optimizer import TamakiTrimSlicing, SlicesOptimizer
-from qtensor import QtreeQAOAComposer
+from qtensor import QtreeQAOAComposer, QAOAQtreeSimulator
+import qtensor.ProcessingFrameworks as backends
+import qtensor.optimisation.Optimizer as optimizers
@click.group()
def cli():
pass
+def choose_backend(backend_str):
+    """ Map a backend name given on the command line to a backend class.
+
+    Parameters
+    ----------
+    backend_str : str
+        One of 'numpy', 'mkl' or 'exatn'.
+
+    Returns
+    -------
+    The matching class from ``qtensor.ProcessingFrameworks`` (a class, not an
+    instance).
+
+    Raises
+    ------
+    ValueError
+        If ``backend_str`` is not a known name. Previously ``None`` was
+        returned, which only failed later when the result was called.
+    """
+    class_names = {
+        'numpy': 'NumpyBackend',
+        'mkl': 'CMKLExtendedBackend',
+        'exatn': 'ExaTnBackend',
+    }
+    if backend_str not in class_names:
+        known = ', '.join(sorted(class_names))
+        raise ValueError(f'Unknown backend {backend_str!r}, expected one of: {known}')
+    # Resolved lazily so that only the requested class is looked up.
+    return getattr(backends, class_names[backend_str])
+
@cli.command()
@click.argument('filename', nargs=-1)
@click.option('-p','--num-processes', default=1)
@@ -40,18 +51,22 @@ def sim_file(filename, profile=False, num_processes=1, max_tw=25, backend='numpy
,max_tw=max_tw
, pool_type='thread'
)
+ Backend = choose_backend(backend)
if profile:
- if backend == 'numpy':
- backend_obj = PerfNumpyBackend(print=False)
+        class DynamicallyGeneratedBackend(PerfBackend):
+            # `Backend = Backend` does not work here: a name assigned in a
+            # class body is looked up in the class namespace and then in the
+            # module globals, never in the enclosing function, so reading the
+            # function-local `Backend` raises NameError. Resolve the class
+            # from the `backend` option instead.
+            Backend = choose_backend(backend)
+ backend_obj = DynamicallyGeneratedBackend(print=False)
kwargs['bucket_backend'] = backend_obj
+ else:
+ kwargs['bucket_backend'] = Backend()
+
if optimizer=='tamaki':
- kwargs['optimizer'] = TamakiTrimSlicing(wait_time=23)
+ kwargs['optimizer'] = TamakiTrimSlicing(max_tw=max_tw, wait_time=23)
else:
- kwargs['optimizer'] = SlicesOptimizer()
+ kwargs['optimizer'] = SlicesOptimizer(max_tw=max_tw, tw_bias=0)
kwargs['optimizer'].max_tw = max_tw
-
sim = FeynmanSimulator(**kwargs)
circuit = sum(circuit, [])
result = sim.simulate(circuit, batch_vars=0, tw_bias=0)
@@ -177,7 +192,9 @@ def generate_qaoa_energy_circuit(seed, degree, nodes, p, graph_type, edge_index)
@click.option('-T','--max-time', default=0, help='Max time for every evaluation')
@click.option('--max-tw', default=0, help='Max tw after wich no point to calculate')
@click.option('-O','--ordering-algo', default='greedy', help='Algorithm for elimination order')
-def qaoa_energy_tw(nodes, seed, degree, p, graph_type, max_time, max_tw, ordering_algo):
+@click.option('--tamaki_time', default=20, help='Algorithm for elimination order')
+@click.option('--n_processes', default=1, help='Number of processes.')
+def qaoa_energy_tw(nodes, seed, degree, p, graph_type, max_time, max_tw, ordering_algo, tamaki_time, n_processes):
np.random.seed(seed)
if graph_type=='random_regular':
G = nx.random_regular_graph(degree, nodes)
@@ -186,7 +203,61 @@ def qaoa_energy_tw(nodes, seed, degree, p, graph_type, max_time, max_tw, orderin
else:
raise Exception('Unsupported graph type')
- qaoa_energy_tw_from_graph(G, p, max_time, max_tw, ordering_algo, print_stats=True)
+ qaoa_energy_tw_from_graph(G, p, max_time, max_tw, ordering_algo, print_stats=True, tamaki_time=tamaki_time, n_processes=n_processes)
+
+
+@cli.command()
+@click.option('-s','--seed', default=42)
+@click.option('-d','--degree', default=3)
+@click.option('-n','--nodes', default=10)
+@click.option('-p','--p', default=1)
+@click.option('-G','--graph-type', default='random_regular')
+@click.option('-T','--max-time', default=0, help='Max time for every evaluation')
+@click.option('--max-tw', default=0, help='Max tw after wich no point to calculate')
+@click.option('-O','--ordering-algo', default='greedy', help='Algorithm for elimination order')
+@click.option('--tamaki_time', default=20, help='Wait time passed to the Tamaki optimizers (wait_time)')
+@click.option('-B','--backend', default='numpy')
+@click.option('--n_processes', default=1, help='Number of processes.')
+@click.option('-P','--profile', default=False, is_flag=True)
+def qaoa_energy_sim(nodes, seed,
+                    degree, p, graph_type,
+                    max_time, max_tw, ordering_algo, tamaki_time,
+                    backend, n_processes, profile):
+    """ Simulate the QAOA energy expectation of a generated graph.
+
+    A 'random_regular' or 'erdos_renyi' graph is generated from the seed, the
+    energy is evaluated with gamma=[pi/3]*p and beta=[pi/2]*p, and the elapsed
+    time and the result are printed. With --profile and a single process the
+    backend profiling report is printed as well.
+    """
+    # NOTE(review): max_time is accepted but not used in this command.
+    np.random.seed(seed)
+    if graph_type=='random_regular':
+        G = nx.random_regular_graph(degree, nodes, seed=seed)
+    elif graph_type=='erdos_renyi':
+        G = nx.erdos_renyi_graph(nodes, degree/(nodes-1), seed=seed)
+    else:
+        raise Exception('Unsupported graph type')
+    gamma, beta = [np.pi/3]*p, [np.pi/2]*p
+
+    # Any ordering_algo other than the two tamaki variants falls back to the
+    # default optimizer.
+    if ordering_algo=='tamaki_slice':
+        optimizer = TamakiTrimSlicing(max_tw=max_tw, wait_time=tamaki_time)
+    elif ordering_algo=='tamaki':
+        optimizer = optimizers.TamakiOptimizer(wait_time=tamaki_time)
+    else:
+        optimizer = optimizers.DefaultOptimizer()
+
+    Backend = choose_backend(backend)
+    backend_obj = Backend()
+    if profile:
+        # Wrap the chosen backend in the profiling backend.
+        backend_obj = PerfBackend(print=False)
+        backend_obj.backend = Backend()
+
+    sim = QAOAQtreeSimulator(QtreeQAOAComposer, bucket_backend=backend_obj, optimizer=optimizer)
+    start = time.time()
+    if n_processes==1:
+        result = sim.energy_expectation(G, gamma, beta)
+        if profile:
+            print('Profiling results')
+            backend_obj.gen_report()
+    else:
+        # The profiling report is only produced in the single-process branch.
+        result = sim.energy_expectation_parallel(G, gamma, beta, n_processes=n_processes)
+    end = time.time()
+    print(f"Simutation time: {end - start}")
+    print(result)
cli()
diff --git a/qtensor/exatn_framework.py b/qtensor/exatn_framework.py
new file mode 100644
index 00000000..5d46d3bb
--- /dev/null
+++ b/qtensor/exatn_framework.py
@@ -0,0 +1,156 @@
+"""
+This file implements the ExaTN framework of the
+simulator. Its main use is in conjunction with the :py:mod:`optimizer`
+module, and example programs are listed in the :py:mod:`simulator` module.
+"""
+
+import lazy_import
+import numpy as np
+
+import qtree.utils as utils
+
+exatn = lazy_import.lazy_module('exatn')
+
+from collections import namedtuple
+
+TensorInfo = namedtuple("TensorInfo", "name indices")
+
+def get_sliced_exatn_buckets(buckets, data_dict, slice_dict):
+    """
+    Takes placeholder buckets and populates them with
+    actual sliced values. Every sliced tensor is created in ExaTN
+    under the name of its placeholder.
+
+    Parameters
+    ----------
+    buckets : list of list
+              buckets as returned by :py:meth:`circ2buckets`
+              and :py:meth:`reorder_buckets`.
+    data_dict : dict
+              dictionary containing values for the placeholder Tensors
+    slice_dict : dict
+              Current subtensor along the sliced variables
+              in the form {variable: slice}. A value may also be an
+              integer, in which case the variable is removed from the
+              tensor's indices.
+    Returns
+    -------
+    sliced_buckets : list of lists
+              buckets of :class:`TensorInfo` (name, indices), one for every
+              tensor that was created in ExaTN
+    """
+    sliced_buckets = []
+    for bucket in buckets:
+        sliced_bucket = []
+        for tensor in bucket:
+            # sort tensor dimensions by the integer value of their indices
+            transpose_order = np.argsort(list(map(int, tensor.indices)))
+            data = np.transpose(data_dict[tensor.data_key],
+                                transpose_order)
+            indices_sorted = [tensor.indices[pp]
+                              for pp in transpose_order]
+
+            # one bound per dimension; variables that are not sliced keep
+            # their full extent
+            slice_bounds = []
+            for idx in indices_sorted:
+                try:
+                    slice_bounds.append(slice_dict[idx])
+                except KeyError:
+                    slice_bounds.append(slice(None))
+
+            data = data[tuple(slice_bounds)]
+
+            # An integer bound removes its dimension from `data`. Drop those
+            # indices *before* pairing indices with the new shape, otherwise
+            # the remaining indices are matched with the wrong sizes.
+            indices_kept = [idx for sl, idx in zip(slice_bounds, indices_sorted)
+                            if not isinstance(sl, (int, np.integer))]
+            assert len(data.shape) == len(indices_kept)
+            indices_sliced = [idx.copy(size=size) for idx, size in
+                              zip(indices_kept, data.shape)]
+
+            print(f"creating {tensor.name}")
+            exatn.createTensor(tensor.name, data)
+
+            sliced_bucket.append(TensorInfo(tensor.name, indices_sliced))
+        sliced_buckets.append(sliced_bucket)
+
+    return sliced_buckets
+
+def idx_to_string(idx):
+    """ Join the ``utils.num_to_alpha`` form of every index with commas.
+
+    Each element of ``idx`` is converted with ``int`` first.
+    """
+    return ",".join(utils.num_to_alpha(int(i)) for i in idx)
+
+def tensor_to_string(tensor):
+    """ Format a tensor as ``name(i,j,...)``; also prints its indices. """
+    print(tensor.indices)
+    return "".join([tensor.name, "(", idx_to_string(tensor.indices), ")"])
+
+def get_exatn_expr(tensor1, tensor2, result_name, result_idx):
+    """ Build the expression string ``R(...) = T2(...) * T1(...)``.
+
+    Indices are first renumbered to 0..k-1 (k = number of distinct indices
+    of both tensors) so that the generated index names stay small.
+    Note that ``tensor2`` is written before ``tensor1`` in the expression.
+    """
+    # remap indices to reduce their order, as einsum does not like
+    # large numbers
+    all_indices = set.union(set(tensor1.indices), set(tensor2.indices))
+    renumber = {old: new for new, old in enumerate(all_indices)}
+
+    def _remap(indices):
+        return [renumber[ix] for ix in indices]
+
+    first = TensorInfo(name=tensor1.name, indices=_remap(tensor1.indices))
+    second = TensorInfo(name=tensor2.name, indices=_remap(tensor2.indices))
+    result_idx = _remap(result_idx)
+
+    # T(a,b,c) = A(a,b) * B(b,c)
+    first_str = tensor_to_string(first)
+    second_str = tensor_to_string(second)
+    result_str = f"{result_name}({idx_to_string(result_idx)})"
+
+    return f"{result_str} = {second_str} * {first_str}"
+
+def get_result_indices(idx1, idx2, contract=True):
+    """ Indices of the product of two tensors, sorted by ``int`` value.
+
+    Parameters
+    ----------
+    idx1, idx2 : iterable
+        Indices of the two operands. They may be of different sequence
+        types (e.g. a tuple and a list).
+    contract : bool
+        If True the first (smallest) index is dropped from the result.
+
+    Returns
+    -------
+    tuple of the distinct indices of both operands
+    """
+    # A set union is used instead of `idx1 + idx2`, which raises TypeError
+    # when one operand is a tuple and the other a list.
+    result_indices = tuple(sorted(set(idx1) | set(idx2), key=int))
+    if contract:
+        result_indices = result_indices[1:]
+    return result_indices
+
+
+def process_bucket_exatn(bucket, no_sum=False, result_id=0):
+    """
+    Process bucket in the bucket elimination algorithm.
+    We multiply all tensors in the bucket and sum over the
+    variable which the bucket corresponds to. This way the
+    variable of the bucket is removed from the expression.
+
+    The tensors are multiplied pairwise, left to right. The bucket
+    variable is kept in every intermediate product and is summed over
+    only in the last product, unless ``no_sum`` is set.
+
+    Parameters
+    ----------
+    bucket : list
+           List of TensorInfo-like objects (``name``, ``indices``).
+
+    no_sum : bool
+           If no summation should be done over the buckets's variable
+
+    result_id : int
+           Unused; kept for interface compatibility.
+
+    Returns
+    -------
+    tensor : TensorInfo
+           name and indices of the ExaTN tensor holding the result
+    """
+    # NOTE(review): a bucket with a single tensor is returned unchanged,
+    # i.e. without summation - confirm this is intended.
+    pr_info = bucket[0]
+    # position of the final tensor while enumerating bucket[1:]
+    last = len(bucket) - 2
+
+    for i, t_info in enumerate(bucket[1:]):
+        # Contract (sum over the bucket variable) only in the final product.
+        # The previous check `i == n - 1` could never be true here.
+        contract = (i == last) and not no_sum
+        result_indices = get_result_indices(
+            tuple(pr_info.indices), tuple(t_info.indices), contract=contract)
+
+        new_name = f"C{np.random.randint(0, 1000000000)}"
+        # NOTE(review): the output is allocated with extent 2 along every
+        # index - assumes all variables have size 2; confirm.
+        exatn.createTensor(new_name, np.empty([2]*len(result_indices), dtype=complex))
+        expr = get_exatn_expr(pr_info, t_info, new_name, result_indices)
+
+        pr_info = TensorInfo(new_name, result_indices)
+        exatn.contractTensors(expr)
+
+    return pr_info
diff --git a/qtensor/lib/__init__.py b/qtensor/lib/__init__.py
new file mode 100644
index 00000000..391650d5
--- /dev/null
+++ b/qtensor/lib/__init__.py
@@ -0,0 +1,2 @@
+from .graph_hashing import weisfeiler_lehman_graph_hash
+graph_hash = weisfeiler_lehman_graph_hash
diff --git a/qtensor/lib/graph_hashing.py b/qtensor/lib/graph_hashing.py
new file mode 100644
index 00000000..b0d6df1b
--- /dev/null
+++ b/qtensor/lib/graph_hashing.py
@@ -0,0 +1,158 @@
+
+"""
+Functions for hashing graphs to strings.
+Isomorphic graphs should be assigned identical hashes.
+For now, only Weisfeiler-Lehman hashing is implemented.
+"""
+
+"""
+DL:
+ Ripped off from networkx 2.5. There's no reason to ask for a newer version just
+ because of a single small function
+"""
+
+from collections import Counter
+from hashlib import blake2b
+
+__all__ = ["weisfeiler_lehman_graph_hash"]
+
+
+def weisfeiler_lehman_graph_hash(
+    G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
+):
+    """Return Weisfeiler Lehman (WL) graph hash.
+
+    The function iteratively aggregates and hashes neighbourhoods of each node.
+    After each node's neighbors are hashed to obtain updated node labels,
+    a hashed histogram of resulting labels is returned as the final hash.
+
+    Hashes are identical for isomorphic graphs and there are strong guarantees
+    that non-isomorphic graphs will get different hashes. See [1] for details.
+
+    Note: Similarity between hashes does not imply similarity between graphs.
+
+    If no node or edge attributes are provided, the degree of each node
+    is used as its initial label.
+    Otherwise, node and/or edge labels are used to compute the hash.
+
+    Parameters
+    ----------
+    G: graph
+        The graph to be hashed.
+        Can have node and/or edge attributes. Can also have no attributes.
+    edge_attr: string
+        The key in edge attribute dictionary to be used for hashing.
+        If None, edge labels are ignored. Attribute values are converted
+        with ``str`` before hashing, so they need not be strings.
+    node_attr: string
+        The key in node attribute dictionary to be used for hashing.
+        If None, and no edge_attr given, use
+        degree of node as label.
+    iterations: int
+        Number of neighbor aggregations to perform.
+        Should be larger for larger graphs.
+    digest_size: int
+        Size of blake2b hash digest to use for hashing node labels.
+
+    Returns
+    -------
+    h : string
+        Hexadecimal string corresponding to hash of the input graph.
+
+    Examples
+    --------
+    Two graphs with edge attributes that are isomorphic, except for
+    differences in the edge labels.
+
+    >>> G1 = nx.Graph()
+    >>> G1.add_edges_from(
+    ...     [
+    ...         (1, 2, {"label": "A"}),
+    ...         (2, 3, {"label": "A"}),
+    ...         (3, 1, {"label": "A"}),
+    ...         (1, 4, {"label": "B"}),
+    ...     ]
+    ... )
+    >>> G2 = nx.Graph()
+    >>> G2.add_edges_from(
+    ...     [
+    ...         (5, 6, {"label": "B"}),
+    ...         (6, 7, {"label": "A"}),
+    ...         (7, 5, {"label": "A"}),
+    ...         (7, 8, {"label": "A"}),
+    ...     ]
+    ... )
+
+    Omitting the `edge_attr` option, results in identical hashes.
+
+    >>> weisfeiler_lehman_graph_hash(G1)
+    '0db442538bb6dc81d675bd94e6ebb7ca'
+    >>> weisfeiler_lehman_graph_hash(G2)
+    '0db442538bb6dc81d675bd94e6ebb7ca'
+
+    With edge labels, the graphs are no longer assigned
+    the same hash digest.
+
+    >>> weisfeiler_lehman_graph_hash(G1, edge_attr="label")
+    '408c18537e67d3e56eb7dc92c72cb79e'
+    >>> weisfeiler_lehman_graph_hash(G2, edge_attr="label")
+    'f9e9cb01c6d2f3b17f83ffeaa24e5986'
+
+    References
+    -------
+    .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
+       Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
+       Graph Kernels. Journal of Machine Learning Research. 2011.
+       http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf
+    """
+
+    def neighborhood_aggregate(G, node, node_labels, edge_attr=None):
+        """
+        Compute new labels for given node by aggregating
+        the labels of each node's neighbors.
+        """
+        label_list = [node_labels[node]]
+        for nei in G.neighbors(node):
+            # str() allows non-string edge attributes (e.g. numeric weights);
+            # string attributes hash exactly as before.
+            prefix = "" if not edge_attr else str(G[node][nei][edge_attr])
+            label_list.append(prefix + node_labels[nei])
+        return "".join(sorted(label_list))
+
+    def weisfeiler_lehman_step(G, labels, edge_attr=None, node_attr=None):
+        """
+        Apply neighborhood aggregation to each node
+        in the graph.
+        Computes a dictionary with labels for each node.
+        """
+        new_labels = dict()
+        for node in G.nodes():
+            new_labels[node] = neighborhood_aggregate(
+                G, node, labels, edge_attr=edge_attr
+            )
+        return new_labels
+
+    items = []
+    node_labels = dict()
+    # set initial node labels
+    for node in G.nodes():
+        if (not node_attr) and (not edge_attr):
+            node_labels[node] = str(G.degree(node))
+        elif node_attr:
+            node_labels[node] = str(G.nodes[node][node_attr])
+        else:
+            node_labels[node] = ""
+
+    for k in range(iterations):
+        node_labels = weisfeiler_lehman_step(G, node_labels, edge_attr=edge_attr)
+        counter = Counter()
+        # count node labels
+        for node, d in node_labels.items():
+            h = blake2b(digest_size=digest_size)
+            h.update(d.encode("ascii"))
+            counter.update([h.hexdigest()])
+        # sort the counter, extend total counts
+        items.extend(sorted(counter.items(), key=lambda x: x[0]))
+
+    # hash the final counter
+    h = blake2b(digest_size=digest_size)
+    h.update(str(tuple(items)).encode("ascii"))
+    h = h.hexdigest()
+    return h
diff --git a/qtensor/tests/test_bucket_backends.py b/qtensor/tests/test_bucket_backends.py
index b139c057..f0d5aeb9 100644
--- a/qtensor/tests/test_bucket_backends.py
+++ b/qtensor/tests/test_bucket_backends.py
@@ -1,27 +1,42 @@
+import numpy as np
+import networkx as nx
+import pytest
+
from qtensor import QtreeQAOAComposer
+from qtensor.Simulate import CirqSimulator, QtreeSimulator
+
+from qtensor.ProcessingFrameworks import PerfBackend
from qtensor.ProcessingFrameworks import PerfNumpyBackend
-from qtensor.Simulate import CirqSimulator, QtreeSimulator
-import numpy as np
-import networkx as nx
+from qtensor.ProcessingFrameworks import CMKLExtendedBackend
-def get_test_problem():
+@pytest.fixture(scope="module")
+def test_problem():
w = np.array([[0,1,1,0],[1,0,1,1],[1,1,0,1],[0,1,1,0]])
G = nx.from_numpy_matrix(w)
- G = nx.random_regular_graph(5, 14)
- gamma, beta = [np.pi/3], [np.pi/2]
- return G, gamma, beta
+ G = nx.random_regular_graph(3, 18)
+ gamma, beta = [np.pi/3]*2, [np.pi/2]*2
+ yield G, gamma, beta
+
+@pytest.fixture(scope='module')
+def ground_truth_energy(test_problem):
+ G, gamma, beta = test_problem
+ composer = QtreeQAOAComposer(graph=G, gamma=gamma, beta=beta)
+ composer.ansatz_state()
+
+ sim = QtreeSimulator()
-def test_profiled(capsys):
- G, gamma, beta = get_test_problem()
+ result = sim.simulate(composer.circuit)
+ yield result
- composer = QtreeQAOAComposer(
- graph=G, gamma=[np.pi/3], beta=[np.pi/4])
+
+def test_profiled(capsys, ground_truth_energy, test_problem):
+ G, gamma, beta = test_problem
+ composer = QtreeQAOAComposer(graph=G, gamma=gamma, beta=beta)
composer.ansatz_state()
- print(composer.circuit)
backend = PerfNumpyBackend()
sim = QtreeSimulator(bucket_backend=backend)
@@ -29,6 +44,22 @@ def test_profiled(capsys):
print("Profile results")
print(backend.gen_report())
- qtree_amp = result
+ assert np.allclose(result, ground_truth_energy)
+
+def test_mkl(capsys, test_problem, ground_truth_energy):
+    """ The profiled MKL backend must reproduce the reference result. """
+    graph, gammas, betas = test_problem
+    composer = QtreeQAOAComposer(graph=graph, gamma=gammas, beta=betas)
+    composer.ansatz_state()
+
+    perf_backend = PerfBackend.from_backend(CMKLExtendedBackend, print=False)
+    simulator = QtreeSimulator(bucket_backend=perf_backend)
+    amplitude = simulator.simulate(composer.circuit)
+
+    print("Profile results")
+    print(perf_backend.gen_report())
+
+    assert np.allclose(amplitude, ground_truth_energy)
+
- assert qtree_amp
+if __name__=='__main__':
+    # The tests take pytest fixtures as arguments, so they cannot be called
+    # directly (test_profiled(None) raises TypeError); let pytest run them.
+    raise SystemExit(pytest.main([__file__]))
diff --git a/qtensor/toolbox.py b/qtensor/toolbox.py
index f7015488..342021b8 100644
--- a/qtensor/toolbox.py
+++ b/qtensor/toolbox.py
@@ -1,7 +1,9 @@
import networkx as nx
import numpy as np
+from itertools import repeat
from tqdm.auto import tqdm
import time
+from multiprocessing.dummy import Pool
from qtensor.optimisation.TensorNet import QtreeTensorNet
from qtensor.optimisation.Optimizer import OrderingOptimizer, TamakiOptimizer, WithoutOptimizer
@@ -34,12 +36,14 @@ def random_graph(nodes, type='random', **kwargs):
-def optimize_circuit(circ, algo='greedy'):
+def optimize_circuit(circ, algo='greedy', tamaki_time=15):
+    # Should I somehow generalize the tamaki_time argument? Provide something like
+    # an optimizer-params argument? How would the cli parse this?
if algo=='greedy':
opt = OrderingOptimizer()
elif algo=='tamaki':
- opt = TamakiOptimizer(wait_time=45)
+ opt = TamakiOptimizer(wait_time=tamaki_time)
elif algo=='without':
opt = WithoutOptimizer()
else:
@@ -49,8 +53,8 @@ def optimize_circuit(circ, algo='greedy'):
peo, tn = opt.optimize(tn)
return peo, tn, opt
-def get_tw(circ, ordering_algo='greedy'):
- peo, tn, opt = optimize_circuit(circ, algo=ordering_algo)
+def get_tw(circ, ordering_algo='greedy', tamaki_time=15):
+ peo, tn, opt = optimize_circuit(circ, algo=ordering_algo, tamaki_time=tamaki_time)
treewidth = opt.treewidth
return treewidth
@@ -95,25 +99,43 @@ def qaoa_energy_cost_params_stats_from_graph(G, p, max_time=0, max_tw=None,
return tw, mem, flop
+def _twidth_parallel_unit(args):
+    """ Treewidth of one circuit; the work unit for the (parallel) map.
+
+    ``args`` is ``((circuit, subgraph), ordering_algo, tamaki_time, max_tw)``.
+    Raises ValueError (after printing the same message) when ``max_tw`` is
+    truthy and the treewidth exceeds it.
+    """
+    (circuit, _subgraph), ordering_algo, tamaki_time, max_tw = args
+    tw = get_tw(circuit, ordering_algo=ordering_algo, tamaki_time=tamaki_time)
+    if max_tw and tw > max_tw:
+        print(f'Encountered treewidth of {tw}, which is larger {max_tw}')
+        raise ValueError(f'Encountered treewidth of {tw}, which is larger {max_tw}')
+    return tw
+
def qaoa_energy_tw_from_graph(G, p, max_time=0, max_tw=0,
- ordering_algo='greedy', print_stats=False):
- twidths = []
- with tqdm(total=G.number_of_edges(), desc='Edge iteration') as pbar:
- for circuit, subgraph in qaoa_energy_lightcone_iterator(G, p, max_time=max_time):
- tw = get_tw(circuit, ordering_algo=ordering_algo)
- pbar.update()
- pbar.set_postfix(current_tw=tw, subgraph_nodes=subgraph.number_of_nodes())
- if max_tw:
- if tw>max_tw:
- print(f'Encountered treewidth of {tw}, which is larger {max_tw}')
- break
- twidths.append(tw)
+ ordering_algo='greedy', print_stats=False,
+ tamaki_time=15, n_processes=1):
+
+ lightcone_gen = qaoa_energy_lightcone_iterator(G, p, max_time=max_time)
+ arggen = zip(lightcone_gen, repeat(ordering_algo), repeat(tamaki_time), repeat(max_tw))
+ if n_processes > 1:
+ print('n_processes', n_processes)
+ with Pool(n_processes) as p:
+ twidths = list(tqdm(p.imap(_twidth_parallel_unit, arggen), total=G.number_of_edges()))
+ else:
+ twidths = []
+ with tqdm(total=G.number_of_edges(), desc='Edge iteration') as pbar:
+ for args in arggen:
+ circ_graph, ordering_algo, tamaki_time, max_tw = args
+ circuit, subgraph = circ_graph
+ tw = _twidth_parallel_unit(args)
+ pbar.update()
+ pbar.set_postfix(current_tw=tw, subgraph_nodes=subgraph.number_of_nodes())
+ twidths.append(tw)
if print_stats:
print(f'med={np.median(twidths)} mean={round(np.mean(twidths), 2)} max={np.max(twidths)}')
return twidths
+
def qaoa_energy_cost_params_from_graph(G, p, max_time=0, max_tw=0,
ordering_algo='greedy', print_stats=False):
costs = []
diff --git a/qtensor/utils.py b/qtensor/utils.py
index a54ce915..1d3e1a9b 100644
--- a/qtensor/utils.py
+++ b/qtensor/utils.py
@@ -174,7 +174,7 @@ def record(self, **kwargs):
if set(self.columns) != set(kwargs.keys()):
raise ValueError(f"columns doesn't match: {kwargs.keys()}, expect: {self.columns}")
else:
- self.columns = set(kwargs.keys())
+ self.columns = list(kwargs.keys())
self.records += [[kwargs[key] for key in self.columns]]
def _title_row(self):
diff --git a/run/automake/publish.sh b/run/automake/publish.sh
index 6614f1e7..b2f2bae1 100755
--- a/run/automake/publish.sh
+++ b/run/automake/publish.sh
@@ -3,11 +3,11 @@
echo "## Automake run result" >> results/result.md
echo "### Performance summary:" >> results/result.md
-tail -n 4 time_vs_flops.log >> results/result.md
+tail -n 5 results/time_vs_flops.txt >> results/result.md
echo "\n" >> results/result.md
echo "\n" >> results/result.md
-echo "Backend used: numpy.einsum" >> results/result.md
+echo "Backend used: mkl (full)" >> results/result.md
echo "\n" >> results/result.md
echo "### Performance plot:" >> results/result.md
diff --git a/run/automake/qsub_entry.sh b/run/automake/qsub_entry.sh
index 1a3d368a..423383a8 100755
--- a/run/automake/qsub_entry.sh
+++ b/run/automake/qsub_entry.sh
@@ -10,4 +10,4 @@ lscpu
echo $PYTHONPATH
echo $PATH
echo $SHELL
-qtensor-specs-time-flop-plot time-vs-flops-plot results/time_vs_flops.png > time_vs_flops.log
+qtensor-specs-time-flop-plot time-vs-flops-plot results/time_vs_flops.png --backend=mkl --max-memory=5e10 --min-memory=1e6 --seed=111 --ordering_algo=tamaki_10 > results/time_vs_flops.txt
diff --git a/run/automake/results/result.md b/run/automake/results/result.md
index 6734b893..9eb71a29 100644
--- a/run/automake/results/result.md
+++ b/run/automake/results/result.md
@@ -1,14 +1,10 @@
## Automake run result
### Performance summary:
-===Results===
-Simulator fitted flops: 0.32148 G
-Matmul flops: 416.78 G
-Simulator optimality: 0.0007713454853975739
\n
\n
-Backend used: numpy.einsum
+Backend used: mkl (full)
\n
### Performance plot:
-
+
\n
-Run date: Wed Oct 7 11:39:25 UTC 2020
+Run date: Sun Oct 11 03:45:15 UTC 2020
diff --git a/run/automake/results/time_vs_flops.png b/run/automake/results/time_vs_flops.png
index 6b538b8d..60e38482 100644
Binary files a/run/automake/results/time_vs_flops.png and b/run/automake/results/time_vs_flops.png differ
diff --git a/run/automake/results/time_vs_flops.txt b/run/automake/results/time_vs_flops.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/scratchpad/cpp_connections/vanilia/README.md b/scratchpad/cpp_connections/vanilia/README.md
new file mode 100644
index 00000000..1c7e9761
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/README.md
@@ -0,0 +1,6 @@
+
+## Tutorial
+
+- Python C++ API https://docs.python.org/3/extending/extending.html
+
+- Using C++ API to extend Numpy https://numpy.org/doc/1.19/user/c-info.how-to-extend.html
diff --git a/scratchpad/cpp_connections/vanilia/nparray/README.md b/scratchpad/cpp_connections/vanilia/nparray/README.md
new file mode 100644
index 00000000..2e2f91bb
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/nparray/README.md
@@ -0,0 +1,7 @@
+https://stackoverflow.com/a/52958940
+
+## Running stats
+
+```bash
+ seq 10 100 700 | xargs -L1 python contract.py
+```
diff --git a/scratchpad/cpp_connections/vanilia/nparray/cli-command.sh b/scratchpad/cpp_connections/vanilia/nparray/cli-command.sh
new file mode 100644
index 00000000..38580ae3
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/nparray/cli-command.sh
@@ -0,0 +1 @@
+seq 23 25 | xargs -L1 python transposes.py | grep duration --line-buffered | cut -d'=' -f2
diff --git a/scratchpad/cpp_connections/vanilia/nparray/contract.py b/scratchpad/cpp_connections/vanilia/nparray/contract.py
new file mode 100644
index 00000000..4eaeb5d7
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/nparray/contract.py
@@ -0,0 +1,81 @@
+import tcontract
+import sys
+import torch as t
+import numpy as np
+try:
+ from opt_einsum import contract as opt_einsum
+except ImportError:
+ opt_einsum = None
+
+import time
+from pyrofiler import Profiler
+
+def random_complex(*shape):
+    """ Array of the given shape with standard-normal real and imaginary parts. """
+    real_part = np.random.randn(*shape)
+    imag_part = np.random.randn(*shape)
+    return real_part + 1j*imag_part
+
+def stats_callback(elapsed_time, description, cop, flop):
+    """ Print the elapsed time and the per-second rates of ``cop`` and ``flop``. """
+    shown_time = round(elapsed_time,3)
+    cops = cop/elapsed_time
+    flops = flop/elapsed_time
+    print(f'{description}: Elapsed time={shown_time} COPS={cops:e} FLOPS={flops:e}')
+
+def contract():
+    """ Time the product C_ijk = A_ij * B_ik (no summation) with several tools.
+
+    The size offset N is read from the first command line argument and
+    defaults to 0 when the argument is missing. numpy einsum,
+    ``tcontract.mkl_contract`` and, if available, opt_einsum with the torch
+    backend are timed; the einsum and MKL results must be equal.
+    """
+    try:
+        N = int(sys.argv[1])
+    except LookupError:
+        # no argument given (IndexError is a LookupError)
+        N = 0
+    prof = Profiler(callback=stats_callback)
+
+    n, m, k = 2+N, 3+N, 4+N
+    A, B = np.random.randn(n, m), np.random.randn(n, k)
+
+    # output buffer that mkl_contract is given as its third argument
+    C = np.empty((n, m, k))
+    cop = C.size
+    flop = 6*C.size
+    size = sys.getsizeof(C)
+    print('Result size = {C_size:e} bytes'.format(C_size=size))
+
+    with prof.timing('Einsum', cop=cop, flop=flop):
+        C_einsum =np.einsum('ij,ik -> ijk', A, B)
+
+
+    with prof.timing('MKL', cop=cop, flop=flop):
+        tcontract.mkl_contract(A, B, C)
+
+    # opt_einsum is None when its import failed at module load
+    if opt_einsum:
+        with prof.timing('Opt Einsum', cop=cop, flop=flop):
+            _ = opt_einsum('ij,ik -> ijk', t.Tensor(A), t.Tensor(B), backend='torch')
+
+    assert np.array_equal(C_einsum, C)
+
+
+def contract_sum():
+    """ Time the contraction C_fmn = sum_k A_kfm * B_kfn on complex inputs.
+
+    The base size N is read from the first command line argument and
+    defaults to 100 when the argument is missing. numpy einsum and
+    ``tcontract.mkl_contract_sum`` are timed and their results compared
+    with ``np.allclose``.
+    """
+    try:
+        N = int(sys.argv[1])
+    except LookupError:
+        # no argument given (IndexError is a LookupError)
+        N = 100
+    prof = Profiler(callback=stats_callback)
+
+    n, m, k, f = N, 1+N, 2+N, 3+N
+    #k = 2
+    print('Summation size:', k)
+    A, B = random_complex(k, f, m), random_complex(k, f, n)
+
+    # output buffer that mkl_contract_sum is given as its third argument
+    C = np.empty((f, m, n), dtype=np.complex128)
+    flop = 6*C.size * (2*k - 1)
+    cop = C.size * (k - 1)
+    size = sys.getsizeof(C)
+    print('Result size = {C_size:e} bytes'.format(C_size=size))
+
+    with prof.timing('Einsum', cop=cop, flop=flop):
+        C_einsum =np.einsum('kfm,kfn -> fmn', A, B)
+
+    with prof.timing('MKL contract_summ', cop=cop, flop=flop):
+        tcontract.mkl_contract_sum(A, B, C)
+
+    assert np.allclose(C_einsum, C)
+
+if __name__=="__main__":
+ print('**With summ**')
+ contract_sum()
+ print()
+ print('**Just multiply**')
+ contract()
+ print()
diff --git a/scratchpad/cpp_connections/vanilia/nparray/jlse_setup.py b/scratchpad/cpp_connections/vanilia/nparray/jlse_setup.py
new file mode 100644
index 00000000..197527ac
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/nparray/jlse_setup.py
@@ -0,0 +1,18 @@
+from setuptools import setup, Extension # use setuptools instead of distutils from tutorial
+import numpy as np
+
+extra_link_args = ['-I', '/soft/compilers/intel-2019/compilers_and_libraries/linux/mkl/include', '-l', 'mkl_intel_lp64', '-l', 'mkl_intel_thread', '-l', 'mkl_core', '-l', 'iomp5', '-l', 'pthread', '-l', 'm', '-l', 'dl', '-L', '/soft/compilers/intel-2019/compilers_and_libraries/linux/mkl/lib/intel64', '-L', '/soft/compilers/intel-2019/compilers_and_libraries/linux/mkl/../compiler/lib/intel64']
+
+module = Extension('tcontract'
+ , sources=['tcontract.cpp']
+ , include_dirs=[np.get_include(), '/soft/compilers/intel-2019/compilers_and_libraries/linux/mkl/include']
+ , extra_link_args=extra_link_args
+ , extra_compile_args=extra_link_args
+ )
+
+setup(
+ name='tcontract',
+ version='0.0.0',
+ description='Contract two tensors',
+ ext_modules=[module]
+)
diff --git a/scratchpad/cpp_connections/vanilia/nparray/setup.py b/scratchpad/cpp_connections/vanilia/nparray/setup.py
new file mode 100644
index 00000000..4626f9cf
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/nparray/setup.py
@@ -0,0 +1,48 @@
+from setuptools import setup, Extension # use setuptools instead of distutils from tutorial
+import numpy as np
+import os
+
+"""
+Use this before:
+
+export LD_PRELOAD=$MKLROOT/lib/intel64/libmkl_def.so:$MKLROOT/lib/intel64/libmkl_avx2.so:$MKLROOT/lib/intel64/libmkl_core.so:$MKLROOT/lib/intel64/libmkl_intel_lp64.so:$MKLROOT/lib/intel64/libmkl_intel_thread.so
+"""
+
+# :/usr/lib/libomp.so
+
+mklroot = os.environ['MKLROOT']
+mklinclude = mklroot + '/include'
+mkllib = mklroot + '/lib/intel64'
+
+extra_link_args = ['-I', mklinclude
+ , '-L', mkllib
+ , '-Wl,--no-as-needed'
+ , '-lmkl_intel_lp64'
+ , '-lmkl_gnu_thread'
+ , '-lmkl_core'
+ , '-lpthread'
+ , '-lgomp'
+ , '-lm'
+ , '-ldl'
+ ]
+
+extra_compile_args = ['-I', mklinclude
+ ,'-std=c++11'
+ ,'-m64'
+ ,'-fopenmp'
+ ]
+
+module = Extension('tcontract'
+ , sources=['tcontract.cpp']
+ , include_dirs=[np.get_include()]
+ , extra_compile_args=extra_compile_args
+ , extra_link_args=extra_link_args
+ )
+
+setup(
+ name='tcontract',
+ version='0.0.0',
+ description='Contract two tensors',
+ ext_modules=[module]
+)
+
diff --git a/scratchpad/cpp_connections/vanilia/nparray/tcontract.cpp b/scratchpad/cpp_connections/vanilia/nparray/tcontract.cpp
new file mode 100644
index 00000000..810bb213
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/nparray/tcontract.cpp
@@ -0,0 +1,691 @@
+#include "Python.h"
+#include
+#include "numpy/arrayobject.h"
+#include
+#include
+#include
+
+#include "mkl.h"
+
+using namespace std::chrono;
+using namespace std;
+
+
+// Helper that converts the three Python arguments (A, B, C) of a contraction
+// routine into complex128 numpy arrays and exposes their data pointers.
+//
+//   dummy : unused
+//   args  : argument tuple; the third item must already be a numpy array
+//   Obj   : out, receives one new reference per array (A, B, C) on success
+//   Data  : out, receives the data pointer of each array on success
+//
+// Returns 0 on success.  Returns 1 on failure; every reference created here
+// has then already been released, so the caller must not touch Obj.
+int python_abc_complex_args(PyObject *dummy, PyObject *args, PyObject **Obj, std::complex<double> **Data) {
+    PyObject *argA, *argB, *argC;
+
+    if (!PyArg_ParseTuple(args, "OOO!", &argA, &argB,
+                          &PyArray_Type, &argC)) return 1;
+
+    Obj[0] = PyArray_FROM_OTF(argA, NPY_COMPLEX128, NPY_ARRAY_IN_ARRAY);
+    Obj[1] = PyArray_FROM_OTF(argB, NPY_COMPLEX128, NPY_ARRAY_IN_ARRAY);
+#if NPY_API_VERSION >= 0x0000000c
+    Obj[2] = PyArray_FROM_OTF(argC, NPY_COMPLEX128, NPY_ARRAY_INOUT_ARRAY2);
+#else
+    Obj[2] = PyArray_FROM_OTF(argC, NPY_COMPLEX128, NPY_ARRAY_INOUT_ARRAY);
+#endif
+
+    if (Obj[0] == NULL || Obj[1] == NULL || Obj[2] == NULL) {
+#if NPY_API_VERSION >= 0x0000000c
+        // With NPY_ARRAY_INOUT_ARRAY2 the output may be a temporary copy with
+        // a pending write-back; it has to be discarded explicitly before the
+        // last reference is dropped on an error path.
+        if (Obj[2] != NULL) {
+            PyArray_DiscardWritebackIfCopy((PyArrayObject *)Obj[2]);
+        }
+#endif
+        for (int i = 0; i < 3; i++) {
+            Py_XDECREF(Obj[i]);
+        }
+        return 1;
+    }
+
+    for (int i = 0; i < 3; i++) {
+        Data[i] = (std::complex<double> *)PyArray_DATA((PyArrayObject *)Obj[i]);
+    }
+    return 0;
+}
+//
+
+// DEBUG
+static PyObject *
+debug_mkl_contract_sum(PyObject *dummy, PyObject *args)
+{
+ std::complex alpha(1, 0);
+ std::complex beta(0, 0);
+ // -- Parse Python arguments
+ PyObject *Obj[3];
+ std::complex *Data[3];
+ int parse_fail;
+ parse_fail = python_abc_complex_args(dummy, args, Obj, Data);
+
+ if (parse_fail != 0) {
+ std::cerr << "Failed to parse arguments" << std::endl;
+ return NULL;
+ }
+ // --
+ PyObject *A, *B, *C;
+ A = Obj[0]; B = Obj[1]; C = Obj[2];
+
+ std::complex *Aptr, *Bptr, *Cptr;
+ Aptr = Data[0]; Bptr = Data[1]; Cptr = Data[2];
+ npy_intp *dimC = PyArray_DIMS(C);
+ npy_intp *dimA = PyArray_DIMS(A);
+
+ int m = dimC[1]; // Row length of A, third index
+ int n = dimC[2]; // Row length of B, third index
+ int k = dimA[0]; // Summation length, first index of A and B
+ int f = dimA[1]; // Multiplication-only index, second index of A and B
+
+ std::cerr << "Dimensions: f:" << f << " k:" << k << " n:" << n << " m:" << m << std::endl;
+ auto start = high_resolution_clock::now();
+ /*
+ * Performs opearation
+ * \sum_k A_{kfm} * B_{kfn} = C_{fmn}
+ */
+
+ for (int i=0; i(stop - start).count();
+ std::cerr << "Duration: " << millis << " milliseconds" << std::endl;
+
+ /*
+ * Works as well:
+ cblas_zgemm(CblasColMajor,
+ CblasNoTrans,
+ CblasTrans,
+ n, m, k, &alpha,
+ Bptr + i*n, f*n,
+ Aptr + i*m, f*m,
+ &beta,
+ Cptr + i*n*m, n);
+ */
+ // -- Clean up python pointers
+ Py_DECREF(A);
+ Py_DECREF(B);
+ Py_DECREF(C);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+
+static PyObject *
+mkl_contract_sum(PyObject *dummy, PyObject *args)
+{
+ std::complex alpha(1, 0);
+ std::complex beta(0, 0);
+
+ // -- Parse Python arguments
+ PyObject *Obj[3];
+ std::complex *Data[3];
+ int parse_fail;
+ parse_fail = python_abc_complex_args(dummy, args, Obj, Data);
+
+ if (parse_fail != 0) {
+ std::cerr << "Failed to parse arguments" << std::endl;
+ return NULL;
+ }
+ // --
+ PyObject *A, *B, *C;
+ A = Obj[0]; B = Obj[1]; C = Obj[2];
+ std::complex *Aptr, *Bptr, *Cptr;
+ Aptr = Data[0]; Bptr = Data[1]; Cptr = Data[2];
+ npy_intp *dimC = PyArray_DIMS(C);
+ npy_intp *dimA = PyArray_DIMS(A);
+
+ int m = dimC[1]; // Row length of A, third index
+ int n = dimC[2]; // Row length of B, third index
+ int k = dimA[0]; // Summation length, first index of A and B
+ int f = dimA[1]; // Multiplication-only index, second index of A and B
+
+
+ /*
+ * Performs opearation
+ * \sum_k A_{kfm} * B_{kfn} = C_{fmn}
+ */
+
+ for (int i=0; i alpha(1, 0);
+ std::complex beta(0, 0);
+
+ // -- Parse Python arguments
+ PyObject *Obj[3];
+ std::complex *Data[3];
+ int parse_fail;
+ parse_fail = python_abc_complex_args(dummy, args, Obj, Data);
+
+ if (parse_fail != 0) {
+ std::cerr << "Failed to parse arguments" << std::endl;
+ return NULL;
+ }
+ // --
+
+ PyObject *A, *B, *C;
+ A = Obj[0]; B = Obj[1]; C = Obj[2];
+
+ std::complex *Aptr, *Bptr, *Cptr;
+ Aptr = Data[0]; Bptr = Data[1]; Cptr = Data[2];
+
+ //auto now = high_resolution_clock::now();
+ //auto millis = duration_cast(now - epoch).count();
+ //std::cout << "after convert. duration (μs) = " << millis << std::endl;
+
+ npy_intp *dimC = PyArray_DIMS(C);
+
+ std::cerr << "Dimensions: C[0]:" << dimC[0] << " C[1]:" << dimC[1] << " C[2]:" << dimC[2] << std::endl;
+ auto start = high_resolution_clock::now();
+
+ for (int i=0; i(stop - start).count();
+ std::cerr << "Duration: " << millis << " milliseconds" << std::endl;
+
+ Py_DECREF(A);
+ Py_DECREF(B);
+ Py_DECREF(C);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+
+static PyObject *
+mkl_contract_complex(PyObject *dummy, PyObject *args)
+{
+ std::complex alpha(1, 0);
+ std::complex beta(0, 0);
+
+ // -- Parse Python arguments
+ PyObject *Obj[3];
+ std::complex *Data[3];
+ int parse_fail;
+ parse_fail = python_abc_complex_args(dummy, args, Obj, Data);
+
+ if (parse_fail != 0) {
+ std::cerr << "Failed to parse arguments" << std::endl;
+ return NULL;
+ }
+ // --
+
+ PyObject *A, *B, *C;
+ A = Obj[0]; B = Obj[1]; C = Obj[2];
+
+ std::complex *Aptr, *Bptr, *Cptr;
+ Aptr = Data[0]; Bptr = Data[1]; Cptr = Data[2];
+
+ //auto now = high_resolution_clock::now();
+ //auto millis = duration_cast(now - epoch).count();
+ //std::cout << "after convert. duration (μs) = " << millis << std::endl;
+
+ npy_intp *dimC = PyArray_DIMS(C);
+
+ for (int i=0; i *Aptr, *Bptr, *Cptr;
+ std::complex alpha(1, 0);
+ std::complex beta(0, 0);
+
+ int nd;
+ npy_intp * dimC;
+ npy_intp * dimA;
+
+ if (!PyArg_ParseTuple(args, "OOO!", &argA, &argB,
+ &PyArray_Type, &argC)) return NULL;
+
+ A = PyArray_FROM_OTF(argA, NPY_COMPLEX128, NPY_ARRAY_IN_ARRAY);
+ if (A == NULL) return NULL;
+ B = PyArray_FROM_OTF(argB, NPY_COMPLEX128, NPY_ARRAY_IN_ARRAY);
+ if (B == NULL) goto fail;
+#if NPY_API_VERSION >= 0x0000000c
+ C = PyArray_FROM_OTF(argC, NPY_COMPLEX128, NPY_ARRAY_INOUT_ARRAY2);
+#else
+ C = PyArray_FROM_OTF(argC, NPY_COMPLEX128, NPY_ARRAY_INOUT_ARRAY);
+#endif
+ if (C == NULL) goto fail;
+
+
+
+ //auto now = high_resolution_clock::now();
+ //auto millis = duration_cast(now - epoch).count();
+
+ nd = PyArray_NDIM(C);
+ if (nd!=2) goto fail;
+ dimC = PyArray_DIMS(C);
+ dimA = PyArray_DIMS(A);
+ Aptr = (std::complex *)PyArray_DATA(A);
+ Bptr = (std::complex *)PyArray_DATA(B);
+ Cptr = (std::complex *)PyArray_DATA(C);
+ std::cout << "A[0][1]" << Aptr[1] << std::endl;
+ std::cout << "dimC" << dimC[0] << "," <= 0x0000000c
+ C = PyArray_FROM_OTF(argC, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY2);
+#else
+ C = PyArray_FROM_OTF(argC, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
+#endif
+ if (C == NULL) goto fail;
+
+
+
+ //auto now = high_resolution_clock::now();
+ //auto millis = duration_cast(now - epoch).count();
+ //std::cout << "after convert. duration (μs) = " << millis << std::endl;
+
+ nd = PyArray_NDIM(C);
+ if (nd!=3) goto fail;
+ dimC = PyArray_DIMS(C);
+ Aptr = (double *)PyArray_DATA(A);
+ Bptr = (double *)PyArray_DATA(B);
+ Cptr = (double *)PyArray_DATA(C);
+
+ for (int i=0; i) -- number of dimensions
+ dims = PyArray_DIMS(<..>) -- npy_intp array of length nd
+ showing length in each dim.
+ dptr = (double *)PyArray_DATA(<..>) -- pointer to data.
+
+ If an error occurs goto fail.
+ */
+
+ Py_DECREF(A);
+ Py_DECREF(B);
+ Py_DECREF(C);
+ Py_INCREF(Py_None);
+ return Py_None;
+
+ fail:
+ Py_XDECREF(A);
+ Py_XDECREF(B);
+ Py_XDECREF(C);
+ return NULL;
+
+}
+
+static PyObject *
+triple_loop_contract(PyObject *dummy, PyObject *args)
+{
+ PyObject *argA=NULL, *argB, *argC;
+ PyObject *A=NULL, *B, *C;
+ double *Aptr, *Bptr, *Cptr;
+
+ auto epoch = high_resolution_clock::now();
+ int nd;
+ npy_intp * dimC;
+
+ if (!PyArg_ParseTuple(args, "OOO!", &argA, &argB,
+ &PyArray_Type, &argC)) return NULL;
+
+ A = PyArray_FROM_OTF(argA, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+ if (A == NULL) return NULL;
+ B = PyArray_FROM_OTF(argB, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+ if (B == NULL) goto fail;
+#if NPY_API_VERSION >= 0x0000000c
+ C = PyArray_FROM_OTF(argC, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY2);
+#else
+ C = PyArray_FROM_OTF(argC, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
+#endif
+ if (C == NULL) goto fail;
+
+
+
+ //auto now = high_resolution_clock::now();
+ //auto millis = duration_cast(now - epoch).count();
+ //std::cout << "after convert. duration (μs) = " << millis << std::endl;
+
+ nd = PyArray_NDIM(C);
+ if (nd!=3) goto fail;
+ dimC = PyArray_DIMS(C);
+ Aptr = (double *)PyArray_DATA(A);
+ Bptr = (double *)PyArray_DATA(B);
+ Cptr = (double *)PyArray_DATA(C);
+
+ for (int i=0; i(now - epoch).count();
+ std::cout << "after convert. duration (μs) = " << millis << std::endl;
+
+
+ dptr = (double *)PyArray_DATA(arr);
+ std::cout << "arr[0] = " << *dptr << std::endl;
+ std::cout << "arr[1] = " << *(dptr+1) << std::endl;
+ std::cout << "arr[2] = " << *(dptr+2) << std::endl;
+ std::cout << "arr[3] = " << *(dptr+3) << std::endl;
+
+
+ Py_DECREF(arr);
+ Py_INCREF(Py_None);
+ return Py_None;
+
+}
+
+// -- Examples
+
+static PyObject * integrate3(PyObject * module, PyObject * args)
+{
+ PyObject * argy=NULL; // Regular Python/C API
+ PyArrayObject * yarr=NULL; // Extended Numpy/C API
+ double dx,dy,dz;
+
+ std::cout << "in func" < read argument as a PyObject type into argy (Python/C API)
+ if (!PyArg_ParseTuple(args, "Oddd", &argy,&dx,&dy,&dz))
+ {
+ PyErr_SetString(PyExc_ValueError, "Error parsing arguments.");
+ return NULL;
+ }
+
+ std::cout << "parsed" << std::endl;
+ // Determine if it's a complex number array (Numpy/C API)
+ int DTYPE = PyArray_ObjectType(argy, NPY_FLOAT);
+ int iscomplex = PyTypeNum_ISCOMPLEX(DTYPE);
+ std::cout << "Is complex" << iscomplex << std::endl;
+
+ // parse python object into numpy array (Numpy/C API)
+ yarr = (PyArrayObject *)PyArray_FROM_OTF(argy, DTYPE, NPY_ARRAY_IN_ARRAY);
+ if (yarr==NULL) {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+
+ //just assume this for 3 dimensional array...you can generalize to N dims
+ if (PyArray_NDIM(yarr) != 3) {
+ Py_CLEAR(yarr);
+ PyErr_SetString(PyExc_ValueError, "Expected 3 dimensional integrand");
+ return NULL;
+ }
+
+ npy_intp * dims = PyArray_DIMS(yarr);
+ npy_intp i,j,k,m;
+ double * p;
+
+ //initialize variable to hold result
+ Py_complex result = {.real = 0, .imag = 0};
+ std::cout << "Is complex" << iscomplex << std::endl;
+
+ if (iscomplex) {
+ for (i=0;i= 0x0000000c
+ oarr = PyArray_FROM_OTF(out, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY2);
+#else
+ oarr = PyArray_FROM_OTF(out, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
+#endif
+ if (oarr == NULL) goto fail;
+
+
+ std::cout << "arr1" << std::endl;
+ dptr = (double *)PyArray_DATA(arr1);
+ std::cout << "arrval = " << *dptr << std::endl;
+ std::cout << "arrval = " << *(dptr+1) << std::endl;
+ std::cout << "arrval = " << *(dptr+2) << std::endl;
+ std::cout << "arrval = " << *dptr+3 << std::endl;
+ std::cout << "arrval = " << *dptr+4 << std::endl;
+
+
+ /* code that makes use of arguments */
+ /* You will probably need at least
+ nd = PyArray_NDIM(<..>) -- number of dimensions
+ dims = PyArray_DIMS(<..>) -- npy_intp array of length nd
+ showing length in each dim.
+ dptr = (double *)PyArray_DATA(<..>) -- pointer to data.
+
+ If an error occurs goto fail.
+ */
+
+ Py_DECREF(arr1);
+ Py_DECREF(arr2);
+ Py_DECREF(oarr);
+ Py_INCREF(Py_None);
+ return Py_None;
+
+ fail:
+ Py_XDECREF(arr1);
+ Py_XDECREF(arr2);
+ Py_XDECREF(oarr);
+ return NULL;
+}
+
+// --
+
+// Method table of the tcontract module: Python-visible name, C implementation,
+// calling convention and docstring of every exported function.  Docstrings
+// are written as adjacent string literals rather than with a backslash line
+// continuation, which would embed the source indentation in the text.
+static PyMethodDef tcontract_Methods[] = {
+    {"integrate3", integrate3, METH_VARARGS,
+     "Pass 3D numpy array (double or complex) and dx,dy,dz step size. Returns Riemann integral"},
+    {"example", example_wrapper, METH_VARARGS,
+     "Example from https://numpy.org/doc/stable/user/c-info.how-to-extend.html"},
+    {"print_4", print_4, METH_VARARGS,
+     "Prints first 4 values of numpy array"},
+    {"mkl_dotmul", mkl_dotmul, METH_VARARGS,
+     "Matrix multiplication"},
+
+    {"triple_loop_contract", triple_loop_contract, METH_VARARGS,
+     "Contracts two arrays with first common index"},
+    {"mkl_contract", mkl_contract, METH_VARARGS,
+     "Contracts two arrays with first common index using MKL"},
+    {"mkl_contract_complex", mkl_contract_complex, METH_VARARGS,
+     "Contracts two arrays with first common index using MKL"},
+    {"mkl_contract_sum", mkl_contract_sum, METH_VARARGS,
+     "Performs operation: "
+     "\\sum_k A_{kfm} * B_{kfn} = C_{fmn}"},
+
+    {"debug_mkl_contract_sum", debug_mkl_contract_sum, METH_VARARGS,
+     "DEBUG Performs operation: "
+     "\\sum_k A_{kfm} * B_{kfn} = C_{fmn}"},
+    {"debug_mkl_contract_complex", debug_mkl_contract_complex, METH_VARARGS,
+     "DEBUG Contracts two arrays with first common index using MKL"},
+    {NULL, NULL, 0, NULL} /* Sentinel */
+};
+
+
+// Module definition passed to PyModule_Create in PyInit_tcontract.
+static struct PyModuleDef module = {
+    PyModuleDef_HEAD_INIT,
+    "tcontract",        /* module name */
+    NULL,               /* module docstring; none is provided */
+    -1,                 /* per-interpreter state size; -1 means the module
+                           keeps its state in global variables */
+    tcontract_Methods   /* exported functions */
+};
+
+// Import hook: runs when Python imports `tcontract` and returns the new
+// module object, which the interpreter then inserts into `sys.modules`.
+PyMODINIT_FUNC
+PyInit_tcontract(void)
+{
+    import_array(); // initialise the numpy C-API before any array call
+    PyObject *created = PyModule_Create(&module);
+    return created;
+}
diff --git a/scratchpad/cpp_connections/vanilia/nparray/test.py b/scratchpad/cpp_connections/vanilia/nparray/test.py
new file mode 100644
index 00000000..d4b1d67a
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/nparray/test.py
@@ -0,0 +1,40 @@
+import tcontract
+import sys
+import numpy as np
+
+def test(func):
+ def wraped():
+ print('Testing', func.__name__)
+ func()
+ return wraped
+
+@test
+def test_transpose():
+ arr = np.array([[0,1],[2,3]])
+ arr = np.array(arr, dtype=np.double)
+ print('in python:\n', arr)
+ _ = tcontract.print_4(arr)
+
+ arr = arr.T
+ print('in python:\n', arr)
+ _ = tcontract.print_4(arr)
+
+test_transpose()
+
+@test
+def test_transpose_large():
+ N = 25
+ arr = np.random.randn(*[2]*N)
+ tcontract.print_4(arr)
+ print('transposed')
+ arr = arr.transpose(*reversed(range(N)))
+ tcontract.print_4(arr)
+
+test_transpose_large()
+
+arr = np.random.randn(4,8,16) + 1j*np.random.randn(4,8,16)
+print(np.sum(arr))
+
+# arbitrary step size dx = 1., y=0.5, dz = 0.25
+ans = tcontract.integrate3(arr, 1.0, 1.0, 1.0)
+print(ans)
diff --git a/scratchpad/cpp_connections/vanilia/nparray/transposes.py b/scratchpad/cpp_connections/vanilia/nparray/transposes.py
new file mode 100644
index 00000000..3974d544
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/nparray/transposes.py
@@ -0,0 +1,29 @@
+import tcontract
+import sys
+import numpy as np
+
+def large_transpose():
+ try:
+ N = int(sys.argv[1])
+ except LookupError:
+ N = 24
+ print('Numel = ', 2**N)
+ arr = np.random.randn(*[2]*N)
+ size = sys.getsizeof(arr)
+ print('Array size = {C_size:e} bytes'.format(C_size=size))
+ tcontract.print_4(arr)
+ print('\n== transposed: reverse (worst case) ==')
+ arr = arr.transpose(*reversed(range(N)))
+ tcontract.print_4(arr)
+
+ arr = np.random.randn(*[2]*N)
+ print('\n== transposed: start (good cache efficiency) ==')
+ arr = arr.swapaxes(1,0)
+ tcontract.print_4(arr)
+
+ arr = np.random.randn(*[2]*N)
+ print('\n== transposed: end (low cache efficiency) ==')
+ arr = arr.swapaxes(N-1,N-2)
+ tcontract.print_4(arr)
+
+large_transpose()
diff --git a/scratchpad/cpp_connections/vanilia/tutorial/setup.py b/scratchpad/cpp_connections/vanilia/tutorial/setup.py
new file mode 100644
index 00000000..84452d31
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/tutorial/setup.py
@@ -0,0 +1,10 @@
+from setuptools import setup, Extension # use setuptools instead of distutils from tutorial
+
+module = Extension('spam', sources=['spammodule.c'])
+
+setup(
+ name='spam',
+ version='0.0.0',
+ description='Binding python to cpp',
+ ext_modules=[module]
+)
diff --git a/scratchpad/cpp_connections/vanilia/tutorial/spammodule.c b/scratchpad/cpp_connections/vanilia/tutorial/spammodule.c
new file mode 100644
index 00000000..01ea3379
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/tutorial/spammodule.c
@@ -0,0 +1,48 @@
+//-- Pull the Python API
+#define PY_SSIZE_T_CLEAN // What this does? https://docs.python.org/3/extending/extending.html#parsetuple
+#include
+//--
+
+// Python-callable wrapper around system().  For module-level functions the
+// self argument points to the module object.
+static PyObject *
+spam_system(PyObject *self, PyObject *args) {
+    const char *cmd;
+
+    // int PyArg_ParseTuple(PyObject *args, const char *format, ...)
+    // "s" reads the single argument as a C string; a zero result is
+    // reported to the caller by returning NULL.
+    if (PyArg_ParseTuple(args, "s", &cmd) == 0) {
+        return NULL;
+    }
+
+    // system() is available because Python.h pulls in stdlib.h.
+    int status = system(cmd);
+
+    // The status goes back to Python as an integer object.
+    return PyLong_FromLong(status);
+}
+
+// Method table: one entry per exported function, closed by a sentinel entry.
+static PyMethodDef SpamMethods[] = {
+    {
+        "system",                    /* name visible from Python */
+        spam_system,                 /* C implementation */
+        METH_VARARGS,                /* calling convention flag */
+        "Execute a shell command."   /* docstring */
+    },
+    {NULL, NULL, 0, NULL} /* Sentinel */
+};
+
+// Module definition passed to PyModule_Create in PyInit_spam.
+static struct PyModuleDef spammodule = {
+    PyModuleDef_HEAD_INIT,
+    "spam",        /* module name */
+    NULL,          /* module docstring; none is provided */
+    -1,            /* per-interpreter state size; -1 means the module
+                      keeps its state in global variables */
+    SpamMethods    /* exported functions */
+};
+
+/*
+   PyMODINIT_FUNC gives the function a PyObject * return type, adds any
+   special linkage declarations the platform requires and, for C++,
+   declares the function as extern "C".
+*/
+PyMODINIT_FUNC
+PyInit_spam(void)
+{
+    // Runs on import; the returned module object is what gets inserted
+    // into `sys.modules`.
+    PyObject *created = PyModule_Create(&spammodule);
+    return created;
+}
diff --git a/scratchpad/cpp_connections/vanilia/tutorial/test_spam.py b/scratchpad/cpp_connections/vanilia/tutorial/test_spam.py
new file mode 100644
index 00000000..1dc80a17
--- /dev/null
+++ b/scratchpad/cpp_connections/vanilia/tutorial/test_spam.py
@@ -0,0 +1,6 @@
+import spam
+"""
+-a all except session-leaders and non-terminal
+-l long format
+"""
+spam.system('ps -al')
diff --git a/setup.py b/setup.py
index c410413b..feacc9f7 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,7 @@
,'loguru'
,'tqdm'
,'click'
+ ,'lazy-import'
,'qtensor-qtree'
]