From 02ede3fe985431a3f7077dc006a256a95a413bb0 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Tue, 11 Jun 2019 12:03:07 +0100 Subject: [PATCH 01/20] init autodiff benchmarking --- docs/benchmark/completion.py | 13 ++ docs/benchmark/run_completion.py | 7 + docs/benchmark/utils.py | 211 +++++++++++++++++++++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 docs/benchmark/completion.py create mode 100644 docs/benchmark/run_completion.py create mode 100644 docs/benchmark/utils.py diff --git a/docs/benchmark/completion.py b/docs/benchmark/completion.py new file mode 100644 index 00000000..27f2c4f8 --- /dev/null +++ b/docs/benchmark/completion.py @@ -0,0 +1,13 @@ +import argparse +import utils + + +parser = argparse.ArgumentParser() +parser.add_argument('--n', type=int) +parser.add_argument('--d', type=int) +parser.add_argument('--tt_rank', type=int) +args = parser.parse_args() + +case = utils.Completion(args.n, args.d, args.tt_rank) + +all_logs.append(utils.benchmark(case)) diff --git a/docs/benchmark/run_completion.py b/docs/benchmark/run_completion.py new file mode 100644 index 00000000..16650b29 --- /dev/null +++ b/docs/benchmark/run_completion.py @@ -0,0 +1,7 @@ +import subprocess + + +for n in [20, 100, 500]: + for d in [10, 20, 30]: + for r in [5, 10, 20]: + print(subprocess.check_output(['python3', 'completion.py', '--n=%d' % n, '--d=%d' % d, '--r=%d' % r])) \ No newline at end of file diff --git a/docs/benchmark/utils.py b/docs/benchmark/utils.py new file mode 100644 index 00000000..1139fa3f --- /dev/null +++ b/docs/benchmark/utils.py @@ -0,0 +1,211 @@ +import numpy as np +import tensorflow as tf +import numpy as np +import t3f +import json +import pickle + + +def robust_cumprod(arr): + """Cumulative product with large values replaced by the MAX_DTYPE. + + robust_cumprod([10] * 100) = [10, 100, 1000, ..., MAX_INT, ..., MAX_INT] + """ + + res = np.ones(arr.size, dtype=arr.dtype) + change_large_to = np.iinfo(arr.dtype).max + res[0] = arr[0] + for i in range(1, arr.size): + next_value = np.array(res[i - 1]) * np.array(arr[i]) + if next_value / np.array(arr[i]) != np.array(res[i - 1]): + next_value = change_large_to + res[i] = next_value + return res + + +def max_tt_ranks(raw_shape): + """Maximal TT-ranks for a TT-object of given shape. + + For example, a tensor of shape (2, 3, 5, 7) has maximal TT-ranks + (1, 2, 6, 7, 1) + making the TT-ranks larger will not increase flexibility. + + If maximum TT-ranks result in integer overflows, it substitutes + the too-large-values with MAX_INT. + + Args: + shape: an integer vector + Returns: + tt_ranks: an integer vector, maximal tt-rank for each dimension + """ + raw_shape = np.array(raw_shape).astype(np.int64) + d = raw_shape.size + tt_ranks = np.zeros(d + 1, dtype='int64') + tt_ranks[0] = 1 + tt_ranks[d] = 1 + left_to_right = robust_cumprod(raw_shape) + right_to_left = robust_cumprod(raw_shape[::-1])[::-1] + tt_ranks[1:-1] = np.minimum(left_to_right[:-1], right_to_left[1:]) + return tt_ranks + +def sparse(idx, shape, dtype=None): + cores = [] + for k in range(len(idx)): + eye = tf.eye(shape[k], dtype=dtype) + cores.append(tf.reshape(eye[idx[k]], (1, shape[k], 1))) + return t3f.TensorTrain(cores) + +def batch_sparse(idx_list, shape, weights=None, dtype=None): + cores = [] + for k in range(len(idx_list[0])): + curr_core = [] + eye = tf.eye(shape[k], dtype=dtype) + cores.append(tf.reshape(tf.gather(eye, idx_list[:, k]), (-1, 1, shape[k], 1))) + if weights is not None: + cores[0] *= weights[:, None, None, None] + return t3f.TensorTrainBatch(cores) + + +def reduce_sum_batch(x): + tt_cores = list(x.tt_cores) + for i, core in enumerate(tt_cores): + bs, r1, n, r2 = core.shape.as_list() + assert r1 == 1 and r2 == 1 + if i == 0: + core = tf.reshape(core, (bs, 1, n)) + core = tf.transpose(core, (1, 2, 0)) + elif i == len(tt_cores) - 1: + core = tf.reshape(core, (bs, n, 1)) + else: + core = tf.tile(core[:, :, :, None, :], (1, 1, 1, bs, 1)) + core = tf.reshape(core, (bs, n, bs)) + core *= tf.tile(tf.eye(bs, dtype=x.dtype)[:, None, :], (1, n, 1)) + tt_cores[i] = core + return t3f.TensorTrain(tt_cores) + +def compare_tensors(tensors): + for a in tensors: + for b in tensors: + a_np, b_np = sess.run([t3f.full(a), t3f.full(b)]) + diff = np.linalg.norm((a_np - b_np).flatten()) / np.linalg.norm(b_np) + assert diff < 1e-8 + +def test(case, sess): + tensors = [] + tensors.append(case.naive_grad()) + try: + tensors.append(case.smart_grad()) + except NotImplementedError: + pass + auto_g = t3f.gradients(case.loss, case.x, runtime_check=True) + tensors.append(auto_g) + compare_tensors(tensors) + + tensors = [] + tensors.append(case.naive_hessian_by_vector()) + try: + tensors.append(case.smart_hessian_by_vector()) + except NotImplementedError: + pass + auto_hv = t3f.hessian_vector_product(case.loss, case.x, case.vector, runtime_check=True) + tensors.append(auto_hv) + compare_tensors(tensors) + + + +class Task(object): + + def smart_grad(self): + return NotImplementedError() + + def naive_hessian_by_vector(self): + return NotImplementedError() + + def smart_hessian_by_vector(self): + return NotImplementedError() + + + +class Completion(Task): + + def __init__(self, n, d, tt_rank): + self.settings = {'n': n, 'd': d, 'tt_rank': tt_rank} + shape = [n] * d + self.num_observed = 10 * d * n * tt_rank**2 ############################################################### + self.observation_idx = np.random.randint(0, n, size=(self.num_observed, len(shape))) + self.observations_np = np.random.randn(self.num_observed) + self.observations = tf.constant(self.observations_np) + tt_rank_x = [1] + [tt_rank] * (d - 1) + [1] + tt_rank_x = np.minimum(tt_rank_x, max_tt_ranks(shape)) + initialization = t3f.random_tensor(shape, tt_rank=tt_rank_x, dtype=tf.float64) + self.x = t3f.get_variable('x', initializer=initialization) + self.x *= 1.0 # Dtype bug + tt_rank_v = [1] + [2 * tt_rank] * (d - 1) + [1] + tt_rank_v = np.minimum(tt_rank_v, max_tt_ranks(shape)) + initialization = t3f.random_tensor(shape, tt_rank=tt_rank_v, dtype=tf.float64) + self.vector = t3f.get_variable('vector', initializer=initialization) + self.sparsity_mask_list_tt = batch_sparse(self.observation_idx, shape, dtype=tf.float64) + self.sparsity_mask_tt = reduce_sum_batch(self.sparsity_mask_list_tt) + self.sparse_observation_tt = reduce_sum_batch(batch_sparse(self.observation_idx, shape, self.observations_np, dtype=tf.float64)) + + def loss(self, x): + estimated_vals = t3f.gather_nd(x, self.observation_idx) + return 0.5 * tf.reduce_sum((estimated_vals - self.observations_np) ** 2) + + def naive_grad(self): + grad = self.sparsity_mask_tt * self.x - self.sparse_observation_tt + return t3f.project(grad, self.x) + + def smart_grad(self): + estimated_vals = t3f.gather_nd(self.x, self.observation_idx) + diff = estimated_vals - self.observations + return t3f.project_sum(self.sparsity_mask_list_tt, self.x, diff) + + def naive_hessian_by_vector(self): + return t3f.project(self.sparsity_mask_tt * t3f.project(self.vector, self.x), self.x) + + def smart_hessian_by_vector(self): + vector_nonzero = t3f.gather_nd(t3f.project(self.vector, self.x), self.observation_idx) + return t3f.project_sum(self.sparsity_mask_list_tt, self.x, vector_nonzero) + + +def benchmark(case, prev_log=None): + naive_grad = case.naive_grad() + smart_grad = case.smart_grad() + auto_grad = t3f.gradients(case.loss, case.x, runtime_check=False) + + naive_hv = case.naive_hessian_by_vector() + smart_hv = case.smart_hessian_by_vector() + auto_hv = t3f.hessian_vector_product(case.loss, case.x, case.vector, runtime_check=False) + try: + with open(r"logs.pickle", "rb") as output_file: + all_logs_list = pickle.load(output_file) + except: + all_logs_list = [] + all_logs = {} + with tf.Session(config=tf.test.benchmark_config()) as sess: + sess.run(tf.global_variables_initializer()) + benchmark = tf.test.Benchmark() + + all_logs['settings'] = case.settings + + def benchmark_single(op, name, all_logs): + try: + if prev_log is not None and prev_log[name] is None: + # No point in trying again, a smaller example failed already. + raise ValueError() + logs = benchmark.run_op_benchmark(sess, op) + all_logs[name] = logs + except: + all_logs[name] = None + + with open(r"logs.pickle", "wb") as output_file: + pickle.dump(all_logs_list + [all_logs], output_file) + + benchmark_single(auto_grad.op, 'auto_grad', all_logs) + benchmark_single(auto_hv.op, 'auto_hv', all_logs) + benchmark_single(smart_grad.op, 'smart_grad', all_logs) + benchmark_single(smart_hv.op, 'smart_hv', all_logs) + # benchmark_single(naive_grad.op, 'naive_grad', all_logs) + # benchmark_single(naive_hv.op, 'naive_hv', all_logs) + return all_logs From 6260050b2e6654c12d30c6589724fda14efcd5aa Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Tue, 11 Jun 2019 12:56:02 +0100 Subject: [PATCH 02/20] small fixes: do not repeat work, identation fix, print current settings --- docs/benchmark/completion.py | 3 ++- docs/benchmark/run_completion.py | 2 +- docs/benchmark/utils.py | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/benchmark/completion.py b/docs/benchmark/completion.py index 27f2c4f8..0e73012b 100644 --- a/docs/benchmark/completion.py +++ b/docs/benchmark/completion.py @@ -10,4 +10,5 @@ case = utils.Completion(args.n, args.d, args.tt_rank) -all_logs.append(utils.benchmark(case)) +print(case.settings) +utils.benchmark(case) diff --git a/docs/benchmark/run_completion.py b/docs/benchmark/run_completion.py index 16650b29..97b2ed8c 100644 --- a/docs/benchmark/run_completion.py +++ b/docs/benchmark/run_completion.py @@ -4,4 +4,4 @@ for n in [20, 100, 500]: for d in [10, 20, 30]: for r in [5, 10, 20]: - print(subprocess.check_output(['python3', 'completion.py', '--n=%d' % n, '--d=%d' % d, '--r=%d' % r])) \ No newline at end of file + print(subprocess.check_output(['python3', 'completion.py', '--n=%d' % n, '--d=%d' % d, '--r=%d' % r])) \ No newline at end of file diff --git a/docs/benchmark/utils.py b/docs/benchmark/utils.py index 1139fa3f..304f7c76 100644 --- a/docs/benchmark/utils.py +++ b/docs/benchmark/utils.py @@ -169,6 +169,18 @@ def smart_hessian_by_vector(self): return t3f.project_sum(self.sparsity_mask_list_tt, self.x, vector_nonzero) +def exist(all_logs, case): + for l in all_logs: + s = l['settings'] + coincide = True + for k in case.settings: + if s[k] != case.settings[k]: + coincide = False + if coincide: + return True + return False + + def benchmark(case, prev_log=None): naive_grad = case.naive_grad() smart_grad = case.smart_grad() @@ -188,6 +200,9 @@ def benchmark(case, prev_log=None): benchmark = tf.test.Benchmark() all_logs['settings'] = case.settings + if exist(all_logs_list, case): + print('skipping') + return None def benchmark_single(op, name, all_logs): try: From 0e42094ea6a059f997dcfbc2917102b7b96775da Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Mon, 24 Jun 2019 16:35:31 +0100 Subject: [PATCH 03/20] add bilinear_xaby --- t3f/__init__.py | 1 + t3f/ops.py | 3 ++- t3f/ops_test.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/t3f/__init__.py b/t3f/__init__.py index 1b57c500..68218428 100644 --- a/t3f/__init__.py +++ b/t3f/__init__.py @@ -15,6 +15,7 @@ from t3f.ops import multiply from t3f.ops import quadratic_form from t3f.ops import bilinear_form +from t3f.ops import bilinear_form_two_mat from t3f.ops import transpose from t3f.ops import gather_nd from t3f.ops import renormalize_tt_cores diff --git a/t3f/ops.py b/t3f/ops.py index 86be42ec..338f217e 100644 --- a/t3f/ops.py +++ b/t3f/ops.py @@ -8,6 +8,8 @@ from t3f import decompositions from t3f import initializers +from opt_einsum import contract + # TODO: add complexities to the comments. @@ -1120,7 +1122,6 @@ def bilinear_form(A, b, c, name='t3f_bilinear_form'): def bilinear_form_two_mat(x, A, B, y, name='t3f_bilinear_xaby'): """Bilinear form x^t A B y; A are B are TT-matrices, x and y can be batches. - Args: x: `TensorTrain` object containing a TT-matrix of size N x 1 or `TensorTrainBatch` with a batch of TT-matrices of size N x 1. diff --git a/t3f/ops_test.py b/t3f/ops_test.py index dbad6cf5..f79fef14 100644 --- a/t3f/ops_test.py +++ b/t3f/ops_test.py @@ -358,8 +358,8 @@ def testBilinearForm(self): self.assertAllClose(res_actual_val, np.squeeze(res_desired), atol=1e-5, rtol=1e-5) - def testBilinearFormBatch(self): - # Test bilinear form for batch of tensors. + def testQuadraticFormBatch(self): + # Test quadratic form for batch of tensors. shape_list = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2))) rank_list = (1, 2) From 0fee37f577a0e0301817c7dc0f2226acd0347231 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Mon, 24 Jun 2019 17:16:13 +0100 Subject: [PATCH 04/20] init bilinear xabx autodiff benchmark --- docs/benchmark/bilinear_xabx.py | 18 ++++++ docs/benchmark/run_completion.py | 3 + docs/benchmark/utils.py | 101 ++++++++++++++++++++++--------- 3 files changed, 95 insertions(+), 27 deletions(-) create mode 100644 docs/benchmark/bilinear_xabx.py diff --git a/docs/benchmark/bilinear_xabx.py b/docs/benchmark/bilinear_xabx.py new file mode 100644 index 00000000..55d41a70 --- /dev/null +++ b/docs/benchmark/bilinear_xabx.py @@ -0,0 +1,18 @@ +import argparse +import utils + +# test_case = utils.BilinearXABX(3, 3, 3, 4, 5) +# utils.test(test_case) + +parser = argparse.ArgumentParser() +parser.add_argument('--m', type=int) +parser.add_argument('--n', type=int) +parser.add_argument('--d', type=int) +parser.add_argument('--tt_rank_mat', type=int) +parser.add_argument('--tt_rank_vec', type=int) +args = parser.parse_args() + +case = utils.BilinearXABX(args.m, args.n, args.d, args.tt_rank_mat, args.tt_rank_vec) + +print(case.settings) +utils.benchmark(case) diff --git a/docs/benchmark/run_completion.py b/docs/benchmark/run_completion.py index 97b2ed8c..4b9d64b8 100644 --- a/docs/benchmark/run_completion.py +++ b/docs/benchmark/run_completion.py @@ -1,5 +1,8 @@ import subprocess +import utils +test_case = utils.Completion(3, 3, 4) +utils.test(test_case) for n in [20, 100, 500]: for d in [10, 20, 30]: diff --git a/docs/benchmark/utils.py b/docs/benchmark/utils.py index 304f7c76..5ad156e2 100644 --- a/docs/benchmark/utils.py +++ b/docs/benchmark/utils.py @@ -83,35 +83,41 @@ def reduce_sum_batch(x): tt_cores[i] = core return t3f.TensorTrain(tt_cores) -def compare_tensors(tensors): +def compare_tensors(tensors, sess): for a in tensors: for b in tensors: a_np, b_np = sess.run([t3f.full(a), t3f.full(b)]) diff = np.linalg.norm((a_np - b_np).flatten()) / np.linalg.norm(b_np) assert diff < 1e-8 -def test(case, sess): - tensors = [] - tensors.append(case.naive_grad()) - try: - tensors.append(case.smart_grad()) - except NotImplementedError: - pass - auto_g = t3f.gradients(case.loss, case.x, runtime_check=True) - tensors.append(auto_g) - compare_tensors(tensors) - - tensors = [] - tensors.append(case.naive_hessian_by_vector()) - try: - tensors.append(case.smart_hessian_by_vector()) - except NotImplementedError: - pass - auto_hv = t3f.hessian_vector_product(case.loss, case.x, case.vector, runtime_check=True) - tensors.append(auto_hv) - compare_tensors(tensors) +def test(case): + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + tensors = [] + tensors.append(case.naive_grad()) + try: + tensors.append(case.smart_grad()) + except NotImplementedError: + pass + auto_g = t3f.gradients(case.loss, case.x, runtime_check=True) + tensors.append(auto_g) + compare_tensors(tensors, sess) + + tensors = [] + tensors.append(case.naive_hessian_by_vector()) + try: + tensors.append(case.smart_hessian_by_vector()) + except NotImplementedError: + pass + auto_hv = t3f.hessian_vector_product(case.loss, case.x, case.vector, runtime_check=True) + tensors.append(auto_hv) + compare_tensors(tensors, sess) +def prune_ranks(tt_rank, shape): + tt_rank_arr = [1] + [tt_rank] * (len(shape) - 1) + [1] + return np.minimum(tt_rank_arr, max_tt_ranks(shape)) + class Task(object): @@ -169,6 +175,39 @@ def smart_hessian_by_vector(self): return t3f.project_sum(self.sparsity_mask_list_tt, self.x, vector_nonzero) +class BilinearXABX(Task): + + def __init__(self, m, n, d, tt_rank_mat, tt_rank_vec): + self.settings = {'n': n, 'm': m, 'd': d, 'tt_rank_mat': tt_rank_mat, 'tt_rank_vec': tt_rank_vec} + shape = ([m] * d, [n] * d) + ranks = prune_ranks(tt_rank_vec, shape[1]) + initialization = t3f.random_matrix((shape[1], None), tt_rank=ranks, dtype=tf.float64) + self.x = t3f.get_variable('x', initializer=initialization) + ranks = prune_ranks(2 * tt_rank_vec, shape[1]) + initialization = t3f.random_matrix((shape[1], None), tt_rank=ranks, dtype=tf.float64) + self.vector = t3f.get_variable('vector', initializer=initialization) + ranks = prune_ranks(tt_rank_mat, np.prod(shape, axis=0)) + initialization = t3f.random_matrix(shape, tt_rank=ranks, dtype=tf.float64) + self.mat = t3f.get_variable('mat', initializer=initialization) + + def loss(self, x): + return 0.5 * t3f.bilinear_xaby(x, t3f.transpose(self.mat), self.mat, x) + + def naive_grad(self): + grad = t3f.matmul(t3f.transpose(self.mat), t3f.matmul(self.mat, self.x)) + return t3f.project(grad, self.x) + + def smart_grad(self): + raise NotImplementedError() + + def naive_hessian_by_vector(self): + projected_vec = t3f.project(self.vector, self.x) + return t3f.project(t3f.matmul(t3f.transpose(self.mat), t3f.matmul(self.mat, projected_vec)), self.x) + + def smart_hessian_by_vector(self): + raise NotImplementedError() + + def exist(all_logs, case): for l in all_logs: s = l['settings'] @@ -183,12 +222,18 @@ def exist(all_logs, case): def benchmark(case, prev_log=None): naive_grad = case.naive_grad() - smart_grad = case.smart_grad() auto_grad = t3f.gradients(case.loss, case.x, runtime_check=False) + try: + smart_grad = case.smart_grad() + except NotImplementedError: + smart_grad = None naive_hv = case.naive_hessian_by_vector() - smart_hv = case.smart_hessian_by_vector() auto_hv = t3f.hessian_vector_product(case.loss, case.x, case.vector, runtime_check=False) + try: + smart_hv = case.smart_hessian_by_vector() + except NotImplementedError: + smart_hv = None try: with open(r"logs.pickle", "rb") as output_file: all_logs_list = pickle.load(output_file) @@ -219,8 +264,10 @@ def benchmark_single(op, name, all_logs): benchmark_single(auto_grad.op, 'auto_grad', all_logs) benchmark_single(auto_hv.op, 'auto_hv', all_logs) - benchmark_single(smart_grad.op, 'smart_grad', all_logs) - benchmark_single(smart_hv.op, 'smart_hv', all_logs) - # benchmark_single(naive_grad.op, 'naive_grad', all_logs) - # benchmark_single(naive_hv.op, 'naive_hv', all_logs) + if smart_grad is not None: + benchmark_single(smart_grad.op, 'smart_grad', all_logs) + if smart_hv is not None: + benchmark_single(smart_hv.op, 'smart_hv', all_logs) + benchmark_single(naive_grad.op, 'naive_grad', all_logs) + benchmark_single(naive_hv.op, 'naive_hv', all_logs) return all_logs From 02ba3a77c1ac1255bfcb57fe8b46d0168733a1cf Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Mon, 23 Sep 2019 13:57:57 +0100 Subject: [PATCH 05/20] bilinear xabx runner and adding opt_einsum to the install requiraments --- docs/benchmark/run_bilinear_xabx.py | 10 ++++++++++ setup.py | 1 + 2 files changed, 11 insertions(+) create mode 100644 docs/benchmark/run_bilinear_xabx.py diff --git a/docs/benchmark/run_bilinear_xabx.py b/docs/benchmark/run_bilinear_xabx.py new file mode 100644 index 00000000..fc252c4a --- /dev/null +++ b/docs/benchmark/run_bilinear_xabx.py @@ -0,0 +1,10 @@ +import subprocess +import utils + +test_case = utils.BilinearXABX(3, 3, 3, 4, 5) +utils.test(test_case) + +for n in [20, 100, 500]: + for d in [10, 20, 30]: + for r in [5, 10, 20]: + print(subprocess.check_output(['python3', 'completion.py', '--n=%d' % n, '--d=%d' % d, '--r=%d' % r])) \ No newline at end of file diff --git a/setup.py b/setup.py index 5b165d43..b02fca67 100644 --- a/setup.py +++ b/setup.py @@ -10,5 +10,6 @@ packages=['t3f'], install_requires=[ 'numpy', + 'opt_einsum', ], zip_safe=False) From a4f182bfa8c3789a4a90bfbfe435a70819a22487 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Thu, 3 Oct 2019 18:13:08 +0100 Subject: [PATCH 06/20] add xax, Rayleigh Quotient, and expmachines --- docs/benchmark/utils.py | 172 +++++++++++++++++++++++++++++++++------- 1 file changed, 142 insertions(+), 30 deletions(-) diff --git a/docs/benchmark/utils.py b/docs/benchmark/utils.py index 5ad156e2..512dfd29 100644 --- a/docs/benchmark/utils.py +++ b/docs/benchmark/utils.py @@ -82,36 +82,6 @@ def reduce_sum_batch(x): core *= tf.tile(tf.eye(bs, dtype=x.dtype)[:, None, :], (1, n, 1)) tt_cores[i] = core return t3f.TensorTrain(tt_cores) - -def compare_tensors(tensors, sess): - for a in tensors: - for b in tensors: - a_np, b_np = sess.run([t3f.full(a), t3f.full(b)]) - diff = np.linalg.norm((a_np - b_np).flatten()) / np.linalg.norm(b_np) - assert diff < 1e-8 - -def test(case): - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - tensors = [] - tensors.append(case.naive_grad()) - try: - tensors.append(case.smart_grad()) - except NotImplementedError: - pass - auto_g = t3f.gradients(case.loss, case.x, runtime_check=True) - tensors.append(auto_g) - compare_tensors(tensors, sess) - - tensors = [] - tensors.append(case.naive_hessian_by_vector()) - try: - tensors.append(case.smart_hessian_by_vector()) - except NotImplementedError: - pass - auto_hv = t3f.hessian_vector_product(case.loss, case.x, case.vector, runtime_check=True) - tensors.append(auto_hv) - compare_tensors(tensors, sess) def prune_ranks(tt_rank, shape): @@ -175,6 +145,89 @@ def smart_hessian_by_vector(self): return t3f.project_sum(self.sparsity_mask_list_tt, self.x, vector_nonzero) +class BilinearXAX(Task): + + def __init__(self, m, n, d, tt_rank_mat, tt_rank_vec): + self.settings = {'n': n, 'm': m, 'd': d, 'tt_rank_mat': tt_rank_mat, 'tt_rank_vec': tt_rank_vec} + shape = ([m] * d, [n] * d) + ranks = prune_ranks(tt_rank_vec, shape[1]) + initialization = t3f.random_matrix((shape[1], None), tt_rank=ranks, dtype=tf.float64) + self.x = t3f.get_variable('x', initializer=initialization) + ranks = prune_ranks(2 * tt_rank_vec, shape[1]) + initialization = t3f.random_matrix((shape[1], None), tt_rank=ranks, dtype=tf.float64) + self.vector = t3f.get_variable('vector', initializer=initialization) + ranks = prune_ranks(tt_rank_mat, np.prod(shape, axis=0)) + mat = t3f.random_matrix(shape, tt_rank=ranks, dtype=tf.float64) + mat = t3f.transpose(mat) + mat + self.mat = t3f.get_variable('mat', initializer=mat) + + def loss(self, x): + return 0.5 * t3f.quadratic_form(self.mat, x, x) # DO NOT SUBMIT + + def naive_grad(self): + grad = t3f.matmul(self.mat, self.x) # DO NOT SUBMIT + return t3f.project(grad, self.x) + + def smart_grad(self): + return t3f.project_matmul(t3f.expand_batch_dim(self.x), self.x, self.mat)[0] # DO NOT SUBMIT + + def naive_hessian_by_vector(self): + grad = t3f.matmul(self.mat, self.vector) + return t3f.project(grad, self.x) + + def smart_hessian_by_vector(self): + return t3f.project_matmul(t3f.expand_batch_dim(self.vector), self.x, self.mat)[0] + + +class ExpMachines(Task): + + def __init__(self, n, d, tt_rank_vec, batch_size=32): + self.settings = {'n': n, 'd': d, 'tt_rank_vec': tt_rank_vec} + shape = [n] * d + ranks = prune_ranks(tt_rank_vec, shape) + initialization = t3f.random_tensor(shape, tt_rank=ranks, dtype=tf.float64) + self.x = t3f.get_variable('x', initializer=initialization) + initialization = t3f.random_tensor_batch(shape, tt_rank=1, dtype=tf.float64, batch_size=batch_size) + self.w = t3f.get_variable('w', initializer=initialization) + ranks = prune_ranks(2 * tt_rank_vec, shape) + initialization = t3f.random_tensor(shape, tt_rank=ranks, dtype=tf.float64) + self.vector = t3f.get_variable('vector', initializer=initialization) + + def loss(self, x): + l = t3f.flat_inner(x, self.w) + return tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=tf.ones(self.w.batch_size, dtype=tf.float64))) + + def naive_grad(self): + e = tf.exp(-1. * t3f.flat_inner(self.x, self.w)) + c = -e / (1 + e) + grad = c[0] * self.w[0] + for i in range(1, self.w.batch_size): + grad += c[i] * self.w[i] + return t3f.project(grad, self.x) + + def smart_grad(self): + e = tf.exp(-1. * t3f.flat_inner(self.x, self.w)) + c = -e / (1 + e) + return t3f.project_sum(self.w, 1. * self.x, c) + + def naive_hessian_by_vector(self): + e = tf.exp(-1. * t3f.flat_inner(self.x, self.w)) + s = 1. / (1 + e) + c = s * (1 - s) + c *= t3f.flat_inner(self.vector, self.w) + res = c[0] * self.w[0] + for i in range(1, self.w.batch_size): + res += c[i] * self.w[i] + return t3f.project(res, self.x) + + def smart_hessian_by_vector(self): + e = tf.exp(-1. * t3f.flat_inner(self.x, self.w)) + s = 1. / (1 + e) + c = s * (1 - s) + c *= t3f.flat_inner(self.vector, self.w) + return t3f.project_sum(self.w, 1. * self.x, c) + + class BilinearXABX(Task): def __init__(self, m, n, d, tt_rank_mat, tt_rank_vec): @@ -208,6 +261,65 @@ def smart_hessian_by_vector(self): raise NotImplementedError() +class RayleighQuotient(Task): + + def __init__(self, m, n, d, tt_rank_mat, tt_rank_vec): + self.settings = {'n': n, 'm': m, 'd': d, 'tt_rank_mat': tt_rank_mat, 'tt_rank_vec': tt_rank_vec} + shape = ([m] * d, [n] * d) + ranks = prune_ranks(tt_rank_vec, shape[1]) + initialization = t3f.random_matrix((shape[1], None), tt_rank=ranks, dtype=tf.float64) + self.x = t3f.get_variable('x', initializer=initialization) + ranks = prune_ranks(2 * tt_rank_vec, shape[1]) + initialization = t3f.random_matrix((shape[1], None), tt_rank=ranks, dtype=tf.float64) + self.vector = t3f.get_variable('vector', initializer=initialization) + ranks = prune_ranks(tt_rank_mat, np.prod(shape, axis=0)) + mat = t3f.random_matrix(shape, tt_rank=ranks, dtype=tf.float64) + mat = t3f.transpose(mat) + mat + self.mat = t3f.get_variable('mat', initializer=mat) + + def loss(self, x): + xAx = t3f.quadratic_form(self.mat, x, x) # bilinear_form + xx = t3f.flat_inner(x, x) + return xAx / xx + + def naive_grad(self): + xAx = t3f.quadratic_form(self.mat, self.x, self.x) # bilinear_form + xx = t3f.flat_inner(self.x, self.x) + grad = (1. / xx) * t3f.matmul(self.mat, self.x) + grad -= (xAx / (xx**2)) * self.x + return t3f.project(2 * grad, self.x) + + def smart_grad(self): + xAx = t3f.quadratic_form(self.mat, self.x, self.x) # bilinear_form + xx = t3f.frobenius_norm_squared(self.x, differentiable=True) + grad = (1. / xx) * t3f.project_matmul(t3f.expand_batch_dim(self.x), self.x, self.mat)[0] + grad -= (xAx / xx**2) * self.x + return 2 * grad + + def naive_hessian_by_vector(self): + xAx = t3f.quadratic_form(self.mat, self.x, self.x) # bilinear_form + xx = t3f.frobenius_norm_squared(self.x, differentiable=True) + res = (2 / xx) * t3f.matmul(self.mat, self.vector) + res -= (2 * xAx / xx**2) * self.vector + xv = t3f.flat_inner(self.x, self.vector) + res -= (4 * t3f.quadratic_form(self.mat, self.vector, self.x) / xx**2) * self.x + res -= (4 * xv / xx**2) * t3f.matmul(self.mat, self.x) + res += (8 * xAx * xv / xx**3) * self.x + return t3f.project(res, self.x) + + def smart_hessian_by_vector(self): + xAx = t3f.quadratic_form(self.mat, self.x, self.x) # bilinear_form + xx = t3f.frobenius_norm_squared(self.x, differentiable=True) + projected_vec = t3f.project(self.vector, self.x) + res = (2 / xx) * t3f.project_matmul(t3f.expand_batch_dim(self.vector), self.x, self.mat)[0] + res -= (2 * xAx / xx**2) * projected_vec + xv = t3f.flat_inner(self.x, projected_vec) + res -= (4 * t3f.quadratic_form(self.mat, self.vector, self.x) / xx**2) * self.x + res -= (4 * xv / xx**2) * t3f.project_matmul(t3f.expand_batch_dim(self.x), self.x, self.mat)[0] + res += (8 * xAx * xv / xx**3) * self.x + return res + + def exist(all_logs, case): for l in all_logs: s = l['settings'] From 6d9f794a051379573070eb088224a34e7092a6bd Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Thu, 3 Oct 2019 18:13:40 +0100 Subject: [PATCH 07/20] init tests for the cases --- docs/benchmark/utils_tet.py | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 docs/benchmark/utils_tet.py diff --git a/docs/benchmark/utils_tet.py b/docs/benchmark/utils_tet.py new file mode 100644 index 00000000..2ce7dbb1 --- /dev/null +++ b/docs/benchmark/utils_tet.py @@ -0,0 +1,85 @@ +import numpy as np +import tensorflow as tf + +import utils +import t3f + + +class UtilsTest(tf.test.TestCase): + + def _TestCaseGrad(self, case): + with self.session(): + tf.global_variables_initializer().run() + tensors = [] + manual = case.naive_grad() + try: + manual_2 = case.smart_grad() + self.assertAllClose(t3f.full(manual).eval(), t3f.full(manual_2).eval()) + except NotImplementedError: + pass + auto_g = t3f.gradients(case.loss, case.x, runtime_check=True) + self.assertAllClose(t3f.full(manual).eval(), t3f.full(auto_g).eval(), rtol=1e-5) + + def _TestCaseHess(self, case): + with self.session(): + tf.global_variables_initializer().run() + manual = case.naive_hessian_by_vector() + try: + manual_2 = case.smart_hessian_by_vector() + self.assertAllClose(t3f.full(manual).eval(), t3f.full(manual_2).eval()) + except NotImplementedError: + pass + auto_hv = t3f.hessian_vector_product(case.loss, case.x, case.vector, runtime_check=True) + self.assertAllClose(t3f.full(manual).eval(), t3f.full(auto_hv).eval(), rtol=1e-5) + + def testCompletionGrad(self): + test_case = utils.Completion(3, 3, 4) + self._TestCaseGrad(test_case) + + def testCompletionHess(self): + test_case = utils.Completion(3, 3, 4) + self._TestCaseHess(test_case) + + def testXAXGrad(self): + test_case = utils.BilinearXAX(3, 3, 3, 4, 5) + self._TestCaseGrad(test_case) + + def testXAXHess(self): + test_case = utils.BilinearXAX(3, 3, 3, 4, 5) + self._TestCaseHess(test_case) + + def testXABXGrad(self): + test_case = utils.BilinearXABX(3, 3, 3, 4, 5) + self._TestCaseGrad(test_case) + + def testXABXHess(self): + test_case = utils.BilinearXABX(3, 3, 3, 4, 5) + self._TestCaseHess(test_case) + + def testExpMachinesGrad(self): + test_case = utils.ExpMachines(3, 4, 5, batch_size=3) + self._TestCaseGrad(test_case) + + def testExpMachinesHess(self): + test_case = utils.ExpMachines(3, 3, 3, batch_size=2) + self._TestCaseHess(test_case) + + def testRayleighQuotientGrad(self): + test_case = utils.RayleighQuotient(3, 3, 3, 4, 5) + self._TestCaseGrad(test_case) + + def testRayleighQuotientHess(self): + test_case = utils.RayleighQuotient(3, 3, 3, 4, 5) + self._TestCaseHess(test_case) + + +# class AutodiffTestFloat32(tf.test.TestCase, _AutodiffTest): +# dtype = tf.float32 + + +# class AutodiffTestFloat64(tf.test.TestCase, _AutodiffTest): +# dtype = tf.float64 + + +if __name__ == "__main__": + tf.test.main() From eaf1408001c1bdfafb3b8bbbd44eca42c462a9b7 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Fri, 4 Oct 2019 12:49:23 +0100 Subject: [PATCH 08/20] Make a single runner for all autodiff benchmarks --- docs/benchmark/bilinear_xabx.py | 18 ------- docs/benchmark/completion.py | 14 ----- docs/benchmark/run_all_autodiff.py | 26 ++++++++++ docs/benchmark/run_bilinear_xabx.py | 10 ---- docs/benchmark/run_completion.py | 10 ---- docs/benchmark/run_single_autodiff.py | 34 ++++++++++++ docs/benchmark/utils.py | 74 ++++++++++++++++++--------- 7 files changed, 111 insertions(+), 75 deletions(-) delete mode 100644 docs/benchmark/bilinear_xabx.py delete mode 100644 docs/benchmark/completion.py create mode 100644 docs/benchmark/run_all_autodiff.py delete mode 100644 docs/benchmark/run_bilinear_xabx.py delete mode 100644 docs/benchmark/run_completion.py create mode 100644 docs/benchmark/run_single_autodiff.py diff --git a/docs/benchmark/bilinear_xabx.py b/docs/benchmark/bilinear_xabx.py deleted file mode 100644 index 55d41a70..00000000 --- a/docs/benchmark/bilinear_xabx.py +++ /dev/null @@ -1,18 +0,0 @@ -import argparse -import utils - -# test_case = utils.BilinearXABX(3, 3, 3, 4, 5) -# utils.test(test_case) - -parser = argparse.ArgumentParser() -parser.add_argument('--m', type=int) -parser.add_argument('--n', type=int) -parser.add_argument('--d', type=int) -parser.add_argument('--tt_rank_mat', type=int) -parser.add_argument('--tt_rank_vec', type=int) -args = parser.parse_args() - -case = utils.BilinearXABX(args.m, args.n, args.d, args.tt_rank_mat, args.tt_rank_vec) - -print(case.settings) -utils.benchmark(case) diff --git a/docs/benchmark/completion.py b/docs/benchmark/completion.py deleted file mode 100644 index 0e73012b..00000000 --- a/docs/benchmark/completion.py +++ /dev/null @@ -1,14 +0,0 @@ -import argparse -import utils - - -parser = argparse.ArgumentParser() -parser.add_argument('--n', type=int) -parser.add_argument('--d', type=int) -parser.add_argument('--tt_rank', type=int) -args = parser.parse_args() - -case = utils.Completion(args.n, args.d, args.tt_rank) - -print(case.settings) -utils.benchmark(case) diff --git a/docs/benchmark/run_all_autodiff.py b/docs/benchmark/run_all_autodiff.py new file mode 100644 index 00000000..251a7c72 --- /dev/null +++ b/docs/benchmark/run_all_autodiff.py @@ -0,0 +1,26 @@ +import subprocess +import utils + + +parser = argparse.ArgumentParser() +parser.add_argument('--logs', type=str) +args = parser.parse_args() + +def run_single(case, n, d, r, R=None): + cmd = ['python3', 'run_single_autodiff.py', '--case=%s' % case, + '--n=%d' % n, '--m=%d' % n, '--d=%d' % d, + '--tt_rank_vec=%d' % r, '--logs=%s' % args.logs] + if R is not None: + cmd.append('--tt_rank_mat=%d' % R) + print(subprocess.check_output(cmd)) + + +for n in [20, 100, 500]: + for d in [10, 20, 40]: + for r in [5, 10, 20]: + run_single('completion', n, d, r) + run_single('ExpMachines', n, d, r) + for R in [5, 10, 20]: + run_single('xAx', n, d, r, R) + run_single('xABx', n, d, r, R) + run_single('RayleighQuotient', n, d, r, R) diff --git a/docs/benchmark/run_bilinear_xabx.py b/docs/benchmark/run_bilinear_xabx.py deleted file mode 100644 index fc252c4a..00000000 --- a/docs/benchmark/run_bilinear_xabx.py +++ /dev/null @@ -1,10 +0,0 @@ -import subprocess -import utils - -test_case = utils.BilinearXABX(3, 3, 3, 4, 5) -utils.test(test_case) - -for n in [20, 100, 500]: - for d in [10, 20, 30]: - for r in [5, 10, 20]: - print(subprocess.check_output(['python3', 'completion.py', '--n=%d' % n, '--d=%d' % d, '--r=%d' % r])) \ No newline at end of file diff --git a/docs/benchmark/run_completion.py b/docs/benchmark/run_completion.py deleted file mode 100644 index 4b9d64b8..00000000 --- a/docs/benchmark/run_completion.py +++ /dev/null @@ -1,10 +0,0 @@ -import subprocess -import utils - -test_case = utils.Completion(3, 3, 4) -utils.test(test_case) - -for n in [20, 100, 500]: - for d in [10, 20, 30]: - for r in [5, 10, 20]: - print(subprocess.check_output(['python3', 'completion.py', '--n=%d' % n, '--d=%d' % d, '--r=%d' % r])) \ No newline at end of file diff --git a/docs/benchmark/run_single_autodiff.py b/docs/benchmark/run_single_autodiff.py new file mode 100644 index 00000000..30b992dc --- /dev/null +++ b/docs/benchmark/run_single_autodiff.py @@ -0,0 +1,34 @@ +import argparse +import utils +import pickle +import os.path + +parser = argparse.ArgumentParser() +parser.add_argument('--logs', type=str) +parser.add_argument('--case', type=str) +parser.add_argument('--m', type=int) +parser.add_argument('--n', type=int) +parser.add_argument('--d', type=int) +parser.add_argument('--tt_rank_mat', type=int) +parser.add_argument('--tt_rank_vec', type=int) +args = parser.parse_args() + +if args.case == 'completion': + assert args.m is None and args.tt_rank_mat is None + case = utils.Completion(args.n, args.d, args.tt_rank_vec) +elif args.case == 'xAx': + case = utils.BilinearXAX(args.m, args.n, args.d, args.tt_rank_mat, args.tt_rank_vec) +elif args.case == 'xABx': + case = utils.BilinearXABX(args.m, args.n, args.d, args.tt_rank_mat, args.tt_rank_vec) +elif args.case == 'ExpMachines': + assert args.m is None and args.tt_rank_mat is None + case = utils.ExpMachines(args.n, args.d, args.tt_rank_vec) +elif args.case == 'RayleighQuotient': + case = utils.RayleighQuotient(args.m, args.n, args.d, args.tt_rank_mat, args.tt_rank_vec) +else: + print('Dont know this case.') + +print(args.case, case.settings) +utils.benchmark(args.case, case, args.logs) + + diff --git a/docs/benchmark/utils.py b/docs/benchmark/utils.py index 512dfd29..2aa9dfbb 100644 --- a/docs/benchmark/utils.py +++ b/docs/benchmark/utils.py @@ -4,6 +4,7 @@ import t3f import json import pickle +import copy def robust_cumprod(arr): @@ -100,7 +101,6 @@ def naive_hessian_by_vector(self): def smart_hessian_by_vector(self): return NotImplementedError() - class Completion(Task): @@ -320,8 +320,8 @@ def smart_hessian_by_vector(self): return res -def exist(all_logs, case): - for l in all_logs: +def exist(all_logs, case_name, case): + for l in all_logs[case_name]: s = l['settings'] coincide = True for k in case.settings: @@ -332,7 +332,21 @@ def exist(all_logs, case): return False -def benchmark(case, prev_log=None): +def did_smaller_fail(all_logs, name, case_name, case): + for l in all_logs[case_name]: + s = l['settings'] + if name in l and l[name] is None: + # If this attempt failed. + smaller = True + for k in case.settings: + if s[k] > case.settings[k]: + smaller = False + if smaller: + return True + return False + + +def benchmark(case_name, case, logs_path): naive_grad = case.naive_grad() auto_grad = t3f.gradients(case.loss, case.x, runtime_check=False) try: @@ -347,39 +361,53 @@ def benchmark(case, prev_log=None): except NotImplementedError: smart_hv = None try: - with open(r"logs.pickle", "rb") as output_file: - all_logs_list = pickle.load(output_file) + with open(logs_path, "rb") as output_file: + # Dict with case_name -> list of configurations. + all_logs = pickle.load(output_file) except: - all_logs_list = [] - all_logs = {} + all_logs = {} + if case_name not in all_logs: + all_logs[case_name] = [] + # Single configuration. + current_case_logs = {} with tf.Session(config=tf.test.benchmark_config()) as sess: sess.run(tf.global_variables_initializer()) benchmark = tf.test.Benchmark() - all_logs['settings'] = case.settings - if exist(all_logs_list, case): + current_case_logs['settings'] = case.settings + if exist(all_logs, case_name, case): print('skipping') return None - def benchmark_single(op, name, all_logs): + + def benchmark_single(op, name, current_case_logs): + # First write None to indicate the attempt. + with open(logs_path, "wb") as output_file: + all_logs_curr = copy.deepcopy(all_logs) + current_case_logs[name] = None + all_logs_curr[case_name].append(current_case_logs) + pickle.dump(all_logs_curr, output_file) + try: - if prev_log is not None and prev_log[name] is None: + if did_smaller_fail(all_logs, name, case_name, case): # No point in trying again, a smaller example failed already. raise ValueError() logs = benchmark.run_op_benchmark(sess, op) - all_logs[name] = logs + current_case_logs[name] = logs except: - all_logs[name] = None + current_case_logs[name] = None - with open(r"logs.pickle", "wb") as output_file: - pickle.dump(all_logs_list + [all_logs], output_file) + with open(logs_path, "wb") as output_file: + all_logs_curr = copy.deepcopy(all_logs) + all_logs_curr[case_name].append(current_case_logs) + pickle.dump(all_logs_curr, output_file) - benchmark_single(auto_grad.op, 'auto_grad', all_logs) - benchmark_single(auto_hv.op, 'auto_hv', all_logs) + benchmark_single(auto_grad.op, 'auto_grad', current_case_logs) + benchmark_single(auto_hv.op, 'auto_hv', current_case_logs) if smart_grad is not None: - benchmark_single(smart_grad.op, 'smart_grad', all_logs) + benchmark_single(smart_grad.op, 'smart_grad', current_case_logs) if smart_hv is not None: - benchmark_single(smart_hv.op, 'smart_hv', all_logs) - benchmark_single(naive_grad.op, 'naive_grad', all_logs) - benchmark_single(naive_hv.op, 'naive_hv', all_logs) - return all_logs + benchmark_single(smart_hv.op, 'smart_hv', current_case_logs) + benchmark_single(naive_grad.op, 'naive_grad', current_case_logs) + benchmark_single(naive_hv.op, 'naive_hv', current_case_logs) + return current_case_logs From b5f07b366d3060db93b9183bd928c177d3fe596d Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Fri, 4 Oct 2019 12:59:09 +0100 Subject: [PATCH 09/20] guard against subprocess failing --- docs/benchmark/run_all_autodiff.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/benchmark/run_all_autodiff.py b/docs/benchmark/run_all_autodiff.py index 251a7c72..265d69fd 100644 --- a/docs/benchmark/run_all_autodiff.py +++ b/docs/benchmark/run_all_autodiff.py @@ -12,7 +12,11 @@ def run_single(case, n, d, r, R=None): '--tt_rank_vec=%d' % r, '--logs=%s' % args.logs] if R is not None: cmd.append('--tt_rank_mat=%d' % R) - print(subprocess.check_output(cmd)) + try: + print(subprocess.check_output(cmd)) + except: + print('Running subprocess failed.') + pass for n in [20, 100, 500]: From d7a79d2e348b9b9921dcfc5f16a82d967e8759c9 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Fri, 4 Oct 2019 13:59:44 +0100 Subject: [PATCH 10/20] print current command and small bug fix preventing some of the cases to run --- docs/benchmark/run_all_autodiff.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/benchmark/run_all_autodiff.py b/docs/benchmark/run_all_autodiff.py index 265d69fd..33355c7d 100644 --- a/docs/benchmark/run_all_autodiff.py +++ b/docs/benchmark/run_all_autodiff.py @@ -1,3 +1,9 @@ +""" +Running a suite of autodiff benchmarks. + run_all_autodiff.py --logs=autodiff_cpu.pkl 2> autodiff_cpu.stderr +""" + +import argparse import subprocess import utils @@ -8,11 +14,13 @@ def run_single(case, n, d, r, R=None): cmd = ['python3', 'run_single_autodiff.py', '--case=%s' % case, - '--n=%d' % n, '--m=%d' % n, '--d=%d' % d, + '--n=%d' % n, '--d=%d' % d, '--tt_rank_vec=%d' % r, '--logs=%s' % args.logs] if R is not None: cmd.append('--tt_rank_mat=%d' % R) + cmd.append('--m=%d' % n) try: + print(' '.join(cmd)) print(subprocess.check_output(cmd)) except: print('Running subprocess failed.') From e0e3980aade5a0faf026d92af86f30ffe4275cd1 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Fri, 4 Oct 2019 14:00:17 +0100 Subject: [PATCH 11/20] make sure not to halfoverwrite files --- docs/benchmark/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/benchmark/utils.py b/docs/benchmark/utils.py index 2aa9dfbb..be1da9c7 100644 --- a/docs/benchmark/utils.py +++ b/docs/benchmark/utils.py @@ -5,6 +5,7 @@ import json import pickle import copy +from shutil import copyfile def robust_cumprod(arr): @@ -382,6 +383,7 @@ def benchmark(case_name, case, logs_path): def benchmark_single(op, name, current_case_logs): # First write None to indicate the attempt. + copyfile(logs_path, logs_path + '_back') with open(logs_path, "wb") as output_file: all_logs_curr = copy.deepcopy(all_logs) current_case_logs[name] = None @@ -397,6 +399,7 @@ def benchmark_single(op, name, current_case_logs): except: current_case_logs[name] = None + copyfile(logs_path, logs_path + '_back') with open(logs_path, "wb") as output_file: all_logs_curr = copy.deepcopy(all_logs) all_logs_curr[case_name].append(current_case_logs) From 6e180152b82bb99a0aee86d8312875ea1fb9082e Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Fri, 4 Oct 2019 14:05:05 +0100 Subject: [PATCH 12/20] bug fix: check that file exists before copying --- docs/benchmark/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/benchmark/utils.py b/docs/benchmark/utils.py index be1da9c7..a70e87d4 100644 --- a/docs/benchmark/utils.py +++ b/docs/benchmark/utils.py @@ -5,6 +5,7 @@ import json import pickle import copy +import os from shutil import copyfile @@ -383,7 +384,8 @@ def benchmark(case_name, case, logs_path): def benchmark_single(op, name, current_case_logs): # First write None to indicate the attempt. - copyfile(logs_path, logs_path + '_back') + if os.path.exists(logs_path): + copyfile(logs_path, logs_path + '_back') with open(logs_path, "wb") as output_file: all_logs_curr = copy.deepcopy(all_logs) current_case_logs[name] = None From 9725abad959c003f5ba222d7008b1dc41fc29221 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Tue, 15 Oct 2019 15:53:20 +0100 Subject: [PATCH 13/20] more opt_einsums --- t3f/ops.py | 37 ++++++++++++++++------------ t3f/riemannian.py | 62 ++++++++++++++++++++++++++--------------------- 2 files changed, 55 insertions(+), 44 deletions(-) diff --git a/t3f/ops.py b/t3f/ops.py index 338f217e..bca4d572 100644 --- a/t3f/ops.py +++ b/t3f/ops.py @@ -8,8 +8,13 @@ from t3f import decompositions from t3f import initializers + from opt_einsum import contract + +def my_contract(*args, **kargs): + return contract(*args, **kargs, backend='tensorflow', optimize='optimal') + # TODO: add complexities to the comments. @@ -87,7 +92,7 @@ def _full_tt_batch(tt): for i in range(1, num_dims): res = tf.reshape(res, (batch_size, -1, ranks[i])) curr_core = tf.reshape(tt.tt_cores[i], (batch_size, ranks[i], -1)) - res = tf.einsum('oqb,obw->oqw', res, curr_core) + res = my_contract('oqb,obw->oqw', res, curr_core) if tt.is_tt_matrix(): intermediate_shape = [batch_size] for i in range(num_dims): @@ -163,7 +168,7 @@ def tt_tt_matmul(tt_matrix_a, tt_matrix_b): for core_idx in range(ndims): a_core = tt_matrix_a.tt_cores[core_idx] b_core = tt_matrix_b.tt_cores[core_idx] - curr_res_core = tf.einsum(einsum_str, a_core, b_core) + curr_res_core = contract(einsum_str, a_core, b_core, backend='tensorflow', optimize='optimal') res_left_rank = a_ranks[core_idx] * b_ranks[core_idx] res_right_rank = a_ranks[core_idx + 1] * b_ranks[core_idx + 1] @@ -223,7 +228,7 @@ def tt_dense_matmul(tt_matrix_a, matrix_b): curr_core = tt_matrix_a.tt_cores[core_idx] # On the k = core_idx iteration, after applying einsum the shape of data # becomes ik x (ik-1..., id-1, K, j0, ..., jk-1) x rank_k - data = tf.einsum('aijb,rjb->ira', curr_core, data) + data = my_contract('aijb,rjb->ira', curr_core, data) if core_idx > 0: # After reshape the shape of data becomes # (ik, ..., id-1, K, j0, ..., jk-2) x jk-1 x rank_k @@ -386,8 +391,8 @@ def tt_tt_flat_inner(tt_a, tt_b): b_core = tt_b.tt_cores[0] # Simplest example of this operation: # if both arguments are TT-tensors, then it is - # res = tf.einsum('aib,cid->bd', a_core, b_core) - res = tf.einsum(init_einsum_str, a_core, b_core) + # res = my_contract('aib,cid->bd', a_core, b_core) + res = my_contract(init_einsum_str, a_core, b_core) einsum_str = '{3}ac,{1}a{0}b,{2}c{0}d->{3}bd'.format(axes_str, a_batch_str, b_batch_str, @@ -397,8 +402,8 @@ def tt_tt_flat_inner(tt_a, tt_b): b_core = tt_b.tt_cores[core_idx] # Simplest example of this operation: # if both arguments are TT-tensors, then it is - # res = tf.einsum('ac,aib,cid->bd', res, a_core, b_core) - res = tf.einsum(einsum_str, res, a_core, b_core) + # res = my_contract('ac,aib,cid->bd', res, a_core, b_core) + res = my_contract(einsum_str, res, a_core, b_core) return tf.squeeze(res) @@ -893,7 +898,7 @@ def multiply(tt_left, right, name='t3f_multiply'): right_rank = a_ranks[core_idx + 1] * b_ranks[core_idx + 1] if is_matrix: with tf.control_dependencies(dependencies): - curr_core = tf.einsum('{0}aijb,{1}cijd->{2}acijbd'.format(bs_str_left, + curr_core = my_contract('{0}aijb,{1}cijd->{2}acijbd'.format(bs_str_left, bs_str_right, output_str), a_core, b_core) curr_core = tf.reshape(curr_core, (-1, left_rank, shape[0][core_idx], @@ -903,7 +908,7 @@ def multiply(tt_left, right, name='t3f_multiply'): curr_core = tf.squeeze(curr_core, axis=0) else: with tf.control_dependencies(dependencies): - curr_core = tf.einsum('{0}aib,{1}cid->{2}acibd'.format(bs_str_left, + curr_core = my_contract('{0}aib,{1}cid->{2}acibd'.format(bs_str_left, bs_str_right, output_str), a_core, b_core) curr_core = tf.reshape(curr_core, (-1, left_rank, shape[0][core_idx], right_rank)) @@ -946,19 +951,19 @@ def frobenius_norm_squared(tt, differentiable=False, else: bs_str = '' if tt.is_tt_matrix(): - running_prod = tf.einsum('{0}aijb,{0}cijd->{0}bd'.format(bs_str), + running_prod = my_contract('{0}aijb,{0}cijd->{0}bd'.format(bs_str), tt.tt_cores[0], tt.tt_cores[0]) else: - running_prod = tf.einsum('{0}aib,{0}cid->{0}bd'.format(bs_str), + running_prod = my_contract('{0}aib,{0}cid->{0}bd'.format(bs_str), tt.tt_cores[0], tt.tt_cores[0]) for core_idx in range(1, tt.ndims()): curr_core = tt.tt_cores[core_idx] if tt.is_tt_matrix(): - running_prod = tf.einsum('{0}ac,{0}aijb,{0}cijd->{0}bd'.format(bs_str), + running_prod = my_contract('{0}ac,{0}aijb,{0}cijd->{0}bd'.format(bs_str), running_prod, curr_core, curr_core) else: - running_prod = tf.einsum('{0}ac,{0}aib,{0}cid->{0}bd'.format(bs_str), + running_prod = my_contract('{0}ac,{0}aib,{0}cid->{0}bd'.format(bs_str), running_prod, curr_core, curr_core) return tf.squeeze(running_prod, [-1, -2]) @@ -1100,7 +1105,7 @@ def bilinear_form(A, b, c, name='t3f_bilinear_form'): # experience it's even a little bit slower (but neglectable in general). einsum_str = '{0}aikb,cijd,{1}ejkf->{2}bdf'.format(b_bs_str, c_bs_str, out_bs_str) - res = tf.einsum(einsum_str, curr_core_1, curr_matrix_core, curr_core_2) + res = contract(einsum_str, curr_core_1, curr_matrix_core, curr_core_2, backend='tensorflow', optimize='optimal') for core_idx in range(1, ndims): curr_core_1 = b.tt_cores[core_idx] curr_core_2 = c.tt_cores[core_idx] @@ -1108,8 +1113,8 @@ def bilinear_form(A, b, c, name='t3f_bilinear_form'): einsum_str = '{2}ace,{0}aikb,cijd,{1}ejkf->{2}bdf'.format(b_bs_str, c_bs_str, out_bs_str) - res = tf.einsum(einsum_str, res, curr_core_1, - curr_matrix_core, curr_core_2) + res = contract(einsum_str, res, curr_core_1, + curr_matrix_core, curr_core_2, backend='tensorflow', optimize='optimal') # Squeeze to make the result a number instead of 1 x 1 for NON batch case # and to make the result a tensor of size diff --git a/t3f/riemannian.py b/t3f/riemannian.py index 649d3537..569339df 100644 --- a/t3f/riemannian.py +++ b/t3f/riemannian.py @@ -5,6 +5,12 @@ from t3f import shapes from t3f import decompositions +from opt_einsum import contract + + +def my_contract(*args, **kargs): + return contract(*args, **kargs, backend='tensorflow', optimize='optimal') + def project_sum(what, where, weights=None): """Project sum of `what` TTs on the tangent space of `where` TT. @@ -97,7 +103,7 @@ def project_sum(what, where, weights=None): tens_core = what.tt_cores[core_idx] right_tang_core = right_tangent_space_tens.tt_cores[core_idx] einsum_str = 'sa{0}b,sbd,c{0}d->sac'.format(mode_str) - rhs[core_idx] = tf.einsum(einsum_str, tens_core, rhs[core_idx + 1], + rhs[core_idx] = my_contract(einsum_str, tens_core, rhs[core_idx + 1], right_tang_core) # Prepare lhs vectors. @@ -109,7 +115,7 @@ def project_sum(what, where, weights=None): tens_core = what.tt_cores[core_idx] left_tang_core = left_tangent_space_tens.tt_cores[core_idx] einsum_str = 'sab,a{0}c,sb{0}d->scd'.format(mode_str) - lhs[core_idx + 1] = tf.einsum(einsum_str, lhs[core_idx], left_tang_core, + lhs[core_idx + 1] = my_contract(einsum_str, lhs[core_idx], left_tang_core, tens_core) # Left to right sweep. @@ -121,27 +127,27 @@ def project_sum(what, where, weights=None): if core_idx < ndims - 1: einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str) - proj_core = tf.einsum(einsum_str, lhs[core_idx], tens_core) + proj_core = my_contract(einsum_str, lhs[core_idx], tens_core) einsum_str = 'a{0}b,sbc->sa{0}c'.format(mode_str) - proj_core -= tf.einsum(einsum_str, left_tang_core, lhs[core_idx + 1]) + proj_core -= my_contract(einsum_str, left_tang_core, lhs[core_idx + 1]) if weights is None: einsum_str = 'sa{0}b,sbc->a{0}c'.format(mode_str) - proj_core = tf.einsum(einsum_str, proj_core, rhs[core_idx + 1]) + proj_core = my_contract(einsum_str, proj_core, rhs[core_idx + 1]) else: einsum_str = 'sa{0}b,sbc->sa{0}c'.format(mode_str, output_batch_str) - proj_core_s = tf.einsum(einsum_str, proj_core, rhs[core_idx + 1]) + proj_core_s = my_contract(einsum_str, proj_core, rhs[core_idx + 1]) einsum_str = 's{1},sa{0}c->{1}a{0}c'.format(mode_str, output_batch_str) - proj_core = tf.einsum(einsum_str, weights, proj_core_s) + proj_core = my_contract(einsum_str, weights, proj_core_s) if core_idx == ndims - 1: if weights is None: einsum_str = 'sab,sb{0}c->a{0}c'.format(mode_str) - proj_core = tf.einsum(einsum_str, lhs[core_idx], tens_core) + proj_core = my_contract(einsum_str, lhs[core_idx], tens_core) else: einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str, output_batch_str) - proj_core_s = tf.einsum(einsum_str, lhs[core_idx], tens_core) + proj_core_s = my_contract(einsum_str, lhs[core_idx], tens_core) einsum_str = 's{1},sa{0}c->{1}a{0}c'.format(mode_str, output_batch_str) - proj_core = tf.einsum(einsum_str, weights, proj_core_s) + proj_core = my_contract(einsum_str, weights, proj_core_s) if output_is_batch: # Add batch dimension of size output_batch_size to left_tang_core and @@ -275,7 +281,7 @@ def project(what, where): tens_core = what.tt_cores[core_idx] right_tang_core = right_tangent_space_tens.tt_cores[core_idx] einsum_str = 'sa{0}b,sbd,c{0}d->sac'.format(mode_str) - rhs[core_idx] = tf.einsum(einsum_str, tens_core, rhs[core_idx + 1], + rhs[core_idx] = my_contract(einsum_str, tens_core, rhs[core_idx + 1], right_tang_core) # Prepare lhs vectors. @@ -287,7 +293,7 @@ def project(what, where): tens_core = what.tt_cores[core_idx] left_tang_core = left_tangent_space_tens.tt_cores[core_idx] einsum_str = 'sab,a{0}c,sb{0}d->scd'.format(mode_str) - lhs[core_idx + 1] = tf.einsum(einsum_str, lhs[core_idx], left_tang_core, + lhs[core_idx + 1] = my_contract(einsum_str, lhs[core_idx], left_tang_core, tens_core) # Left to right sweep. @@ -299,21 +305,21 @@ def project(what, where): if core_idx < ndims - 1: einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str) - proj_core = tf.einsum(einsum_str, lhs[core_idx], tens_core) + proj_core = my_contract(einsum_str, lhs[core_idx], tens_core) einsum_str = 'a{0}b,sbc->sa{0}c'.format(mode_str) - proj_core -= tf.einsum(einsum_str, left_tang_core, lhs[core_idx + 1]) + proj_core -= my_contract(einsum_str, left_tang_core, lhs[core_idx + 1]) if output_is_batch: einsum_str = 'sa{0}b,sbc->sa{0}c'.format(mode_str) else: einsum_str = 'sa{0}b,sbc->a{0}c'.format(mode_str) - proj_core = tf.einsum(einsum_str, proj_core, rhs[core_idx + 1]) + proj_core = my_contract(einsum_str, proj_core, rhs[core_idx + 1]) if core_idx == ndims - 1: if output_is_batch: einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str) else: einsum_str = 'sab,sb{0}c->a{0}c'.format(mode_str) - proj_core = tf.einsum(einsum_str, lhs[core_idx], tens_core) + proj_core = my_contract(einsum_str, lhs[core_idx], tens_core) if output_is_batch: # Add batch dimension of size output_batch_size to left_tang_core and @@ -446,7 +452,7 @@ def project_matmul(what, where, matrix): tens_core = what.tt_cores[core_idx] right_tang_core = right_tangent_space_tens.tt_cores[core_idx] matrix_core = matrix.tt_cores[core_idx] - rhs[core_idx] = tf.einsum('bije,cikf,sdef,sajkd->sabc', matrix_core, + rhs[core_idx] = my_contract('bije,cikf,sdef,sajkd->sabc', matrix_core, right_tang_core, rhs[core_idx + 1], tens_core) # Prepare lhs vectors. # lhs[core_idx] is of size @@ -458,7 +464,7 @@ def project_matmul(what, where, matrix): left_tang_core = left_tangent_space_tens.tt_cores[core_idx] matrix_core = matrix.tt_cores[core_idx] # TODO: brutforce order of indices in lhs?? - lhs[core_idx + 1] = tf.einsum('bije,aikd,sabc,scjkf->sdef', matrix_core, + lhs[core_idx + 1] = my_contract('bije,aikd,sabc,scjkf->sdef', matrix_core, left_tang_core, lhs[core_idx], tens_core) # Left to right sweep. @@ -470,17 +476,17 @@ def project_matmul(what, where, matrix): right_tang_core = right_tangent_space_tens.tt_cores[core_idx] if core_idx < ndims - 1: - proj_core = tf.einsum('scjke,sabc,bijd->saikde', tens_core, + proj_core = my_contract('scjke,sabc,bijd->saikde', tens_core, lhs[core_idx], matrix_core) - proj_core -= tf.einsum('aikb,sbcd->saikcd', left_tang_core, + proj_core -= my_contract('aikb,sbcd->saikcd', left_tang_core, lhs[core_idx + 1]) - proj_core = tf.einsum('saikcb,sbcd->saikd', proj_core, rhs[core_idx + 1]) + proj_core = my_contract('saikcb,sbcd->saikd', proj_core, rhs[core_idx + 1]) if core_idx == ndims - 1: # d and e dimensions take 1 value, since its the last rank. # To make the result shape (?, ?, ?, 1), we are summing d and leaving e, # but we could have done the opposite -- sum e and leave d. - proj_core = tf.einsum('sabc,bijd,scjke->saike', lhs[core_idx], matrix_core, + proj_core = my_contract('sabc,bijd,scjke->saike', lhs[core_idx], matrix_core, tens_core) if output_is_batch: @@ -586,7 +592,7 @@ def pairwise_flat_inner_projected(projected_tt_vectors_1, curr_core_2 = projected_tt_vectors_2.tt_cores[0] curr_du_1 = curr_core_1[:, :, :, :, :right_size] curr_du_2 = curr_core_2[:, :, :, :, :right_size] - res = tf.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2) + res = my_contract('paijb,qaijb->pq', curr_du_1, curr_du_2) for core_idx in range(1, ndims): left_size = tt_ranks[core_idx] // 2 right_size = tt_ranks[core_idx + 1] // 2 @@ -594,14 +600,14 @@ def pairwise_flat_inner_projected(projected_tt_vectors_1, curr_core_2 = projected_tt_vectors_2.tt_cores[core_idx] curr_du_1 = curr_core_1[:, left_size:, :, :, :right_size] curr_du_2 = curr_core_2[:, left_size:, :, :, :right_size] - res += tf.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2) + res += my_contract('paijb,qaijb->pq', curr_du_1, curr_du_2) left_size = tt_ranks[-2] // 2 curr_core_1 = projected_tt_vectors_1.tt_cores[-1] curr_core_2 = projected_tt_vectors_2.tt_cores[-1] curr_du_1 = curr_core_1[:, left_size:, :, :, :] curr_du_2 = curr_core_2[:, left_size:, :, :, :] - res += tf.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2) + res += my_contract('paijb,qaijb->pq', curr_du_1, curr_du_2) else: # Working with TT-tensor, not TT-matrix. right_size = tt_ranks[1] // 2 @@ -609,7 +615,7 @@ def pairwise_flat_inner_projected(projected_tt_vectors_1, curr_core_2 = projected_tt_vectors_2.tt_cores[0] curr_du_1 = curr_core_1[:, :, :, :right_size] curr_du_2 = curr_core_2[:, :, :, :right_size] - res = tf.einsum('paib,qaib->pq', curr_du_1, curr_du_2) + res = my_contract('paib,qaib->pq', curr_du_1, curr_du_2) for core_idx in range(1, ndims): left_size = tt_ranks[core_idx] // 2 right_size = tt_ranks[core_idx + 1] // 2 @@ -617,14 +623,14 @@ def pairwise_flat_inner_projected(projected_tt_vectors_1, curr_core_2 = projected_tt_vectors_2.tt_cores[core_idx] curr_du_1 = curr_core_1[:, left_size:, :, :right_size] curr_du_2 = curr_core_2[:, left_size:, :, :right_size] - res += tf.einsum('paib,qaib->pq', curr_du_1, curr_du_2) + res += my_contract('paib,qaib->pq', curr_du_1, curr_du_2) left_size = tt_ranks[-2] // 2 curr_core_1 = projected_tt_vectors_1.tt_cores[-1] curr_core_2 = projected_tt_vectors_2.tt_cores[-1] curr_du_1 = curr_core_1[:, left_size:, :, :] curr_du_2 = curr_core_2[:, left_size:, :, :] - res += tf.einsum('paib,qaib->pq', curr_du_1, curr_du_2) + res += my_contract('paib,qaib->pq', curr_du_1, curr_du_2) return res From ff41f3327ba1efb754499307d468d488461bdd55 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Tue, 15 Oct 2019 15:53:42 +0100 Subject: [PATCH 14/20] filename typo --- docs/benchmark/{utils_tet.py => utils_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/benchmark/{utils_tet.py => utils_test.py} (100%) diff --git a/docs/benchmark/utils_tet.py b/docs/benchmark/utils_test.py similarity index 100% rename from docs/benchmark/utils_tet.py rename to docs/benchmark/utils_test.py From a60814498bb3bbd912c7880bad3352a7a8436e03 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Tue, 15 Oct 2019 16:12:52 +0100 Subject: [PATCH 15/20] make the rest of einsums opt_einsums --- t3f/batch_ops.py | 15 +++++++++++---- t3f/ops_test.py | 29 +++++++++++++++++++++++++++++ t3f/tensor_train.py | 11 +++++++++-- t3f/tensor_train_batch.py | 19 +++++++++++++------ 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/t3f/batch_ops.py b/t3f/batch_ops.py index f062e0e1..de3513b5 100644 --- a/t3f/batch_ops.py +++ b/t3f/batch_ops.py @@ -6,6 +6,13 @@ from t3f import ops +from opt_einsum import contract + + +def my_contract(*args, **kargs): + return contract(*args, **kargs, backend='tensorflow', optimize='optimal') + + def concat_along_batch_dim(tt_list, name='t3f_concat_along_batch_dim'): """Concat all TensorTrainBatch objects along batch dimension. @@ -168,12 +175,12 @@ def pairwise_flat_inner(tt_1, tt_2, matrix=None, curr_core_2 = tt_2.tt_cores[0] mode_string = 'ij' if tt_1.is_tt_matrix() else 'i' einsum_str = 'pa{0}b,qc{0}d->pqbd'.format(mode_string) - res = tf.einsum(einsum_str, curr_core_1, curr_core_2) + res = my_contract(einsum_str, curr_core_1, curr_core_2) for core_idx in range(1, ndims): curr_core_1 = tt_1.tt_cores[core_idx] curr_core_2 = tt_2.tt_cores[core_idx] einsum_str = 'pqac,pa{0}b,qc{0}d->pqbd'.format(mode_string) - res = tf.einsum(einsum_str, res, curr_core_1, curr_core_2) + res = my_contract(einsum_str, res, curr_core_1, curr_core_2) else: # res[i, j] = tt_1[i] ^ T * matrix * tt_2[j] are_all_matrices = tt_1.is_tt_matrix() and tt_2.is_tt_matrix() @@ -221,13 +228,13 @@ def pairwise_flat_inner(tt_1, tt_2, matrix=None, curr_core_2 = tt_2.tt_cores[0] curr_matrix_core = matrix.tt_cores[0] # We enumerate the dummy dimension (that takes 1 value) with `k`. - res = tf.einsum('pakib,cijd,qekjf->pqbdf', curr_core_1, curr_matrix_core, + res = my_contract('pakib,cijd,qekjf->pqbdf', curr_core_1, curr_matrix_core, curr_core_2) for core_idx in range(1, ndims): curr_core_1 = tt_1.tt_cores[core_idx] curr_core_2 = tt_2.tt_cores[core_idx] curr_matrix_core = matrix.tt_cores[core_idx] - res = tf.einsum('pqace,pakib,cijd,qekjf->pqbdf', res, curr_core_1, + res = my_contract('pqace,pakib,cijd,qekjf->pqbdf', res, curr_core_1, curr_matrix_core, curr_core_2) # Squeeze to make the result of size batch_size x batch_size instead of diff --git a/t3f/ops_test.py b/t3f/ops_test.py index f79fef14..415be6b0 100644 --- a/t3f/ops_test.py +++ b/t3f/ops_test.py @@ -9,6 +9,13 @@ from t3f import initializers +from opt_einsum import contract + + +def my_contract(*args, **kargs): + return contract(*args, **kargs, backend='tensorflow', optimize='optimal') + + class _TTTensorTest(): def testFullTensor2d(self): @@ -695,6 +702,7 @@ def testTTMatTimesTTMatBroadcasting(self): left_shape = (2, 3) sum_shape = (4, 3) right_shape = (4, 4) +<<<<<<< HEAD tt_mat_1 = initializers.random_matrix_batch((left_shape, sum_shape), tt_rank=3, batch_size=3, dtype=self.dtype) @@ -713,6 +721,27 @@ def testTTMatTimesTTMatBroadcasting(self): self.assertAllClose(res_actual_val, res_desired_val, atol=1e-5, rtol=1e-5) self.assertAllClose(res_actual2_val, res_desired_val, atol=1e-5, rtol=1e-5) +======= + with self.test_session() as sess: + tt_mat_1 = initializers.random_matrix_batch((left_shape, sum_shape), + tt_rank=3, batch_size=3, + dtype=self.dtype) + tt_mat_2 = initializers.random_matrix_batch((sum_shape, right_shape), + dtype=self.dtype) + # TT-batch by one element TT-batch + res_actual = ops.matmul(tt_mat_1, tt_mat_2) + res_actual = ops.full(res_actual) + # TT by TT-batch. + res_actual2 = ops.matmul(ops.transpose(tt_mat_2[0]), ops.transpose(tt_mat_1)) + res_actual2 = ops.full(ops.transpose(res_actual2)) + res_desired = my_contract('oij,jk->oik', ops.full(tt_mat_1), + ops.full(tt_mat_2[0])) + to_run = [res_actual, res_actual2, res_desired] + res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run) + self.assertAllClose(res_actual_val, res_desired_val, atol=1e-5, rtol=1e-5) + self.assertAllClose(res_actual2_val, res_desired_val, atol=1e-5, + rtol=1e-5) +>>>>>>> make the rest of einsums opt_einsums def testTranspose(self): # Transpose a batch of TT-matrices. diff --git a/t3f/tensor_train.py b/t3f/tensor_train.py index 79807004..6e54712b 100644 --- a/t3f/tensor_train.py +++ b/t3f/tensor_train.py @@ -4,6 +4,13 @@ from t3f import shapes +from opt_einsum import contract + + +def my_contract(*args, **kargs): + return contract(*args, **kargs, backend='tensorflow', optimize='optimal') + + class TensorTrain(TensorTrainBase): """Represents a Tensor Train object (a TT-tensor or TT-matrix). @@ -130,13 +137,13 @@ def __getitem__(self, slice_spec): if remainder is not None: # Add reminder from the previous collapsed cores to the current # core. - sliced_core = tf.einsum('ab,bid->aid', remainder, sliced_core) + sliced_core = my_contract('ab,bid->aid', remainder, sliced_core) remainder = None new_tt_cores.append(sliced_core) if remainder is not None: # The reminder obtained from collapsing the last cores. - new_tt_cores[-1] = tf.einsum('aib,bd->aid', new_tt_cores[-1], remainder) + new_tt_cores[-1] = my_contract('aib,bd->aid', new_tt_cores[-1], remainder) remainder = None # TODO: infer the output ranks and shape. return TensorTrain(new_tt_cores) diff --git a/t3f/tensor_train_batch.py b/t3f/tensor_train_batch.py index 976582ac..cde7d04b 100644 --- a/t3f/tensor_train_batch.py +++ b/t3f/tensor_train_batch.py @@ -6,6 +6,13 @@ from t3f import shapes +from opt_einsum import contract + + +def my_contract(*args, **kargs): + return contract(*args, **kargs, backend='tensorflow', optimize='optimal') + + class TensorTrainBatch(TensorTrainBase): """Represents a batch of Tensor Train objects (TT-tensors or TT-matrices). @@ -203,17 +210,17 @@ def _full_getitem(self, slice_spec): remainder = sliced_core else: if do_collapse_batch_dim: - remainder = tf.einsum('ab,bd->ad', remainder, sliced_core) + remainder = my_contract('ab,bd->ad', remainder, sliced_core) else: - remainder = tf.einsum('oab,obd->oad', remainder, sliced_core) + remainder = my_contract('oab,obd->oad', remainder, sliced_core) else: if remainder is not None: # Add reminder from the previous collapsed cores to the current # core. if do_collapse_batch_dim: - sliced_core = tf.einsum('ab,bid->aid', remainder, sliced_core) + sliced_core = my_contract('ab,bid->aid', remainder, sliced_core) else: - sliced_core = tf.einsum('oab,obid->oaid', remainder, + sliced_core = my_contract('oab,obid->oaid', remainder, sliced_core) remainder = None new_tt_cores.append(sliced_core) @@ -221,11 +228,11 @@ def _full_getitem(self, slice_spec): if remainder is not None: # The reminder obtained from collapsing the last cores. if do_collapse_batch_dim: - new_tt_cores[-1] = tf.einsum('aib,bd->aid', new_tt_cores[-1], + new_tt_cores[-1] = my_contract('aib,bd->aid', new_tt_cores[-1], remainder) else: - new_tt_cores[-1] = tf.einsum('oaib,obd->oaid', new_tt_cores[-1], + new_tt_cores[-1] = my_contract('oaib,obd->oaid', new_tt_cores[-1], remainder) remainder = None # TODO: infer the output ranks and shape. From 3e91ccd622bf35d020f22424c8fdd375767396cf Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Tue, 22 Oct 2019 23:14:13 +0100 Subject: [PATCH 16/20] fix running instructions --- docs/benchmark/run_all_autodiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/benchmark/run_all_autodiff.py b/docs/benchmark/run_all_autodiff.py index 33355c7d..92c136b0 100644 --- a/docs/benchmark/run_all_autodiff.py +++ b/docs/benchmark/run_all_autodiff.py @@ -1,6 +1,6 @@ """ Running a suite of autodiff benchmarks. - run_all_autodiff.py --logs=autodiff_cpu.pkl 2> autodiff_cpu.stderr + python3 run_all_autodiff.py --logs=autodiff_logs.pkl 2> autodiff_logs.stderr """ import argparse From 556d2debfe91b0597a4b603f5ca3a468814f3bd3 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Wed, 23 Oct 2019 14:13:52 +0100 Subject: [PATCH 17/20] better hyperparams --- docs/benchmark/run_all_autodiff.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/benchmark/run_all_autodiff.py b/docs/benchmark/run_all_autodiff.py index 92c136b0..f26ca1e7 100644 --- a/docs/benchmark/run_all_autodiff.py +++ b/docs/benchmark/run_all_autodiff.py @@ -27,12 +27,12 @@ def run_single(case, n, d, r, R=None): pass -for n in [20, 100, 500]: +for n in [5, 10, 20]: for d in [10, 20, 40]: - for r in [5, 10, 20]: + for r in [5, 10, 20, 40]: run_single('completion', n, d, r) run_single('ExpMachines', n, d, r) - for R in [5, 10, 20]: + for R in [5, 10, 20, 40]: run_single('xAx', n, d, r, R) run_single('xABx', n, d, r, R) run_single('RayleighQuotient', n, d, r, R) From bbf55e95ac7f8f5e62d81c49658c7cc301cef8cd Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Fri, 28 Feb 2020 18:12:34 +0000 Subject: [PATCH 18/20] Fix some minor rebase problem --- t3f/ops_test.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/t3f/ops_test.py b/t3f/ops_test.py index 415be6b0..2c2029b9 100644 --- a/t3f/ops_test.py +++ b/t3f/ops_test.py @@ -702,7 +702,6 @@ def testTTMatTimesTTMatBroadcasting(self): left_shape = (2, 3) sum_shape = (4, 3) right_shape = (4, 4) -<<<<<<< HEAD tt_mat_1 = initializers.random_matrix_batch((left_shape, sum_shape), tt_rank=3, batch_size=3, dtype=self.dtype) @@ -721,27 +720,6 @@ def testTTMatTimesTTMatBroadcasting(self): self.assertAllClose(res_actual_val, res_desired_val, atol=1e-5, rtol=1e-5) self.assertAllClose(res_actual2_val, res_desired_val, atol=1e-5, rtol=1e-5) -======= - with self.test_session() as sess: - tt_mat_1 = initializers.random_matrix_batch((left_shape, sum_shape), - tt_rank=3, batch_size=3, - dtype=self.dtype) - tt_mat_2 = initializers.random_matrix_batch((sum_shape, right_shape), - dtype=self.dtype) - # TT-batch by one element TT-batch - res_actual = ops.matmul(tt_mat_1, tt_mat_2) - res_actual = ops.full(res_actual) - # TT by TT-batch. - res_actual2 = ops.matmul(ops.transpose(tt_mat_2[0]), ops.transpose(tt_mat_1)) - res_actual2 = ops.full(ops.transpose(res_actual2)) - res_desired = my_contract('oij,jk->oik', ops.full(tt_mat_1), - ops.full(tt_mat_2[0])) - to_run = [res_actual, res_actual2, res_desired] - res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run) - self.assertAllClose(res_actual_val, res_desired_val, atol=1e-5, rtol=1e-5) - self.assertAllClose(res_actual2_val, res_desired_val, atol=1e-5, - rtol=1e-5) ->>>>>>> make the rest of einsums opt_einsums def testTranspose(self): # Transpose a batch of TT-matrices. From 31d468d78e054eebd9a9dd4f0ad02aec86f16605 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Fri, 28 Feb 2020 18:21:26 +0000 Subject: [PATCH 19/20] Move fast einsum to a centralized place --- t3f/batch_ops.py | 16 ++++------ t3f/ops.py | 39 ++++++++++-------------- t3f/ops_test.py | 7 ----- t3f/riemannian.py | 63 ++++++++++++++++++--------------------- t3f/tensor_train.py | 12 ++------ t3f/tensor_train_batch.py | 20 +++++-------- t3f/utils.py | 5 ++++ 7 files changed, 65 insertions(+), 97 deletions(-) diff --git a/t3f/batch_ops.py b/t3f/batch_ops.py index de3513b5..2f43f7df 100644 --- a/t3f/batch_ops.py +++ b/t3f/batch_ops.py @@ -4,13 +4,7 @@ from t3f.tensor_train_base import TensorTrainBase from t3f.tensor_train_batch import TensorTrainBatch from t3f import ops - - -from opt_einsum import contract - - -def my_contract(*args, **kargs): - return contract(*args, **kargs, backend='tensorflow', optimize='optimal') +from t3f import utils def concat_along_batch_dim(tt_list, name='t3f_concat_along_batch_dim'): @@ -175,12 +169,12 @@ def pairwise_flat_inner(tt_1, tt_2, matrix=None, curr_core_2 = tt_2.tt_cores[0] mode_string = 'ij' if tt_1.is_tt_matrix() else 'i' einsum_str = 'pa{0}b,qc{0}d->pqbd'.format(mode_string) - res = my_contract(einsum_str, curr_core_1, curr_core_2) + res = utils.einsum(einsum_str, curr_core_1, curr_core_2) for core_idx in range(1, ndims): curr_core_1 = tt_1.tt_cores[core_idx] curr_core_2 = tt_2.tt_cores[core_idx] einsum_str = 'pqac,pa{0}b,qc{0}d->pqbd'.format(mode_string) - res = my_contract(einsum_str, res, curr_core_1, curr_core_2) + res = utils.einsum(einsum_str, res, curr_core_1, curr_core_2) else: # res[i, j] = tt_1[i] ^ T * matrix * tt_2[j] are_all_matrices = tt_1.is_tt_matrix() and tt_2.is_tt_matrix() @@ -228,13 +222,13 @@ def pairwise_flat_inner(tt_1, tt_2, matrix=None, curr_core_2 = tt_2.tt_cores[0] curr_matrix_core = matrix.tt_cores[0] # We enumerate the dummy dimension (that takes 1 value) with `k`. - res = my_contract('pakib,cijd,qekjf->pqbdf', curr_core_1, curr_matrix_core, + res = utils.einsum('pakib,cijd,qekjf->pqbdf', curr_core_1, curr_matrix_core, curr_core_2) for core_idx in range(1, ndims): curr_core_1 = tt_1.tt_cores[core_idx] curr_core_2 = tt_2.tt_cores[core_idx] curr_matrix_core = matrix.tt_cores[core_idx] - res = my_contract('pqace,pakib,cijd,qekjf->pqbdf', res, curr_core_1, + res = utils.einsum('pqace,pakib,cijd,qekjf->pqbdf', res, curr_core_1, curr_matrix_core, curr_core_2) # Squeeze to make the result of size batch_size x batch_size instead of diff --git a/t3f/ops.py b/t3f/ops.py index bca4d572..85f4346a 100644 --- a/t3f/ops.py +++ b/t3f/ops.py @@ -8,13 +8,6 @@ from t3f import decompositions from t3f import initializers - -from opt_einsum import contract - - -def my_contract(*args, **kargs): - return contract(*args, **kargs, backend='tensorflow', optimize='optimal') - # TODO: add complexities to the comments. @@ -92,7 +85,7 @@ def _full_tt_batch(tt): for i in range(1, num_dims): res = tf.reshape(res, (batch_size, -1, ranks[i])) curr_core = tf.reshape(tt.tt_cores[i], (batch_size, ranks[i], -1)) - res = my_contract('oqb,obw->oqw', res, curr_core) + res = utils.einsum('oqb,obw->oqw', res, curr_core) if tt.is_tt_matrix(): intermediate_shape = [batch_size] for i in range(num_dims): @@ -168,7 +161,7 @@ def tt_tt_matmul(tt_matrix_a, tt_matrix_b): for core_idx in range(ndims): a_core = tt_matrix_a.tt_cores[core_idx] b_core = tt_matrix_b.tt_cores[core_idx] - curr_res_core = contract(einsum_str, a_core, b_core, backend='tensorflow', optimize='optimal') + curr_res_core = utils.einsum(einsum_str, a_core, b_core) res_left_rank = a_ranks[core_idx] * b_ranks[core_idx] res_right_rank = a_ranks[core_idx + 1] * b_ranks[core_idx + 1] @@ -228,7 +221,7 @@ def tt_dense_matmul(tt_matrix_a, matrix_b): curr_core = tt_matrix_a.tt_cores[core_idx] # On the k = core_idx iteration, after applying einsum the shape of data # becomes ik x (ik-1..., id-1, K, j0, ..., jk-1) x rank_k - data = my_contract('aijb,rjb->ira', curr_core, data) + data = utils.einsum('aijb,rjb->ira', curr_core, data) if core_idx > 0: # After reshape the shape of data becomes # (ik, ..., id-1, K, j0, ..., jk-2) x jk-1 x rank_k @@ -391,8 +384,8 @@ def tt_tt_flat_inner(tt_a, tt_b): b_core = tt_b.tt_cores[0] # Simplest example of this operation: # if both arguments are TT-tensors, then it is - # res = my_contract('aib,cid->bd', a_core, b_core) - res = my_contract(init_einsum_str, a_core, b_core) + # res = utils.einsum('aib,cid->bd', a_core, b_core) + res = utils.einsum(init_einsum_str, a_core, b_core) einsum_str = '{3}ac,{1}a{0}b,{2}c{0}d->{3}bd'.format(axes_str, a_batch_str, b_batch_str, @@ -402,8 +395,8 @@ def tt_tt_flat_inner(tt_a, tt_b): b_core = tt_b.tt_cores[core_idx] # Simplest example of this operation: # if both arguments are TT-tensors, then it is - # res = my_contract('ac,aib,cid->bd', res, a_core, b_core) - res = my_contract(einsum_str, res, a_core, b_core) + # res = utils.einsum('ac,aib,cid->bd', res, a_core, b_core) + res = utils.einsum(einsum_str, res, a_core, b_core) return tf.squeeze(res) @@ -898,7 +891,7 @@ def multiply(tt_left, right, name='t3f_multiply'): right_rank = a_ranks[core_idx + 1] * b_ranks[core_idx + 1] if is_matrix: with tf.control_dependencies(dependencies): - curr_core = my_contract('{0}aijb,{1}cijd->{2}acijbd'.format(bs_str_left, + curr_core = utils.einsum('{0}aijb,{1}cijd->{2}acijbd'.format(bs_str_left, bs_str_right, output_str), a_core, b_core) curr_core = tf.reshape(curr_core, (-1, left_rank, shape[0][core_idx], @@ -908,7 +901,7 @@ def multiply(tt_left, right, name='t3f_multiply'): curr_core = tf.squeeze(curr_core, axis=0) else: with tf.control_dependencies(dependencies): - curr_core = my_contract('{0}aib,{1}cid->{2}acibd'.format(bs_str_left, + curr_core = utils.einsum('{0}aib,{1}cid->{2}acibd'.format(bs_str_left, bs_str_right, output_str), a_core, b_core) curr_core = tf.reshape(curr_core, (-1, left_rank, shape[0][core_idx], right_rank)) @@ -951,19 +944,19 @@ def frobenius_norm_squared(tt, differentiable=False, else: bs_str = '' if tt.is_tt_matrix(): - running_prod = my_contract('{0}aijb,{0}cijd->{0}bd'.format(bs_str), + running_prod = utils.einsum('{0}aijb,{0}cijd->{0}bd'.format(bs_str), tt.tt_cores[0], tt.tt_cores[0]) else: - running_prod = my_contract('{0}aib,{0}cid->{0}bd'.format(bs_str), + running_prod = utils.einsum('{0}aib,{0}cid->{0}bd'.format(bs_str), tt.tt_cores[0], tt.tt_cores[0]) for core_idx in range(1, tt.ndims()): curr_core = tt.tt_cores[core_idx] if tt.is_tt_matrix(): - running_prod = my_contract('{0}ac,{0}aijb,{0}cijd->{0}bd'.format(bs_str), + running_prod = utils.einsum('{0}ac,{0}aijb,{0}cijd->{0}bd'.format(bs_str), running_prod, curr_core, curr_core) else: - running_prod = my_contract('{0}ac,{0}aib,{0}cid->{0}bd'.format(bs_str), + running_prod = utils.einsum('{0}ac,{0}aib,{0}cid->{0}bd'.format(bs_str), running_prod, curr_core, curr_core) return tf.squeeze(running_prod, [-1, -2]) @@ -1105,7 +1098,7 @@ def bilinear_form(A, b, c, name='t3f_bilinear_form'): # experience it's even a little bit slower (but neglectable in general). einsum_str = '{0}aikb,cijd,{1}ejkf->{2}bdf'.format(b_bs_str, c_bs_str, out_bs_str) - res = contract(einsum_str, curr_core_1, curr_matrix_core, curr_core_2, backend='tensorflow', optimize='optimal') + res = utils.einsum(einsum_str, curr_core_1, curr_matrix_core, curr_core_2) for core_idx in range(1, ndims): curr_core_1 = b.tt_cores[core_idx] curr_core_2 = c.tt_cores[core_idx] @@ -1113,8 +1106,8 @@ def bilinear_form(A, b, c, name='t3f_bilinear_form'): einsum_str = '{2}ace,{0}aikb,cijd,{1}ejkf->{2}bdf'.format(b_bs_str, c_bs_str, out_bs_str) - res = contract(einsum_str, res, curr_core_1, - curr_matrix_core, curr_core_2, backend='tensorflow', optimize='optimal') + res = utils.einsum(einsum_str, res, curr_core_1, + curr_matrix_core, curr_core_2) # Squeeze to make the result a number instead of 1 x 1 for NON batch case # and to make the result a tensor of size diff --git a/t3f/ops_test.py b/t3f/ops_test.py index 2c2029b9..f79fef14 100644 --- a/t3f/ops_test.py +++ b/t3f/ops_test.py @@ -9,13 +9,6 @@ from t3f import initializers -from opt_einsum import contract - - -def my_contract(*args, **kargs): - return contract(*args, **kargs, backend='tensorflow', optimize='optimal') - - class _TTTensorTest(): def testFullTensor2d(self): diff --git a/t3f/riemannian.py b/t3f/riemannian.py index 569339df..779c445b 100644 --- a/t3f/riemannian.py +++ b/t3f/riemannian.py @@ -4,12 +4,7 @@ from t3f.tensor_train_batch import TensorTrainBatch from t3f import shapes from t3f import decompositions - -from opt_einsum import contract - - -def my_contract(*args, **kargs): - return contract(*args, **kargs, backend='tensorflow', optimize='optimal') +from t3f import utils def project_sum(what, where, weights=None): @@ -103,7 +98,7 @@ def project_sum(what, where, weights=None): tens_core = what.tt_cores[core_idx] right_tang_core = right_tangent_space_tens.tt_cores[core_idx] einsum_str = 'sa{0}b,sbd,c{0}d->sac'.format(mode_str) - rhs[core_idx] = my_contract(einsum_str, tens_core, rhs[core_idx + 1], + rhs[core_idx] = utils.einsum(einsum_str, tens_core, rhs[core_idx + 1], right_tang_core) # Prepare lhs vectors. @@ -115,7 +110,7 @@ def project_sum(what, where, weights=None): tens_core = what.tt_cores[core_idx] left_tang_core = left_tangent_space_tens.tt_cores[core_idx] einsum_str = 'sab,a{0}c,sb{0}d->scd'.format(mode_str) - lhs[core_idx + 1] = my_contract(einsum_str, lhs[core_idx], left_tang_core, + lhs[core_idx + 1] = utils.einsum(einsum_str, lhs[core_idx], left_tang_core, tens_core) # Left to right sweep. @@ -127,27 +122,27 @@ def project_sum(what, where, weights=None): if core_idx < ndims - 1: einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str) - proj_core = my_contract(einsum_str, lhs[core_idx], tens_core) + proj_core = utils.einsum(einsum_str, lhs[core_idx], tens_core) einsum_str = 'a{0}b,sbc->sa{0}c'.format(mode_str) - proj_core -= my_contract(einsum_str, left_tang_core, lhs[core_idx + 1]) + proj_core -= utils.einsum(einsum_str, left_tang_core, lhs[core_idx + 1]) if weights is None: einsum_str = 'sa{0}b,sbc->a{0}c'.format(mode_str) - proj_core = my_contract(einsum_str, proj_core, rhs[core_idx + 1]) + proj_core = utils.einsum(einsum_str, proj_core, rhs[core_idx + 1]) else: einsum_str = 'sa{0}b,sbc->sa{0}c'.format(mode_str, output_batch_str) - proj_core_s = my_contract(einsum_str, proj_core, rhs[core_idx + 1]) + proj_core_s = utils.einsum(einsum_str, proj_core, rhs[core_idx + 1]) einsum_str = 's{1},sa{0}c->{1}a{0}c'.format(mode_str, output_batch_str) - proj_core = my_contract(einsum_str, weights, proj_core_s) + proj_core = utils.einsum(einsum_str, weights, proj_core_s) if core_idx == ndims - 1: if weights is None: einsum_str = 'sab,sb{0}c->a{0}c'.format(mode_str) - proj_core = my_contract(einsum_str, lhs[core_idx], tens_core) + proj_core = utils.einsum(einsum_str, lhs[core_idx], tens_core) else: einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str, output_batch_str) - proj_core_s = my_contract(einsum_str, lhs[core_idx], tens_core) + proj_core_s = utils.einsum(einsum_str, lhs[core_idx], tens_core) einsum_str = 's{1},sa{0}c->{1}a{0}c'.format(mode_str, output_batch_str) - proj_core = my_contract(einsum_str, weights, proj_core_s) + proj_core = utils.einsum(einsum_str, weights, proj_core_s) if output_is_batch: # Add batch dimension of size output_batch_size to left_tang_core and @@ -281,7 +276,7 @@ def project(what, where): tens_core = what.tt_cores[core_idx] right_tang_core = right_tangent_space_tens.tt_cores[core_idx] einsum_str = 'sa{0}b,sbd,c{0}d->sac'.format(mode_str) - rhs[core_idx] = my_contract(einsum_str, tens_core, rhs[core_idx + 1], + rhs[core_idx] = utils.einsum(einsum_str, tens_core, rhs[core_idx + 1], right_tang_core) # Prepare lhs vectors. @@ -293,7 +288,7 @@ def project(what, where): tens_core = what.tt_cores[core_idx] left_tang_core = left_tangent_space_tens.tt_cores[core_idx] einsum_str = 'sab,a{0}c,sb{0}d->scd'.format(mode_str) - lhs[core_idx + 1] = my_contract(einsum_str, lhs[core_idx], left_tang_core, + lhs[core_idx + 1] = utils.einsum(einsum_str, lhs[core_idx], left_tang_core, tens_core) # Left to right sweep. @@ -305,21 +300,21 @@ def project(what, where): if core_idx < ndims - 1: einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str) - proj_core = my_contract(einsum_str, lhs[core_idx], tens_core) + proj_core = utils.einsum(einsum_str, lhs[core_idx], tens_core) einsum_str = 'a{0}b,sbc->sa{0}c'.format(mode_str) - proj_core -= my_contract(einsum_str, left_tang_core, lhs[core_idx + 1]) + proj_core -= utils.einsum(einsum_str, left_tang_core, lhs[core_idx + 1]) if output_is_batch: einsum_str = 'sa{0}b,sbc->sa{0}c'.format(mode_str) else: einsum_str = 'sa{0}b,sbc->a{0}c'.format(mode_str) - proj_core = my_contract(einsum_str, proj_core, rhs[core_idx + 1]) + proj_core = utils.einsum(einsum_str, proj_core, rhs[core_idx + 1]) if core_idx == ndims - 1: if output_is_batch: einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str) else: einsum_str = 'sab,sb{0}c->a{0}c'.format(mode_str) - proj_core = my_contract(einsum_str, lhs[core_idx], tens_core) + proj_core = utils.einsum(einsum_str, lhs[core_idx], tens_core) if output_is_batch: # Add batch dimension of size output_batch_size to left_tang_core and @@ -452,7 +447,7 @@ def project_matmul(what, where, matrix): tens_core = what.tt_cores[core_idx] right_tang_core = right_tangent_space_tens.tt_cores[core_idx] matrix_core = matrix.tt_cores[core_idx] - rhs[core_idx] = my_contract('bije,cikf,sdef,sajkd->sabc', matrix_core, + rhs[core_idx] = utils.einsum('bije,cikf,sdef,sajkd->sabc', matrix_core, right_tang_core, rhs[core_idx + 1], tens_core) # Prepare lhs vectors. # lhs[core_idx] is of size @@ -464,7 +459,7 @@ def project_matmul(what, where, matrix): left_tang_core = left_tangent_space_tens.tt_cores[core_idx] matrix_core = matrix.tt_cores[core_idx] # TODO: brutforce order of indices in lhs?? - lhs[core_idx + 1] = my_contract('bije,aikd,sabc,scjkf->sdef', matrix_core, + lhs[core_idx + 1] = utils.einsum('bije,aikd,sabc,scjkf->sdef', matrix_core, left_tang_core, lhs[core_idx], tens_core) # Left to right sweep. @@ -476,17 +471,17 @@ def project_matmul(what, where, matrix): right_tang_core = right_tangent_space_tens.tt_cores[core_idx] if core_idx < ndims - 1: - proj_core = my_contract('scjke,sabc,bijd->saikde', tens_core, + proj_core = utils.einsum('scjke,sabc,bijd->saikde', tens_core, lhs[core_idx], matrix_core) - proj_core -= my_contract('aikb,sbcd->saikcd', left_tang_core, + proj_core -= utils.einsum('aikb,sbcd->saikcd', left_tang_core, lhs[core_idx + 1]) - proj_core = my_contract('saikcb,sbcd->saikd', proj_core, rhs[core_idx + 1]) + proj_core = utils.einsum('saikcb,sbcd->saikd', proj_core, rhs[core_idx + 1]) if core_idx == ndims - 1: # d and e dimensions take 1 value, since its the last rank. # To make the result shape (?, ?, ?, 1), we are summing d and leaving e, # but we could have done the opposite -- sum e and leave d. - proj_core = my_contract('sabc,bijd,scjke->saike', lhs[core_idx], matrix_core, + proj_core = utils.einsum('sabc,bijd,scjke->saike', lhs[core_idx], matrix_core, tens_core) if output_is_batch: @@ -592,7 +587,7 @@ def pairwise_flat_inner_projected(projected_tt_vectors_1, curr_core_2 = projected_tt_vectors_2.tt_cores[0] curr_du_1 = curr_core_1[:, :, :, :, :right_size] curr_du_2 = curr_core_2[:, :, :, :, :right_size] - res = my_contract('paijb,qaijb->pq', curr_du_1, curr_du_2) + res = utils.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2) for core_idx in range(1, ndims): left_size = tt_ranks[core_idx] // 2 right_size = tt_ranks[core_idx + 1] // 2 @@ -600,14 +595,14 @@ def pairwise_flat_inner_projected(projected_tt_vectors_1, curr_core_2 = projected_tt_vectors_2.tt_cores[core_idx] curr_du_1 = curr_core_1[:, left_size:, :, :, :right_size] curr_du_2 = curr_core_2[:, left_size:, :, :, :right_size] - res += my_contract('paijb,qaijb->pq', curr_du_1, curr_du_2) + res += utils.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2) left_size = tt_ranks[-2] // 2 curr_core_1 = projected_tt_vectors_1.tt_cores[-1] curr_core_2 = projected_tt_vectors_2.tt_cores[-1] curr_du_1 = curr_core_1[:, left_size:, :, :, :] curr_du_2 = curr_core_2[:, left_size:, :, :, :] - res += my_contract('paijb,qaijb->pq', curr_du_1, curr_du_2) + res += utils.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2) else: # Working with TT-tensor, not TT-matrix. right_size = tt_ranks[1] // 2 @@ -615,7 +610,7 @@ def pairwise_flat_inner_projected(projected_tt_vectors_1, curr_core_2 = projected_tt_vectors_2.tt_cores[0] curr_du_1 = curr_core_1[:, :, :, :right_size] curr_du_2 = curr_core_2[:, :, :, :right_size] - res = my_contract('paib,qaib->pq', curr_du_1, curr_du_2) + res = utils.einsum('paib,qaib->pq', curr_du_1, curr_du_2) for core_idx in range(1, ndims): left_size = tt_ranks[core_idx] // 2 right_size = tt_ranks[core_idx + 1] // 2 @@ -623,14 +618,14 @@ def pairwise_flat_inner_projected(projected_tt_vectors_1, curr_core_2 = projected_tt_vectors_2.tt_cores[core_idx] curr_du_1 = curr_core_1[:, left_size:, :, :right_size] curr_du_2 = curr_core_2[:, left_size:, :, :right_size] - res += my_contract('paib,qaib->pq', curr_du_1, curr_du_2) + res += utils.einsum('paib,qaib->pq', curr_du_1, curr_du_2) left_size = tt_ranks[-2] // 2 curr_core_1 = projected_tt_vectors_1.tt_cores[-1] curr_core_2 = projected_tt_vectors_2.tt_cores[-1] curr_du_1 = curr_core_1[:, left_size:, :, :] curr_du_2 = curr_core_2[:, left_size:, :, :] - res += my_contract('paib,qaib->pq', curr_du_1, curr_du_2) + res += utils.einsum('paib,qaib->pq', curr_du_1, curr_du_2) return res diff --git a/t3f/tensor_train.py b/t3f/tensor_train.py index 6e54712b..7fb14256 100644 --- a/t3f/tensor_train.py +++ b/t3f/tensor_train.py @@ -2,13 +2,7 @@ from t3f.tensor_train_base import TensorTrainBase from t3f import shapes - - -from opt_einsum import contract - - -def my_contract(*args, **kargs): - return contract(*args, **kargs, backend='tensorflow', optimize='optimal') +from t3f import utils class TensorTrain(TensorTrainBase): @@ -137,13 +131,13 @@ def __getitem__(self, slice_spec): if remainder is not None: # Add reminder from the previous collapsed cores to the current # core. - sliced_core = my_contract('ab,bid->aid', remainder, sliced_core) + sliced_core = utils.einsum('ab,bid->aid', remainder, sliced_core) remainder = None new_tt_cores.append(sliced_core) if remainder is not None: # The reminder obtained from collapsing the last cores. - new_tt_cores[-1] = my_contract('aib,bd->aid', new_tt_cores[-1], remainder) + new_tt_cores[-1] = utils.einsum('aib,bd->aid', new_tt_cores[-1], remainder) remainder = None # TODO: infer the output ranks and shape. return TensorTrain(new_tt_cores) diff --git a/t3f/tensor_train_batch.py b/t3f/tensor_train_batch.py index cde7d04b..0a180317 100644 --- a/t3f/tensor_train_batch.py +++ b/t3f/tensor_train_batch.py @@ -4,13 +4,7 @@ from t3f.tensor_train_base import TensorTrainBase from t3f.tensor_train import TensorTrain from t3f import shapes - - -from opt_einsum import contract - - -def my_contract(*args, **kargs): - return contract(*args, **kargs, backend='tensorflow', optimize='optimal') +from t3f import utils class TensorTrainBatch(TensorTrainBase): @@ -210,17 +204,17 @@ def _full_getitem(self, slice_spec): remainder = sliced_core else: if do_collapse_batch_dim: - remainder = my_contract('ab,bd->ad', remainder, sliced_core) + remainder = utils.einsum('ab,bd->ad', remainder, sliced_core) else: - remainder = my_contract('oab,obd->oad', remainder, sliced_core) + remainder = utils.einsum('oab,obd->oad', remainder, sliced_core) else: if remainder is not None: # Add reminder from the previous collapsed cores to the current # core. if do_collapse_batch_dim: - sliced_core = my_contract('ab,bid->aid', remainder, sliced_core) + sliced_core = utils.einsum('ab,bid->aid', remainder, sliced_core) else: - sliced_core = my_contract('oab,obid->oaid', remainder, + sliced_core = utils.einsum('oab,obid->oaid', remainder, sliced_core) remainder = None new_tt_cores.append(sliced_core) @@ -228,11 +222,11 @@ def _full_getitem(self, slice_spec): if remainder is not None: # The reminder obtained from collapsing the last cores. if do_collapse_batch_dim: - new_tt_cores[-1] = my_contract('aib,bd->aid', new_tt_cores[-1], + new_tt_cores[-1] = utils.einsum('aib,bd->aid', new_tt_cores[-1], remainder) else: - new_tt_cores[-1] = my_contract('oaib,obd->oaid', new_tt_cores[-1], + new_tt_cores[-1] = utils.einsum('oaib,obd->oaid', new_tt_cores[-1], remainder) remainder = None # TODO: infer the output ranks and shape. diff --git a/t3f/utils.py b/t3f/utils.py index 906664a0..7904e2b0 100644 --- a/t3f/utils.py +++ b/t3f/utils.py @@ -1,5 +1,10 @@ import numpy as np import tensorflow as tf +from opt_einsum import contract + + +def einsum(*args, **kargs): + return contract(*args, **kargs, backend='tensorflow', optimize='optimal') # TODO: substitute with native implementation when it's ready. From 6c98a1c32ec934d8efabc3fe4f259f1d0135e77d Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Fri, 28 Feb 2020 18:25:25 +0000 Subject: [PATCH 20/20] another bad rebase merge fix --- t3f/ops_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t3f/ops_test.py b/t3f/ops_test.py index f79fef14..dbad6cf5 100644 --- a/t3f/ops_test.py +++ b/t3f/ops_test.py @@ -358,8 +358,8 @@ def testBilinearForm(self): self.assertAllClose(res_actual_val, np.squeeze(res_desired), atol=1e-5, rtol=1e-5) - def testQuadraticFormBatch(self): - # Test quadratic form for batch of tensors. + def testBilinearFormBatch(self): + # Test bilinear form for batch of tensors. shape_list = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2))) rank_list = (1, 2)