From e843d0bfd3ef4f6693ce13047e287fa508106f32 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Tue, 8 Apr 2025 12:51:47 +0800 Subject: [PATCH 1/5] =?UTF-8?q?excuter(cpu/cuda):io=E7=B3=BB=E5=88=97?= =?UTF-8?q?=E7=AE=97=E5=AD=90=EF=BC=8Cprint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/deepx/tensorfunc/{print.hpp => io.hpp} | 6 +++--- excuter/op-mem-cuda/src/client/tfs.cpp | 2 +- .../tensorfunc/{print_miaobyte.hpp => io_miaobyte.hpp} | 8 ++++---- .../tf/print.hpp => op-mem-cuda/src/deepx/tf/io.hpp} | 8 ++++---- excuter/op-mem-cuda/test/tensorfunc/0_new.cpp | 2 +- excuter/op-mem-cuda/test/tensorfunc/1_cublas_add.cpp | 2 +- .../op-mem-cuda/test/tensorfunc/1_cublas_matmul.cpp | 2 +- excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp | 2 +- excuter/op-mem-ompsimd/src/client/tfs.cpp | 2 +- .../tensorfunc/{print_miaobyte.hpp => io_miaobyte.hpp} | 8 ++++---- .../print.hpp => op-mem-ompsimd/src/deepx/tf/io.hpp} | 10 +++++----- excuter/op-mem-ompsimd/test/op/1_mem.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/4_tensor_add.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/4_tensor_max.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/4_tensor_mul.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/4_tensor_sub.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp | 2 +- .../test/tensorfunc/6_tensor_broadcast.cpp | 2 +- .../test/tensorfunc/7_tensor_transpose.cpp | 2 +- .../op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp | 2 +- front/py/deepx/autograd/graph.py | 2 ++ 25 files changed, 41 insertions(+), 39 deletions(-) rename excuter/cpp-common/src/deepx/tensorfunc/{print.hpp => io.hpp} (78%) rename excuter/op-mem-cuda/src/deepx/tensorfunc/{print_miaobyte.hpp => 
io_miaobyte.hpp} (93%) rename excuter/{op-mem-ompsimd/src/deepx/tf/print.hpp => op-mem-cuda/src/deepx/tf/io.hpp} (92%) rename excuter/op-mem-ompsimd/src/deepx/tensorfunc/{print_miaobyte.hpp => io_miaobyte.hpp} (83%) rename excuter/{op-mem-cuda/src/deepx/tf/print.hpp => op-mem-ompsimd/src/deepx/tf/io.hpp} (91%) diff --git a/excuter/cpp-common/src/deepx/tensorfunc/print.hpp b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp similarity index 78% rename from excuter/cpp-common/src/deepx/tensorfunc/print.hpp rename to excuter/cpp-common/src/deepx/tensorfunc/io.hpp index 559f3912..17541932 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/print.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp @@ -1,5 +1,5 @@ -#ifndef DEEPX_TENSORFUNC_PRINT_HPP -#define DEEPX_TENSORFUNC_PRINT_HPP +#ifndef DEEPX_TENSORFUNC_IO_HPP +#define DEEPX_TENSORFUNC_IO_HPP #include "deepx/tensor.hpp" @@ -16,4 +16,4 @@ namespace deepx::tensorfunc{ } } -#endif // DEEPX_TENSORFUNC_PRINT_HPP +#endif // DEEPX_TENSORFUNC_IO_HPP diff --git a/excuter/op-mem-cuda/src/client/tfs.cpp b/excuter/op-mem-cuda/src/client/tfs.cpp index 5a4e5540..44560d7f 100644 --- a/excuter/op-mem-cuda/src/client/tfs.cpp +++ b/excuter/op-mem-cuda/src/client/tfs.cpp @@ -1,7 +1,7 @@ #include "deepx/tf/arg.hpp" #include "deepx/tf/tf.hpp" #include "deepx/tf/new.hpp" -#include "deepx/tf/print.hpp" +#include "deepx/tf/io.hpp" #include "deepx/tf/init.hpp" #include "deepx/tf/elementwise_basic.hpp" #include "deepx/tf/elementwise_sqrt.hpp" diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/print_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp similarity index 93% rename from excuter/op-mem-cuda/src/deepx/tensorfunc/print_miaobyte.hpp rename to excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp index 98487d63..bcd568b1 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/print_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp @@ -1,5 +1,5 @@ -#ifndef 
DEEPX_TENSORFUNC_PRINT_DEFAULT_HPP -#define DEEPX_TENSORFUNC_PRINT_DEFAULT_HPP +#ifndef DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP +#define DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP #include #include @@ -8,7 +8,7 @@ #include #include #include "deepx/tensorfunc/authors.hpp" -#include "deepx/tensorfunc/print.hpp" +#include "deepx/tensorfunc/io.hpp" namespace deepx::tensorfunc { @@ -66,4 +66,4 @@ namespace deepx::tensorfunc }; } -#endif // DEEPX_TENSORFUNC_PRINT_DEFAULT_HPP \ No newline at end of file +#endif // DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/print.hpp b/excuter/op-mem-cuda/src/deepx/tf/io.hpp similarity index 92% rename from excuter/op-mem-ompsimd/src/deepx/tf/print.hpp rename to excuter/op-mem-cuda/src/deepx/tf/io.hpp index 5746b435..6118471a 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/print.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/io.hpp @@ -1,9 +1,9 @@ -#ifndef DEEPX_TF_PRINT_HPP -#define DEEPX_TF_PRINT_HPP +#ifndef DEEPX_TF_IO_HPP +#define DEEPX_TF_IO_HPP #include "deepx/tf/tf.hpp" -#include "deepx/tensorfunc/print.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/authors.hpp" namespace deepx::tf { diff --git a/excuter/op-mem-cuda/test/tensorfunc/0_new.cpp b/excuter/op-mem-cuda/test/tensorfunc/0_new.cpp index 4ec44898..5896850c 100644 --- a/excuter/op-mem-cuda/test/tensorfunc/0_new.cpp +++ b/excuter/op-mem-cuda/test/tensorfunc/0_new.cpp @@ -1,7 +1,7 @@ #include "deepx/tensorfunc/init.hpp" #include "deepx/tensor.hpp" #include "deepx/tensorfunc/new.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensorfunc/authors.hpp" diff --git a/excuter/op-mem-cuda/test/tensorfunc/1_cublas_add.cpp b/excuter/op-mem-cuda/test/tensorfunc/1_cublas_add.cpp index 9ab2a44c..5904b91c 100644 --- 
a/excuter/op-mem-cuda/test/tensorfunc/1_cublas_add.cpp +++ b/excuter/op-mem-cuda/test/tensorfunc/1_cublas_add.cpp @@ -1,7 +1,7 @@ #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensor.hpp" #include "deepx/tensorfunc/new.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/elementwise.hpp" #include "deepx/tensorfunc/elementwise_cublas_basic.hpp" using namespace deepx::tensorfunc; diff --git a/excuter/op-mem-cuda/test/tensorfunc/1_cublas_matmul.cpp b/excuter/op-mem-cuda/test/tensorfunc/1_cublas_matmul.cpp index cac9cae5..3a2ac4e0 100644 --- a/excuter/op-mem-cuda/test/tensorfunc/1_cublas_matmul.cpp +++ b/excuter/op-mem-cuda/test/tensorfunc/1_cublas_matmul.cpp @@ -1,7 +1,7 @@ #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensor.hpp" #include "deepx/tensorfunc/new.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/matmul.hpp" #include "deepx/tensorfunc/matmul_cublas.hpp" diff --git a/excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp b/excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp index 33ccab95..f18671c5 100644 --- a/excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp +++ b/excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp @@ -1,7 +1,7 @@ #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensor.hpp" #include "deepx/tensorfunc/new.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/changeshape_miaobyte.hpp" using namespace deepx::tensorfunc; using namespace deepx; diff --git a/excuter/op-mem-ompsimd/src/client/tfs.cpp b/excuter/op-mem-ompsimd/src/client/tfs.cpp index 0489f169..d81306ee 100644 --- a/excuter/op-mem-ompsimd/src/client/tfs.cpp +++ b/excuter/op-mem-ompsimd/src/client/tfs.cpp @@ -4,7 +4,7 @@ #include "deepx/tf/arg.hpp" #include "deepx/tf/new.hpp" #include "deepx/tf/init.hpp" -#include 
"deepx/tf/print.hpp" +#include "deepx/tf/io.hpp" #include "deepx/tf/changeshape.hpp" #include "deepx/tf/elementwise.hpp" #include "deepx/tf/tffactory.hpp" diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/print_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp similarity index 83% rename from excuter/op-mem-ompsimd/src/deepx/tensorfunc/print_miaobyte.hpp rename to excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp index e2e5e576..1a30bf40 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/print_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp @@ -1,5 +1,5 @@ -#ifndef DEEPX_TENSORFUNC_PRINT_MIAOBYTE_HPP -#define DEEPX_TENSORFUNC_PRINT_MIAOBYTE_HPP +#ifndef DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP +#define DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP #include @@ -7,7 +7,7 @@ #include "stdutil/vector.hpp" #include "stdutil/print.hpp" #include "deepx/tensorfunc/authors.hpp" -#include "deepx/tensorfunc/print.hpp" +#include "deepx/tensorfunc/io.hpp" namespace deepx::tensorfunc { @@ -35,4 +35,4 @@ namespace deepx::tensorfunc } }; } -#endif // DEEPX_TENSORFUNC_PRINT_DEFAULT_HPP \ No newline at end of file +#endif // DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tf/print.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp similarity index 91% rename from excuter/op-mem-cuda/src/deepx/tf/print.hpp rename to excuter/op-mem-ompsimd/src/deepx/tf/io.hpp index 5746b435..ba180f3d 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/print.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp @@ -1,9 +1,9 @@ -#ifndef DEEPX_TF_PRINT_HPP -#define DEEPX_TF_PRINT_HPP +#ifndef DEEPX_TF_IO_HPP +#define DEEPX_TF_IO_HPP #include "deepx/tf/tf.hpp" -#include "deepx/tensorfunc/print.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/authors.hpp" namespace deepx::tf { @@ -62,4 +62,4 
@@ namespace deepx::tf } }; } -#endif +#endif // DEEPX_TF_IO_HPP diff --git a/excuter/op-mem-ompsimd/test/op/1_mem.cpp b/excuter/op-mem-ompsimd/test/op/1_mem.cpp index 327210f5..6433d086 100644 --- a/excuter/op-mem-ompsimd/test/op/1_mem.cpp +++ b/excuter/op-mem-ompsimd/test/op/1_mem.cpp @@ -3,7 +3,7 @@ #include "deepx/tensor.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/authors.hpp" using namespace deepx::mem; diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp index 128c18d1..8a7f34ca 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp @@ -5,7 +5,7 @@ #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensorfunc/file.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp index 969a0ad4..492d4e86 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp @@ -4,7 +4,7 @@ #include "deepx/tensor.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/file.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp index 11df39bc..ec82a0c9 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp @@ -1,7 +1,7 @@ #include #include 
"deepx/tensor.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/file.hpp" int main(){ diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_add.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_add.cpp index 784f642c..35a57404 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_add.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_add.cpp @@ -4,7 +4,7 @@ #include "deepx/tensor.hpp" #include "deepx/tensorfunc/elementwise.hpp" #include "deepx/tensorfunc/elementwise_miaobyte.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init.hpp" #include "tensorutil.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp index 3921c69d..16072397 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp @@ -3,7 +3,7 @@ #include #include -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensor.hpp" #include "deepx/tensorfunc/new.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_max.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_max.cpp index 015e009f..a2438adf 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_max.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_max.cpp @@ -3,7 +3,7 @@ #include "deepx/tensorfunc/elementwise_miaobyte.hpp" #include "deepx/tensor.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/authors.hpp" #include "tensorutil.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_mul.cpp 
b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_mul.cpp index 4e469d58..c93e42d9 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_mul.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_mul.cpp @@ -4,7 +4,7 @@ #include "deepx/tensor.hpp" #include "deepx/tensorfunc/elementwise.hpp" #include "deepx/tensorfunc/elementwise_miaobyte.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" #include "tensorutil.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_sub.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_sub.cpp index e57b91ef..0cbd2b86 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_sub.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_sub.cpp @@ -3,7 +3,7 @@ #include "deepx/tensor.hpp" #include "deepx/tensorfunc/elementwise.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init.hpp" #include "deepx/tensorfunc/authors.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp index 0a3f0949..c1ce026a 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp @@ -11,7 +11,7 @@ #include "deepx/shape_reduce.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/file.hpp" #include diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/6_tensor_broadcast.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/6_tensor_broadcast.cpp index 2b7c6ebb..6658be10 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/6_tensor_broadcast.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/6_tensor_broadcast.cpp @@ 
-2,7 +2,7 @@ #include "deepx/tensor.hpp" #include "deepx/tensorfunc/changeshape.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensorfunc/elementwise.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp index 90188489..d843b6e1 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp @@ -6,7 +6,7 @@ #include "deepx/tensorfunc/changeshape_miaobyte.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/authors.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "stdutil/vector.hpp" #include "tensorutil.hpp" #include "deepx/shape_transpose.hpp" diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp index 3a6bafdc..9456e408 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp @@ -9,7 +9,7 @@ #include "deepx/shape_concat.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" -#include "deepx/tensorfunc/print_miaobyte.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" #include "stdutil/vector.hpp" #include "deepx/mem/mem.hpp" #include "deepx/mem/mem_ompsimd.hpp" diff --git a/front/py/deepx/autograd/graph.py b/front/py/deepx/autograd/graph.py index e5b0f86e..332407a3 100644 --- a/front/py/deepx/autograd/graph.py +++ b/front/py/deepx/autograd/graph.py @@ -6,6 +6,7 @@ class Graph: # 类属性存储默认实例 _default_graph = None + @classmethod def get_default(cls): """获取或创建默认计算图(线程不安全)""" @@ -87,5 +88,6 @@ def graph_method(f): return f + # 初始化默认图 Graph._default_graph = Graph() \ No newline at end of file From 
75246b24ca73a6c7c8f409386a99038c1eaf8b72 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Tue, 8 Apr 2025 16:06:39 +0800 Subject: [PATCH 2/5] =?UTF-8?q?cpp-common:io=E7=B3=BB=E5=88=97=E7=AE=97?= =?UTF-8?q?=E5=AD=90=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cpp-common/src/deepx/tensorfunc/io.hpp | 20 ++++++++++ .../src/deepx/tensorfunc/reduce.hpp | 39 ++++++++++--------- .../op-mem-cuda/src/deepx/tensorfunc/file.hpp | 18 --------- .../src/deepx/tensorfunc/shape.hpp | 21 ---------- 4 files changed, 40 insertions(+), 58 deletions(-) delete mode 100644 excuter/op-mem-cuda/src/deepx/tensorfunc/file.hpp delete mode 100644 excuter/op-mem-cuda/src/deepx/tensorfunc/shape.hpp diff --git a/excuter/cpp-common/src/deepx/tensorfunc/io.hpp b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp index 17541932..48ae2d44 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/io.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp @@ -14,6 +14,26 @@ namespace deepx::tensorfunc{ void print(const Tensor &t, const std::string &f=""){ printDispatcher::print(t, f); } + + template + struct saveDispatcher{ + static void save(Tensor &tensor,const std::string &path,int filebegin=0,int fileend=-1)=delete; + }; + + template + void save(Tensor &tensor,const std::string &path,int filebegin=0,int fileend=-1){ + saveDispatcher::save(tensor, path, filebegin, fileend); + } + + template + struct loadDispatcher{ + static Tensor load(const std::string &path,int filebegin=0,int fileend=-1)=delete; + }; + + template + Tensor load(const std::string &path,int filebegin=0,int fileend=-1){ + return loadDispatcher::load(path, filebegin, fileend); + } } #endif // DEEPX_TENSORFUNC_IO_HPP diff --git a/excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp b/excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp index c9f3b2a7..56c86dbd 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp +++ 
b/excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp @@ -7,50 +7,51 @@ namespace deepx::tensorfunc { + + template - struct reducesumDispatcher + struct reducemaxDispatcher { - static void reducesum(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; + static void reducemax(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; }; template - void reducesum(const Tensor &A, const int axis,const bool keepdims, Tensor &B) + void reducemax(const Tensor &A, const int axis,const bool keepdims, Tensor &B) { - reducesumDispatcher::reducesum(A, axis, keepdims, B); + reducemaxDispatcher::reducemax(A, axis, keepdims, B); } template - struct reduceprodDispatcher + struct reduceminDispatcher { - static void reduceprod(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; + static void reducemin(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; }; - template - void reduceprod(const Tensor &A, const int axis,const bool keepdims, Tensor &B) + void reducemin(const Tensor &A, const int axis,const bool keepdims, Tensor &B) { - reduceprodDispatcher::reduceprod(A, axis, keepdims, B); + reduceminDispatcher::reducemin(A, axis, keepdims, B); } - + template - struct reducemaxDispatcher + struct sumDispatcher { - static void reducemax(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; + static void reducesum(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; }; template - void reducemax(const Tensor &A, const int axis,const bool keepdims, Tensor &B) + void sum(const Tensor &A, const int axis,const bool keepdims, Tensor &B) { - reducemaxDispatcher::reducemax(A, axis, keepdims, B); + sumDispatcher::sum(A, axis, keepdims, B); } template - struct reduceminDispatcher + struct prodDispatcher { - static void reducemin(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; + static void prod(const Tensor &A, const int axis,const bool keepdims, Tensor 
&B) = delete; }; + template - void reducemin(const Tensor &A, const int axis,const bool keepdims, Tensor &B) + void prod(const Tensor &A, const int axis,const bool keepdims, Tensor &B) { - reduceminDispatcher::reducemin(A, axis, keepdims, B); + prodDispatcher::prod(A, axis, keepdims, B); } - } #endif // DEEPX_TENSORFUNC_REDUCE_HPP diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/file.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/file.hpp deleted file mode 100644 index 62695e64..00000000 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/file.hpp +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef DEEPX_TENSORFUNC_FILE_HPP -#define DEEPX_TENSORFUNC_FILE_HPP - -#include -#include - -#include -namespace deepx::tensorfunc -{ - template - void save(Tensor &tensor,const std::string &path); - - template - Tensor load(const std::string &path); - -} - -#endif \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/shape.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/shape.hpp deleted file mode 100644 index 3ad23019..00000000 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/shape.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef DEEPX_TENSORFUNC_SHAPE_HPP -#define DEEPX_TENSORFUNC_SHAPE_HPP - -#include -#include - -#include "deepx/tensor.hpp" -#include "deepx/tensorfunc/new.hpp" -namespace deepx::tensorfunc -{ - template - void transpose(const Tensor &tensor, Tensor &result, const std::vector &dimOrder); - - template - void concat(const std::vector *> &tensors, const int axis, Tensor &result); - - template - void split(const Tensor &tensor, const int axis, std::vector *> &results); -} - -#endif // DEEPX_TENSORFUNC_TRANSPOSE_HPP \ No newline at end of file From 156814dd59e06a304c2b109d2187af3c0d33bf76 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Tue, 8 Apr 2025 22:18:42 +0800 Subject: [PATCH 3/5] =?UTF-8?q?excuter(cpu/cuda):io=20save/load=E7=AE=97?= =?UTF-8?q?=E5=AD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- doc/excuter/deepx.op.drawio | 141 ++++++++----- doc/excuter/deepx.op.drawio.svg | 4 - doc/excuter/deepx.op.jpg | Bin 0 -> 59840 bytes .../cpp-common/src/deepx/tensorfunc/io.hpp | 12 +- .../src/deepx/tensorfunc/reduce.hpp | 24 +-- .../src/deepx/tensorfunc/io_miaobyte.hpp | 98 +++++++-- .../src/deepx/tensorfunc/reduce.hpp | 23 --- .../src/deepx/tensorfunc/reduce_miaobyte.hpp | 68 +++++++ .../src/deepx/tensorfunc/file.hpp | 48 ----- .../src/deepx/tensorfunc/io_miaobyte.hpp | 60 ++++++ .../src/deepx/tensorfunc/reduce.hpp | 188 ----------------- .../src/deepx/tensorfunc/reduce_miaobyte.hpp | 190 ++++++++++++++++++ .../test/tensorfunc/2_tensor_new.cpp | 7 +- .../test/tensorfunc/2_tensor_range.cpp | 6 +- .../test/tensorfunc/3_tensor_print.cpp | 4 +- .../test/tensorfunc/4_tensor_matmul.cpp | 9 +- .../test/tensorfunc/5_tensor_sum.cpp | 10 +- 17 files changed, 528 insertions(+), 364 deletions(-) delete mode 100644 doc/excuter/deepx.op.drawio.svg create mode 100644 doc/excuter/deepx.op.jpg delete mode 100644 excuter/op-mem-cuda/src/deepx/tensorfunc/reduce.hpp create mode 100644 excuter/op-mem-cuda/src/deepx/tensorfunc/reduce_miaobyte.hpp delete mode 100644 excuter/op-mem-ompsimd/src/deepx/tensorfunc/file.hpp delete mode 100644 excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce.hpp create mode 100644 excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp diff --git a/doc/excuter/deepx.op.drawio b/doc/excuter/deepx.op.drawio index e30564a9..86117f1f 100644 --- a/doc/excuter/deepx.op.drawio +++ b/doc/excuter/deepx.op.drawio @@ -1,79 +1,122 @@ - + - - + + - - - - - + + - - + + - - + + - - + + - - - - - - + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + - - + + - - + + - + + + + + + + + + + + + + + + + - - + + + + + + + + + - - + + + + - - + + + + + + + + - - + + - - + + diff --git a/doc/excuter/deepx.op.drawio.svg b/doc/excuter/deepx.op.drawio.svg deleted file mode 100644 index 
8c207b90..00000000 --- a/doc/excuter/deepx.op.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
excuter 
cuda
excuter 
cpu
excuter 
cuda
class Op{
name string;
vector<string> args;
vector<string> returns;
bool grad;
vector<string> args_grad;
vector<string> returns_grad;

virusal void forward();
virsual void backward();
}

openblas的matmul实现
template <T >
class Matmul_cblas:Op
cblas实现

tensorfunc/elementwise.hpp

template<T>
void matmul_cblas(const Tensor<T> &a, const Tensor<T> &b, Tensor<T> &c)

cblas对double的特化

template <>
void matmul<double>(const Tensor<double> &a, const Tensor<double> &b, Tensor<double> &c)
某auther a的实现
template <T >
class Matmul_authora:Op
默认的matmul
会选其中一个作为默认
template <T >
class Matmul:Op
某auther a的实现

tensorfunc/elementwise.hpp

template<T>
void matmul_authora(const Tensor<T> &a, const Tensor<T> &b, Tensor<T> &c)

authora对double的特化

template <>
  void matmul_authora<double>(const Tensor<double> &a, const Tensor<double> &b, Tensor<double> &c)
tensorfunc<T>
tensorfunc 特化
Op
\ No newline at end of file diff --git a/doc/excuter/deepx.op.jpg b/doc/excuter/deepx.op.jpg new file mode 100644 index 0000000000000000000000000000000000000000..85d4d9bc8e79111cb835b7340aab5b9eb7009049 GIT binary patch literal 59840 zcmd432UwHKwlEy~MnyLwy=^HXAR=Hux?6$JkrF};8$yxLB%w*&>K3FV7?4gtDFO)) z0|p2XQ0cuUbdcVA?_a#YD0+Q>OrcQ}hquWc*a0=A%c}j}7(JHFebfMrZ)g;p|-iz}3wYVF-Hoi>aCUFBiW3 z?Hk`|HZbJV@9)0=dcE7U@3jK}-Qs^y<{xD<*x4gt^a88&KS2b&ae8O3)A8$$f5UHm z$8GO1)vFV1Hb@x=$I7XE

b212c1UPg0JA8khp~KnpXTQTw=g*%z&+ya5 ziwr+8FkHNR_436_S1vIyFfuV-x%%_ZOg~?|e2w|q&&+iE=kJxA`u@!`XMdu9@#jkn zm*~0wML78kVEXCIgL8amPVoayGo3oabn2uA!1JBg&Yb!V|Ne6?oIC&1*)yjZzLSl9 z2An#3`qb&uXD^*UbN0f8Qw)Gpr_Y={cm4v?wV#-;-|(Tbu(I>ZLQLc0i@Vt*@0-Mu z1f+gtFtG6ul=go9_EV?Oy@!tsZC~94V&sfqMPBKJ^h&S)y{&(5^LsAS|Lwv5%^d(- zK68rR*)vQ46~J-s@BdEt2XC}n^kP6sBU@<%!$&*dkV`AGkpI-UKUb>@Ch9(&U^ zXU=}BMqiBXJ$STt^CbZ!OP5&Ss1V;}M8HBNpYM)<)CnM|4rH0@U3~%=pZfsIM4SL> zEi*!u0;1Xqtj#|h>AuDvbmroJi)B8xvc#VN{7wKBxaXpK!oH>l_tqdoLq4-n(LS%D ziu73mTkC_uzyPgE%@J1RRz>iREYH5cBz6q8*j;Ol^cYPCKe$Htq3r7)D~Wx47XM>e z_++b@&(ziqr-ATNiD#g2AY0ulUsh&rNP8wJdvzytR9Y*xzHPRQSn%8E2Q?!3E(SQLcGnkY0AVUSnvdJNqfO3q)nXOAU-X7 zVra^G2bZS=4j#|=8cg`wdxaeAzsVE0Y`q1!qkIl*y^F))P5>?c+H;pS=A`7h+FzSz zy@|Yc=x-TuylEVYlQ?{g`<>rkpEIcDW8BX2x$1mYo8O|RAL-{SM%LNmC2?~CFRH+3 zQkHX&9g&2a0iuGDfolF2l4=(dEQ4xamNxq?W|Cme2=Y5?M(P=(uZSJ*| zN=`@nrk%}SwnLf7$#2ewW3R{W8pmlKN<7_MFDI`*kv|xs$a{h7AK=dE{C(2?6YM;( zF`24&uLGG(ZDJnPQ3L5R-eSuvxhZq!V&kW^p;Mar&#NJa@}ojr#)B zdfzu#$ozWE0xZRpx2xz525WtK(tu}%*-F5hw#%IS{Zm(lXp*PK*A@0s-1L+ zeRf5fK`Q&Y=|b;Dv32b;vtg~S$^3RVU3h(rz*vBxN+rk`1Jh8<0&9%y`YTurQA<+@ z$2|;o`QLmn1rL_{@#R-`hr0>z0`p`@Pi5AYoKf%bs{FhEWa_??&*DRlG0z4E&Z#t& zUc+gXy3es;DesI9@y|uy0fY`?$`O|>j?YAtwpsbbS%|%CT9g`XH%g^?S8JI+&16J# zr2BYATiNDe=P|{=y(kF(smB*&?B!vg^Tgl`Xxq-JDy|8$Xe$uVEaY^q{41N_pWzCp6 znWV1-ZP6Z>HM^)#W`YO%i2-I;AFZb}m%A(I$k#Kdg{RK~`@Kb`hE~l)-AG-{_*(Fd zn1-Z6xlXNukGqrVCCw(a{P*jQhsjLHtNn4Q#ly3SaLf|B4#$?DLKXlYzlC`mbF^5?}Q5raU{_e|)E?p_6oX$FgZTuTNoK)X!F| z%Uuf=t(U)E&P{b0HfT&NALmLy$X)&bGSABH{n|6Qg@RK2d-AoNT3cXi-Ot)$V@Qej||-MglT8j~5sdI3MX2 zCkyaNWUCf{v5kCjCjg^^q%gc}t)uA7z2OMV)BCPQeBnK%2Mpz-?-BXQ(t~l$gneBN z2fI!ZO7&f(WiwY*OL!haB1gUl0Em4$+9jdj{~&u0avH1&VLamBDEV|TIQHo;;0ON% z-v9tw6~e)nxMdel0MO&%Q(z$|bJZ&}*LE-=*zI@R?|@UNe(BvMBdaD?xiIb~c0Z#@ zak>0uq)j-`pUQj$A>rrcCXCmzK=As>e3(~0Qrwy6_fXg9xbo6RaY@`-`doJe=dO!P zS-p_s63!4t+C%f~C1wc;n2hUNW#8u6KshPN$H@(F>WI&sgPdm-XBUVS#7EI@_ax^F2EjYm5$Y z(#{m*!_>U};)y)CRCgMGyJUNF$R+gbZ!|-K>(RS2qa)TxQ)58_mP10eY$$BXt?(X^Pn);vk_Qr;H(= 
zi@pcXELjoLcf>jx&z0}S=FS#13wBgi59PfFNAouiY@&EN`8GZ$PXzonv+5fVsC2+n zzdQUAh#eeDUo4nSNOAVq4JexP3{1prSzMd}aS6D%ZE$gAA=CF-T$#fw9+^#x7pKY^ zbWOOG-^>jiaGi3UQn>k?e7zWBV(+aw-6&wK+c33ZkQ0szr*NVVdRGTSY6o}q%_m)( zFJMB2ldIIHWG9A&ZXsyV!}B_tIHjn9196we>0(Xx>1-Di+&dYaTbJOf?a;6kE0yR^ zzWjlU`H#Yb?P#xn`&)%XjRKRMGDtqz1SHoylr!>r)cWz@?{qF?fc&vC1`9b#TNoXD zCz^@>>a$9$4rV=YvVEq)TbKkUF0n(DHW_((lEi%u1 zzQfg}$Ju_aF}@u4Y+&0XQGUZk+{OL9UPWtIejJx8hO_y~qPj*NYEjL{WaYk|#mF{K z)ApIGwNy9Tt)VQPEA>G<&vSta>)kJU@KAs0t<@x)X}C)IFu$!=fMV4@?%-s5lp== zcOH1#rf?S<3_R_|bSOBulV~>F^Q7{;CL#}-cr~EuBiW!x42G!7A-}HpILySy{<8(qk1p0*6+k3OHqL?==yF)+{BcId7olhUynNs*6d?$APRSva)Eg9)%v9$yXt=Q8$@zE191HN-9d`|i<2hq+KqOf(o0+_wq^+ia=7m zK|(PrpSH{NWtRH$o3Q4zY`CV)Y|k8(6Qu@*;>v%fpJii%2cixc*4n8xwZ(2EL3`Tde)hB6PR@EGCP9BZY zVrE$}q$nac63BHA>SZQ?vBhxq1i+_Y2NpvS16Y}FXSGdNYd>_3ZV#8Jgyai}4W1Vk z`Tf1NfE(w>UmvkW<(n15C0ht$?}>Vv{GejhC_FdSxh&n!*S-2fd3{@yZJ$W-bi1`| zS`#FdJFC&DSLfA%RIvtLWAcR@tzr?a$#G37$lrg63if()DhqMpibwA)eRJB}%hVw% zUKdnZtWFh5%50*>mqW=)$gL_Y^9(2>z?GM4JIaKpIpYpQ|X;!ym#wtJ^JL$>*KXq@^n9RM|8VL#D>Q5 zYl?Z9$(Iu_Y`fuje((JhEQGLJb!jcj^k%-b`4YO$f&+`GOtkSQz{c}gfw1~4lU6m%&(h$MtT&6<9 z4~!)D(Wm`S>Q?3tn%K|Z<lqJJdaFtveBZ^B*MpWyZMTCNJ$dcU+Azp9I;#+GPC zd@Rlx%)k^yl^B9y8OZb^bB*aJaum`M!H|Z9PN$Aqbn|aw^E+a- zaNof@G7Q4e_k)}}4Ggvy+AzmH+U=sBpOj)U~n4**A)R3qO4iN6mcXG zO`fbCK4*5ge^>XKGNkPUp!5cB+3U5m>~%F(QTex$qinN2Q82o!y7 zdEO(@1*_HaGLb}~9PaRXP$&D0j}Jfp~V=T&^9TeZ6((nW&L;SCf)bl41W^)QqWP+ zcq6JSLk*-1ku8)}8hJQwwPI%bpox>5~nY`?$B?ikL;9Q3B|9 zk*N@^6?wKQVQ&0D!%$t_^j5I(vku<^-iZKSZ}E7hJdn|i^ABAz4b^Pz?7Z!>ScJ*M zoo#;cC;TXO)RNvv&g&_#_2rZ^HAXhKoMYyQx~Mwm?JYmjREf<}6bD>GFo_qp1qKId zEo-uvsOLEiOm9Ko<5$}tZ>FcO81?R*vCE2!XW~64c1K=h!4R$ubtHFE*u-pj_TH%1{jjV?!BQz*9zE$n@*uxqeVCb*teC?&PmhhbP&l_ zsgD0yIrgG<{;>Ybt_{O#Wc(n!+l>-c+-!0!SZ2_{(;yFp?GmUc_EKBYd){8|`1*?d zEf^8RNlOQtyPgy*>BFBaSZobCQaog{V|5sqo}MvLAda; zs_V9N`u;w#`5=}t;omD^P+`?&l}Ol2vm%Lc_z0xdE_SHS@GEV8_}Y=)&!j@F^p>fn 
zTb?K^^1``RruVLBOmH1Qjo8R@$#(mdV=wLAbT`QGY34f=QCYR(7Q~WbKF-CVXwEt>!zEC;}v6-v+aYu+1C5DbG_MwY{Vd}$_%#WErh|CMMlt2)Ffu%&EC*qs8Vn}X1d$aEP(g3xPhU5U%RAv`aB3R&s z8B`WO-6^f<6ToV^LdGD73bgVE(bfjgxe*aOuWNek&An{rbGcphA^A-O2*L_P4%`O* z4v(ci|N9jF!?)n^^$%gG+`9*-qlq+9B%`jw0aHUk$g=#sY+KoEx^XdDgNLdVsL~0R z7o{J7a>`^S4I{TX&P$((B1V(NhEVE{F1ukCsDYaE@T98H zd}rYgPuVz~_DcD#fX-!)5sN4MSvH_UkJfG_H@~!80xnaq@GQ%cU za?!P}KDWEckzs6jXHXD(w%I#)08Q{Cz7MPH-^rB2`dF({ayNTFd7MyF5mg#mr$SfI zjRL^C@tB>TS^njSMu{hYfg6NL*%QF!XQ}+X>4B)wfQH9)V+-o_nt~gUk3)at(10v$ zbLTjQD9k-!)Ig2L$9H|RV-8jNg;^lWThFRK=|m2vtu5{{VMo`O_4iVTWh0Q1ZkL+4 z(+br-F6rXtP5a=zUI7jlKVex-aSvzY+M?CeGvn;QnAtkgwVS~q=YILql>V6bhla@? z3;s*uf2rAD`?urX1>n>8(1Oz=UV))b!@ep}+n1>$4@Q)P1>Vw}EJ!L2?a?rWsc2cH z;XIf*7ScmY7ta0pT|i95?Z6~o{IZ+ECMYB=Bd_WcENM~=#7|ddL^%mUi``=tT#M!CM_i9=lH$1r z8rqGdx$yC54C} zOv~8)6s`!|xab0Od~rb&s90n@i)o39ev*+x)YwsG%}A-ia3@>8T8fI^PPsA{jM>hE zI&GGqUS0)ttQi`a-pIzJUZFzz;>qEd5@OElJonr-OwiN6?!o_%qtyFJ*sqg_*tvs-Vu-e(L49xq{3JK zq3i8b^D&2_l&@?1uIqT@vQPC4=On_6hwq%*RI?wJA-hQN?G4Z9MuBvQ(lP=c+eG)5 z;j$lJ`|x)e?3w|XniosAt(T$)w3bLcdVm} zAcHMNa1^XE{QT&;-YRk1X`DbVUFqZ#T)D*OkScnRj6Pycermxdx72&z0+=}Hl~UI% zE4Fff(KYeuMsL0z=hg-(rLZn8buOV!bt+t@5~wb7A1e^cBh~#>`4=+Mp>b>%hYEaH zu;J{`x9U?Q)lqauRKE0&u=XMz!H@mmbs=27MqIl(u7R?wo_@<9=m%f#pB%tZ0JMXI zces5-C!WqZTC_OJDUErK3NRN;3z}2fqi^aQ4q|R8dpFf+!hu*bYO5yqw8Z1AKgp3s zg|GGf+2NJb6IH6X--WDy7#F4=2Tb%G1LViDKUSiZa~k|(S>7K#0e><|oHKrfXy=_( zj2+iTty-co77k2g82O%-uqcj5adm(c+!tD1^3e|DoK5GE9)vJ73K5KpHv|(;0M`$n z(LD`_q72$I%O_MKT z?^&=pF2Z6zO13~e5%>WVv@lg=O{Lsr$IFEm#1d)YO;+B18DDrPerVnfy?uS}{$jP> zV3%LCXeOhHPN8449@d~0agOh=R+}JRp{1g^2tmuoJ6l|#Jg=us4OM2iMpd^vxf>7) z8vX9Y8ukQLuE-MHRadlDjyjd)J}NRbD88>BfF|3oEilhn@ZDPUE9*9=DPMQan~@41 zbb__6W3D-D?AM=|I( zH}jh6+xJ!^sbUr(H6B`fd>8Xf`g+w1RWcG*QgU*Pevkc;K|w$MqW=n)Qzg0eYg4Sp z)3lBf*FNOt1_;+5xPLf27n1*|tD*u;ncAz=<{BNGRw*eb=U$-5GwsTYSVywFA?2!sak!=rj0N(IeEl#6W#)@Q3-z z>HlA7?iO`fsL{>*m#QuL4xC)PcSiVoVpC`>4KAv)sk78pq(MV3E*%f1UrmTO2nz_} z$RcJs?2g*a6lnE%uS 
zQ&qQ@HS%iTDF0F3GswOhe)%_LiwmKa>g_?M>xWSlt@-^`+CuJ-=%Z&IMeX2DWeKd{ z71H&xFM6f*pD~cfh|{F_@Yitv<60jOjP@s z3bh4Na|I@6D?e;!=kx}{0wT7FQ`U231aj!2J0^&xh(D4A@7$&9{Z;>)w5yCNAhX~n zT%*%;*Py`h^*OV85Alk?7E(o4czNB*<&ym3o9#|Z15!`sGBr#aFw4deQS~}$9KqQf z%XIrc$I#p zD*Brl9RCE;yl*}bt$qUNJlp{Z7Q|XHPa&CG5k&SJs+<0{Y(teZFZehacaZQSmz`Rp zRmi~2E2~4>x(8&Oe_lAyvv#e8<>{KiyGhaEgW{?qeb*ol3#t@fM`7AP_#hv+x5W5P z2ujOBYHGC{+WDeAz@fnvEN!|spN}(ftLe+D=5lzUVQsbOYygXsQ*woG&C_sxf5}{Y zy4uyns1FWa*ig1$Ss9RdJkZHyps>-daj>aOcBsg8_%#937KR&Xt0O7HWl5a0g}49# zxU`yiY>?{6JdCIyP~37w`O@1XzDv=8^%l_@obyeEq%KWS?|w5(e(Y>p8^uWFPm`tc z12g}Qdjb9*A^u5c__4*%AKbyGaazSH<7rV!Y0K6_USDw!$&O~Nu28|!QN9+J&9jw>QAMQ=GXd5e;{agm@cn4cUonVao@U~zI?74~qGj=}#%tNW7p z*={SP2x%Hnd}CE8$vc|kH?VqNxGA((&DKGVfNID&0Sr@_jDE$Bi9N2D-%7FmWd$f< zKJd%%FO<31o2hZGZ?Ss*9=EBf=*`#M`EeqK8Zm@jmxvvX(Z5_T=S8Dlxq{dSXdVY` zQ&*q`b27UvWdnGjtVVrJr#^VbLLOA-W0@*au_~!hA}&wWR#p~NJrW;91-2@4r#kg) zp~_s_GKTMIi!C>Mx7LIBGXq)qq=sIjR6*VuMr1GxWa6)kd| zf}90M#d!j}=TdmhaD3~+?OP2N3Z1C|BNjqQ>Y&jL&Jo(?q1kGAmi9Z369Av#fOmyg z(7nc+))(I7x}6`=)+IQwDno9IadqapE-x*rlg)b&a@ZE1^1ob=g&tAtHoN5QcZsebM{l?i%CaGR61%>WXF3fs8e=Kd$IM?Hi)+{+qN+ zJnUFxcccG!>ZsoG$E~w2_jR|ol6I|f>?-}%imN{%YwdV&@=(Eog)QBC(rbA|tV4vh zFgk=CaSYz$8XVjxfBEh|6v_XM%>Tq9aW=GinDFa3Hhcw|8db5+_gX9JOb zB`r|mTV<;Lm&ebb!H(FCiqX;jA#7SXd9>m(I)41BSP>#l0G;0BHkupbJz3mFz;O(WrXYXH8nunr^yjUV0W4165pfWW0Wc{)nV?T;iH6eLj7@N?B$`=y!@yuzL zoe>P*f{x@49+Z_{9g(i7%f+imcnwk<>NT_Sm%Ms+*#oiN2 zUPf|9C9{L-8H>p^t@Ctu$sX;o&G(kfAFGCA zss|#2P4FqW=N*6MOCcy{5W4WVL$<2b>>=)-Fi*4xn3a{I70*5?Ul7z7Cajro{em_R ziVK?>fA3>0+#1?)G&;2bTQ~vqjE;ETXiovZJMK9=JDcLZTJj~0!xzsOxj8D(z8Ngb zcFF&+a(ZWTp*I`hMZdCrs#tAY!Ytl3*~|*`1fB1iJ?^rX{l)AS=&fn8phs};-m-J& zqAG?cX3(yRAo*=puYz7H67D#PjS2O*6(?^hA>DOWV$PWg#|Y0^OLtO4oow{?rM64T z^dh($W;WN;?ifPPZUl;*0H}dV!WglY(SxYH0D{qt$X?-K@gORrpl8RX(blU?<*}%r@utV&YozhX5wBcv}=K-QO?MN)W!E%7PvB zNYl;I@-ugbe2`8J^fc*!kJ-`zHlb=kp zLEKv3;h=$6M6ym~+$WcSyVQ^K8@+zdQgi#oc&TtN$5v?6Tz3JpfA5;IV0sPc_ag6? 
zK%LNRv-;(vEBfCmqP5R<0t>;VvDc~-3Lqr&9YHsBeDX$;Gu`d=2P*1u@3&r(o5_1a z85?9$^Hhu}Na0({tJ+UBC``YdN8hV_>!xp*r*f9pR*XZjTiO_PHpS)X`B1w>n^m>rT_wyyI4`$fEcKA?YP6z zsE|J{(wjpO+C=xDpUl@$DD=CIWdGFg*Qq_LYsBAnB145{QzEoEh|mUyn~=%7cgi6h zg$$Z@1+&Qx!t&~xQLh$bd3*NS;lze_c27rcZnmawzJdD{4`Z{fjdM%;C{cBVCTi+E z-d0wr`U!D-Hp{uUoz8m59UU=et0GF^6C(!Mm6J$cjxC+-qU7GV!jal(3yXF;6L!K2 zld@0Us?$pBn_DVm7Bq-olsV7(m3#uDic>e~=s-Y=Csea~Zgo)W1hAR3oL&N^rEV)# z+O{k-5nzVOuY39B6kXdnkdBysr2YCr4@cWYB*PxnW*gvMi~&(|pU= zgv`QZ@OX&v5~_3tnCmEmt#=y`nepZ+tc92*p(XqFKSJ^^s$G`+y7$2cpBIJ13Y3+p z&7xFQbvOUEvz`)N#U9>%I$Ct(f8cq}IM!G1nDxGU|1ssL3_!nz_`~-8=hT_0hDt#% zH(;N1!tDAtq_mb-?naz2z8$xNS=yL=C?r7y|BXT3(k!=QBYt>WP`pA%j z;0f#Z0Y25=h)St1r~UZ8(DL=Cy*8LzHk8ClN~X8Bz2=uG%;ze1VQC%%hyz6nH-3%v z!P^RF69T3y$%>Y_y!^KjYPMyj5$#A533*vOhJ9+TZ~65%MKgDH)5|VfMIKKqgs{3v z>ee2OcI-{LN)it~_SxfRc@&_9l5>J?OCVwY^d&?IWwE`yX8C-9!#1;I-9e?Ojyi-T zyr~sVkq~_`ITDgebQ$^gmNFb+xHC3 zI5UjAm5K`#Wr2h2F@>f=s%plLc9DCz+}fRuput_P2@}xMsx?AqkPKa~j&gjrvoHIF z^?h0qApv{r9y8cGz3t;QdyKuVB4^DcKjFn46wDaW=%?FJhqt6$8t0G2|97hH%| zG?)M~1~X%N?*&YIAG@=Kwt;cA_-iuQ!&h)uP?_dHn5cYr5L~nbmkkl{BLi_z#$nb$ zkaJPF&4;89+LE@;PIv(XS=8))Uj02b3Gr9aW-!-~OhW-Pu4~HZS4S58+eKrKo;FU1 zLFk$s!?LYco=JIF9FI-sqpM%~IeYPaNx-@3Mbi7N8 zb9pKzK3koUx*w}U_5DQ)_`%CJ>jjtA!X>dR1f|uCAJvggeq8|HQZC#r>kwIBEm17U zEuQ#@rnxP%`^SmWmfFT~nQn1#l*=*_Qs@h%nQB0%8_?>TAn$|B)y;#_1>^N(7o*DU zBkN~{#0fBl40E})pF>RqGmRK-1|x6+RZSH6f-L{9cPxPEvQfyPES5Znt6B}S`}m13 z&ybIE;kRrymdd+W?&vh&RclNH8I8?Y%yE+@A!~)cFY;*V8t-6+m3M&osF3rX|K*r=N*5ED=M*Bs7|S-)qN%_Igu57 zpS?4)YsV!`PiQWYFg!lN`aRQ`@<@#rjk+lS}JL8)+ef2DJ*QCgMu4P#b z(`GKjR2L+vWf9d5*#tZK@s;f6cVKNdcaPkkH2g+%3W>i#7n@eMz%NCvOZ8xfH;NsJ z777Z7rK!`NV59+syX-PIW25|xaQ%xeS>t*L5${roNh!&%99H!u-mLMO()EkdWA}Fo@DTx!y9G%uSneSC3_oMiw0Z+|M5|hi zCKp5P%;my>AK|}CXa2D!l$GLsg{CjiZIcG}MIG3ud6HFsb`rLd>VsQ6MP@0QXDK#n*g)%l@1VNyF`2rb-H zBxpd(z3e%urTaV&E-4TU-jWLrR=qm+StV@{#J#S2)chriWB2zyY_OyA_PpFeU(4G9 zi>>i`d{Vuss$J9uzMf0SC)a9J7!0N89vF8%9nzg32-hC|3RO~p4*2>7v_`s~#~DV6 
zu^N0-^ZMwusCi9}eufwJ+25xcU#Y&Z0VS}1E4=`D?_SGxdZ{m^j&)ONXogYR)-rNk zf9_VUcX!HW$yv8$bqXO|-4l^6OT!jlHo@fhCa;CI`t97C=yvSgZ9|uU%svld7ap=& ze>jl2b6XrCr3u|s&kPUGp{09m2|rkTVLvXM3}3y#ZZXLLEaze@9?FV*AO52HH?MN< zY&%IwdxWU^$jlZ*R6yWkha>v&f?uW&fK-!edEDp&FC zM6?>@nTJLi5QiTes#+1@y%!+;l$JPla0|(W+OHw9eos-#%XjEXv@R7S@aCC014K zaJ-Bz-7ung>TkcC^5RjrpwAdmw@UgcwH=UY(vO)0C=io=r509rugt|-d)e;=YxNJkeLkzlcUdDbPt|bCf zy1@J^_gbrj59I51)ApX&F_%SWJ{-uzIAJ+~lBm59e#bMrEg^ofbXkEE@@2tS+xvsP zr36s$dJgQs@f@_?1aZ9z|%5(vV z`pEf7sAuO*r9S+OT)Db>T$H&BA4K|p;9`j(Tc{+=X-g9)BxjdrzhyQ{)m~m#$Ib3* zk8C=db@Ft--(MY#T#f&@?9$dn*V7#DOmqh9tgCJWJbAaACMS^Xl&94YMwO?9<;~Ky zkW|Zmw82Q|F)LDS-v3fJ0 zM#a+7>9i?J2C$H&>}EvUNQcNV_;c*P2|0n3OL3uRst=SAEryy`6uc)dA*Alj@5m^n ziB73!@8i<_UZ)z>M2Rh@XeKn8)=m;XPHp{EEJ}89%TN9)eaXhvST()GjUVLIj&6>P z`Ap1+uE6)9klO7b#iT=zMA!h@x#Vbxg+)F+yDsJZV%Avu>sz*jZ|`W!Ra|dA3?Xyd zldJBr;AGSrV&@ArV@Wqak`Ys%F(uQI@}6-1v-OZQ$*RSJjKoKVMUGbSur}xBv*vF0 z1@&sbZjagT6}IXBni!Ru1neMk60<_G{jppFWlON&RF)(@uUnOuG4hRp!Kwi?;??kc~KO_S*nm8SAle+)59pUvK}uCJ!nLBqi_sxUvk1zmLP4pZRW zCYUkddOaVN>Z5v{qC6Z}C(X{cLkA~9#S!wzz85m}mp6=Ea-mKM4HM4x^xIC??jHq9 zQdF3qy0i3sg^|iRlZEQW6Jx^joy6OwiHYa6no&@~{3fQT3Cq))?bEt6i#;u!w05J> zPbhq#AgH+fZBuu%P5z{~nwH;^m5HUlefgX9I3K*d{p)=7aDs`ax|fE950G3@=qI z!16L*Tqmn(gO-g7Xix8RK$|o&k9&ocY1Ip&SB#M&3 zs7=@C9hr@3f{=*#l!y%68$&4+V{QpBEK2v0rQlMF+>@)Tm~~NEC^t4Ss~$c#jTLYo z0=zCIo(-f0EOfRFN@o_>!8hYT0&y~4YK!hVt(5{32hA7FzBJ6@?{IWH7^HEX6$}4Z zeCxJ{j4Di#Y(@1qP^?ie*4rlKJm=18O`-aGzOOat>oQBxtrp80Z7IL%-pRulpcL@d zs8+x0pmTmuJgmixDxXf_4%^Vt6RUB*x6}dE^((-j(@tgEA^Ta%)#)K9RgXmc{vbXY zv;~GUm_$OwLDdZ5hZ`UGdxQt#Tez5^dH>F{=piR}yle4EQ*ZWn|1G28Ot&xkc%A@Co73dC zzU7|vf`4tWc@@U(LCm2MbFfS~Ve_i0Tb|0l|4p*}7ZpT$q!_kz@Xct=G2oM)dIT$3 zbZa|o^wV3+%$rt;nKZk!z(k=HD*F8c=;Sw18dEvzn}6bshEXUE-rs@Xic@IqE;8Ns#WSg0 zr{yNxJ6gfDPgen^yFJCF!&iS48GxN7 zu(pnk{hZ{czVeDs%Ra4J1ncX{+`fL79jwm}J4~!4R@g`0+Z1fIj{bDr(bx=6ebp@{ zQa9!7S6rr^2CO;~=@8~fb($X5x9!iF5$w8S(1RQxiD^Sv+(g{LE#@!ElR3+}qd9&t zgHB=$)tfhr3h#vDBYPE%5I1YzB#VI_C)Hh~qC)ZvWOZA<4n4XBa&;aodW*Rh8>h4H 
z#FY_2Yza&lKnKI#R@m=m|JrWaV$u9tz#ej&7%z2aGdFprH7Yi`+ee+kRB16`yXmg# zqer*o;n-*Ii$#h#xI29NV(CIYSGMOIAmR0-Q2WUw={A_v8uY>}piyBGEkiK@;Rq12 zvE0tn8FYYm+N&_6Ia)&G({Ci#q^zX-{&%z($q8A8wVF)ZCxCjrc#baB^)gbSEQzJn z-`gK5B7<^7)Gs6lax;tANSh5PkY;~LQhAWm$aJ@>rdMmZ-qemtJ(}O7N6L2NJoBb~ zxx$gVO}mjFhyn{nUw_}t&_G+3;Eova%fSph%SO2#` z=S5Bg)NSjE6yvLIJ8`jf#`?`oHUGnrNu+`WwbdATU_`O%_z9igrnusGA*H3t6mP+w z%zBgiaO-Km&cWS}?f8j;@*)nNPl>Z}!Wv*PxqLmmSO#)fpm!;G&8bq^DKTRV?$Y*U zfxTra;i-8D^6qd*Ynoge-munT8&>JU!@i)gwIOIi&euqpDs#NAfI{x9bNKs4qAbz+ zH-~DIcUy)R0|{EL_j7lo;N2iWSM#kgR0}Mr-kYC{TgFu?@bZ+t@P?&FL5PO8UD-WQ z;b`cR#*%T^a==pyJ857EdsH;YyHArjuTXSr#piMQUZlq1jD)j-f2`GppYWmI@dPH{ zw|tV@TMSm&9^PLtqSCUwYjK^lNYc7&ZbU^9WvJPbWDPslzM(j=OwA3p>2#%V>zmEU zPbHbSW$B4MNkS~l%(`r7!|F6aZz|$jtUIElw|}&ofGcpN za5kNUZRm%2$Yi)6I@FsLVmkd{jxIckrSxbhlrx6=d{nDtQ4?@yAs{fr;bW-@44EW- zZ7R~Gcrs2Adw}T+n;m4zc0Xd}8}0<7V<~==0taH|M*J<0<8pLg$)$v)m!{1wulUhk8PHwy>y0rQ$i?Vt+YD$o@1k6KiA{x z+fl{+d=S=r``1(93~36PaA8PEixa5c_vu&*RA%I6Wxj)X@wZ$93wKGs=6Crf^pMR@ zSK=JyD=x?8BOYBVR528G*EcvAtWfm0xe?nm?QP|^xh-u7AqIYQSfPbD0|&W&BTZII z3%Di>Ue{>;sV6p1wsit5!o z#|l(nG$TQ)ruT z`i%xAc9gz>`1>*8s2KlNtHv?TBXyVa%#4w>UlOZ0@o;@Vv+?4(23r5~W(O9HY0rGH zGyR`NbeAgryiqTF6L}-4$Qoj07%F7kK{@x<<)*rZtNbUAfel))c$8gn|1e*4&a`9` z8jj=lG~ntAkasT4?}m+R54cjIDJ6aV92sZJ6%OJBBIkC4qy@(zJy8AmVfJ8I;z)t<(zGA5n) zcVX?G2D(Gk4byr}ep10a3PLu@1_K8N9F~565yu=Vwa=SY&_$RkVpC27#?|vv+K?HW z{?rTENgW^qR{SBV1|6(rvSY>Tl*}M37K{Ez+@?YnWKn8 zp)iYkF6vV9y3P~E5V>{;DZjy94*5*oqWdoL3SKL`C118Y1S*9yqiOAYs|fH;LSuLYl0qrkstFL;;xTB+{}rw$%sO zn=VjpG|G3~!^JFTTcflW>;S#~T%d9Y=vi3vobye(cK=YSx#a71>sEEyY`xbPxYtrF zBm5aNT-cYb+l^YPze13P_SSl@k`1-u-uwUJlWf(QcWcF{tf*Kgb-_Vt- zR~8H=h(1yV&%l|beG959cPb>Joy8ig;hMn?j+-87rl-4B#~c+zYeXvCDN4~shf z@r}-TS!TCQHe%n9Ve^zoNS+okBW(kc{%JO)gng|b_!4Ugc4F93Ane6tB72TSO>fdC z?@9m3Xz#jdF+-5DndMBnk65N3ZVRz>C>vf%Ar8K(aK{Ti`>EiIYi=l^s|LbbKWG=$ zzcDC19@fRbrO7HIR)#lvhiuzw2q?*LBP+`U-yMi8Xq&qn1w^t$*=@aRF{ePOz5{*N z?u_npnFwJB{#L>uc~!menstzfqnr^-&9x22dE<%x?MSYTrxV1=w720{PS*FTD+TjQ 
zal!+s!uT7iNSk1jj7p^IA-3AQ{iYBbG%GShV|}-IPeiEh|1kI7QB9@q-nTP4I%B~> zq_>f(1Y`gK>7!Dl3zCF{rgW14l2AjljFdn?Ktczl2qc6UFhGESN(mh)2_1$KdXW-3 z%$qsqto55Y>pkar&hxH!ttWr%?7dc2lD+q}_r34y`hGu`(c@vwkCT$=7H&X`g+EWV zr1R8Lh3EA0!Bq5&+GVa#cuoh*EIhxo8NWn9`X}4kUkaRAK|6oqs8{j~yXl|D`^jiR z%|VK0v8%T3T6*{0DVmeduxw;}uEKF*k-bC35RH=`ofT9`w8t zxBy)3XULfPEJzkCmDiEF$6kyreu^6T$gpZ+(U^Z)Th zoHywZqbRJ0%UqqhPXA~?-Pw*<`n{+MF^O#}oA$5q?L%I0-+EcX@Eo9KM!a{ z4{Q&*)hE@7Nc}irGMmjWSZ$wrE&DTEqjhO!Z6rn0Tv_QG?VGN?WzL6s-G<$3;ipV8 zrwDON9@Y7(zek>ki3wr0g(ft!I=6xG3`>K=iv2jhBtH)#uNY%(sSNCIqqAT3V_$%n zQ*wI66<2wdY=5ce*36_-_;WQ)_zi8$TtIM+GgS+{pH0J;j1Drbo{7xXpSuOP{o^fp zy4+@&-5nxJDZ^_OOKK(tWyQx=WW6p-{WQVMGhAe7s`Bh!tMWM!q$u>DJ!IjBkyCG1 zwjn&N3=~MwxxW@2o$~DrN38iWT)N?Nh|~89UGduP z>t0ALE-#--M2q24mq=NKLQL9Djgh>(!G3n}z(RCx-GoMLBqiQ!K<#-{G_ikA_-^Xt|HX-HM25eb#Sk#LXK+AF#sg&96RYvOcfBVuoBQy>* z1fMDUP!r^uBr9hsi<(8Z%}LNYMzB7Hh2OqYp0&I@;v?g$3y4AuYYzw@w9`r+c5T(bs*Go#ET&{HA1Z{+>jUAAH%y zkzrAc$9S9)Iw$|Fp4f?6R9V^Gq&U1=eG@1Gdw^4us)9+yh@b+=$GwUfYqLIZ4mtrD zQPx3OM91%96E4*(3)JoC0=P%r%u+zMuVk=j@^OLzj7nHGuwg5bcGKUUn-*{;@%-{a z#^|b-p;8=75;|ZJ9UXI+`1A3zf4^G(zg0voXYm0>oI9XFbIv*&gnXn1cNdbHEk@Zb;O^QU5aZb(|Ya?5g1F%JStjZ+Y);4jQx`C zq!d$?NR29aQ=)nl;_P2T?jAIED3>UOOLB*##T!!X7(66{#lwV0|Nhtiefhs#gE_q= zmDtpr1g2muC*~FPf7AQzs35U4_R4)m2s9qVX6Gq7#`Ygi6#jW?_qq|;o@i8LUQ>*zt<2b-wz9^{6*gJEu)qd`GIn+obzSK`=Ab` zWY@=TRd=x=O#N356U0>vIM8pgPo1;|+x|Ad$^o>(cGb}$>3GPXQeQAw<>xuLZi>gz zz-p3T1g?3%i!E2QpM1P*YWWbZgG8SVht)uK1RzxITLL$TQaQbEB?LSZT6DG#4&u(7*AZU(N z8mfJ2>D~;xA|D{g12W7nG%9!B@wc?+@weF=|0?$f1BR^PclyE#fNCe>hnpT1+A3E~ zcY?!USBfl3b8I3I78WH-$h?EEit@g2-|D(XDgbCANCzkdq^dZz)z2o9@xPs%41+t9 z)cjzi?vRJO_+Nw{s2+*`vY@5wBTa;d)Sp&JYoyvG(tFz)buwV_t2P-j8V*9c5=Gtm z?4o;Tx}VP6-(ATSa@#=*AlMgDJi)}|TF2UQRL>cdQi{N$)|?HZ)Ic95r-7DGEU{7l z3i}vr7h4`|YHQ{rwZz=Km6sD@^G7E*DM<2p`*6;VB%=%AZ2)IjapN(uU)ivuS8vz0 z>_~;{LMP!+(O9pvx-7M9rv{QoS|Syg9pcbWvY_!7t)2e2u$8tG->{_T^z16GX-)zE zzV*ldOWE|_0{wryxbhDIQ^`N*OwTyq_MPY-#!|7-0m058X*=Fy&*rFvUT8@L3oWO3 
zaHA_FyUL(xV}887?xpmiwl*Js@5YeN?;p!N6sa;}o%$(L;BL!Kr!E_CJXLY78xQpVczM}v96 zz8UM{gYgIe<>F;2FLqwzvPf!T8zaBk^$L0)PtCwS_dr^dE8m|^7iZz~6mv)@hdHX# zfch_~eXW?DG#V#SHO>e*^BrA; zC>2|YvXomoj10)G;}W-Oo0@HA6%n8Yff;N2c`;pX?{%_;hn01*@iZX}{=F7ta*0UPS1~Wh(wgDPDIX^`VPi86Inxs5|xIPd<{i=9vmlBA-&o` z-LV~R+W~eX;M!T6TlInDJ~afkjo8g3K%?UjII%bb0~?qsuRRTTFH>8_dwr-vyk@&&Cv;IeeEl@%}k75MKQiGNBE z{%c^!Uu@#Pfkq^)W_9|T82qr;uKp8x{dm`aQs7TzGXYxEVRu&?x9InurG>7yXlSw% zrIAMy10Fr=9(U5lPO8dQgIJBLw?zV+Ulzo>5ndBSqe1s460n@-3Llg`lgeLYV`E?m z{jL*uWVz*)eObLtykbQMagSYB@57bq;3$oOB`YNwNEp1vg(%pdE5T?T|CogV8^@Cv zvMaSJl|qctxThkM8yCY=V-}8L#n$H48f!H>G3(I$1GW=nseNGt=X_zqe$ba7WAeqG ztZ5V5CJ+0UHu^fm zyFsZYs?fC_i_r3dLz|TM@4TP%QW2Y_Mzzhbc zfwbR?*1~_M+)ph^-O)7*;4@bjS%sGM1_G;m?VuG$?TlGesp0&rsLI%)_~=>NqcxZ+1g@nw%yTW6=oY&T`S6>iZSVqJwS_u z6$1m4RW9|JPDiv3LGT@%qVnL=2zmBx^JmfbcbQUNSZTP`LX!)0z{fFqK>xTkT#ZX< z#;Zc&sD(kVVY(lsg#ENrE|9S!;~$ZIa-OyVmXFYwR3}+x)svvld$d!hRT3qqmLX^pI`!YNasY^l7x|7`2jI^lUzP=s;pV4d9Vj7H@+9bqk z?c-|9q5`fPlPr~B=#RdSDL!F2t`P=H7U;e^uc8XZye02{vU=YETvwwZ5K4xlsx$G| zWeh3BF;c#tF9@tpFq?%W@>GkTcdLb3A{k*u4~O&fi)~Y1Q$`NOB=W4wn;X^4%H;Bx z)$8}2bS50_iqe{*Dxqr@bct>6wwWuD3|X$nBI%v&u1fpRWXEJ^OP<1FXi*b!CATZG zPHsP>zXwghw~kJfEljt8<8JrG4PUrgy#*oNga)n5iu1|Tih6spw(^sJhW?3NsYm*# z8ob`TEQ#q>F?7+e2a|a&`#r9B2qaGij7_W!=8rDJ@(!{FoF-46`L?`YvxsV1)6)J} zu_KOa)jNmt8YBn;<<#JS@vC-M$$2$eo+k_yHQ#A z`M;L)9Oe)zh4WZv)An3$k3OBdT6ei`Kw% zWEFp&dK^(*nj7;D(`5M~C|$~0)pkdxn8<Y!GPRX$Mm-8J$abKU4j;R=e+cH&4W?PH9V`o)JHcNqXgJcJu_UEY-qT#l_#j6X=d{ahtEii%7c*nV$_3+z!t{-a=8O`UU{ACj*s*)nrdoI|| z!pN_8>Wu|j2(?-xOYeuDwXfI?Ellkqm?RY$1OdgvdqXC~s>}duJsv!>3bX;*IBO9e z<|k_A?fV^rwMvPUE~H~yCEC&hmmaM|K2Y}wbW;4fbZ4*@@4wEmUdj2bve{g^GbceE z3R^#`rx-LjL;B?2j$^2cCO&GG+<~! 
zg$|94ms&L&&C8d<8=vfqTO%y=IJl)yJbAZk@#5A{^el(~odu~1AD=zt>#vg|0%GgN zwucfc9tHLMls>WuY9Ai;?oS`gvv+-iw@lv3e0)rXv})d-O$qJpj-6ytE=Lwpebxrm zRu7Zau*XydrzlEi@)MWx%SG~FZxscRK_}qi!lE1Gq9QOuR#{fnzI3T+ar6`y_}gU1 z5MH>9U5e1+j7|WsL+Yv6n$h9M8Fqc9RukCQRAK4tgz{ErP(_WJ*}SR<20zeCY3f`= zcT+0xau$jKIoo-7dY2<|)jOlfP38F)qg z^!k%SXelYFL{-D#N}f(>B8CnCT3PYukt~WEHc!8}x#?}Fs~maqvNJQ%Dq*IR_xQ`9 z3X^2lJU5xrLZ)%AqX)?H*0bE~v^Y+&aar6V8|~7_49{_a&7v~`W&)q|db%pDewp>? z>`KMT*XKJRfAlun7Y7Kf-vDaUCB>z%7TH=4gF1ZGYSU+gL@Z%z#?^6t0z(oR8j_|r zy#ya8NuyFQ44|#xcy9@VcU&vXB=g_IqyMLJ&a>$q_SdS{hjV-aB(iu#YZe5&LN2_< zc_zw`FH+Tq%GMu_3ya_Yl=e9QQ0lm6)yCX7C?Z+j!O=7sG_g4h)jCT-w}`v6E+-DR z*|Z!u;p>8!&E^SM@BE?tIivd@3>lk!YI5x2)5+Uh+N^Pvq~Uhb)bb6{&idK)=XjRy zqE1kGpR+8{87#lZZ?WRnl`e^z7;NC^!o!jKdoN=&OtGq+Tn**ln$2m$Vf<#F?A=PS z;`(1kTcl!~wH#d-Ju&f#GFZm2W{p1D&!-E`{&X4_6#J5T4l*lpQ7e#cdX=q{{J00$ zsxWrp*9vq&|o8(Q2Hj!@l6+=SpB&#LVgTg*IfvXBJ4f5fDZ(P-v$l~;|3pxWCyQ9PzMu(d+X?%6|v+C1cl z5GP?y1BE|Ny(qe8$Gq(kwOvY38S%OLWJn|n;fJV=`wS##*>a#^1&L&l=#s;JQYqI{ zM)hFkH<6q*qxOxH-YgOqTdXs#D)Fm!W`DBc%ip_P@~@2FuQ$X z{1nPb>gmpg9>7gK+r=XOgkMxQIguFmqS-V2>Yf@R&-R5{>kc_dzXK@kkx}ZBebvjT z2u6s|QrpIE#id5WiG90m z!a1$|ooFwu$;;k{7wc-@(3iN0<5h~qW6e}1ydc@&&cLK17;xhAW?{}|&1R_>KS6s) zGCp-X0>TV9&W~R}b4AIha~FBrW6AczI&!4sfE!LZ#VPsNa)kudK0B<_q>v$JEl z#F%{REnVHea1>HA`E{1@VPu_mfSIT4Vbuy?Fun)_t*gwq@2l6^zH{WZZh$J9ehAs9n*~ zzDw)jDIqCVn(ChI95@~?8r{&#e_|50F(G}MvX>n&2Y=)iBG{>`s01jnrg>upNE97W zrow@iFjRx%oA-b7+&+c@Wh_PWKCsl#OZOFlke}=!m^WD&hYp!3NzezD-klaND&Fg3 zh(QIo#_Z94N(_UCK=Z`7CE#L&oWmoC2kHjI%TJ9Alq)4Q?&A5S=V`m zJ;{{l7-a$UnpT#w@0R|wOx?u@S6UX{Y;&2?qm>me@9 zUs&s*R;H46OKgHfNW&Q8_><#z`NmyYPx$6HemXqUmSWn~4Cohq@TA*bM{1S2}bKkqZ|)$K1< ze?m*oy)1RxM6fo+%VkZa0K_Q8!f4;fiWGO`-N8Yl{0SlqJvLyUH6a;P&2jn7Kfbw2_@HO|(J-!+`VYg9Ftm{CI!QvEAm-#Kgkb0X2Zy`!+_-oX*y?w@u{s z16pY|yi40G-VNIWcN`j{7!Ri6178feoAeFrKqe~aJK>LaA=xB+2ud6m*@tKRJBc?&a2Yc*8Z{cKqZX}+MP zLfBN5Y`i$)>Lva0Ajv z7ju}MrL6%NDFa{lp4lNEhr+$0CWY^1Utp9OYe^fxWS!YITqWle-0CclGU|^L;t-$~ zbys3emy7otc_hF|)r!5qq;u+uEq#eQLW?{A2&OFEwiyHD@bjRK)g(mMx_02RksSy3 
z2^y}oACc;dBXpceDKY@jXvwPOs`xp46^$4OtHMh>r@YLmaJ;4#XQ}*^0U!A^OIfZ( zKr@%EKD$ZdDkiO5A#NRtCJNaVvT(el$gFeTe)c#tL;d<5j-~o-J!XWx|AFJ+?4b*T z+El9-+y6YJUy(q%FauqMBvXLW5yYjF_?18deNeULo(L&rHV5{#D&C~qnEXh2D9l5t zWT3UL!em7Hcj~44rRdXPH|*|kyZaT#n~JUl8Z7oYt|W)*Z`$1M6x)a4%8%j5j+NVH zx|*5+SB`)31h(0}2;nJRY8#dGMM}=NTFJvzti;xufn2zA@`xM>jX;MP^N=T$)ZTur z8Tv626iHxjXClEtK1_5ge`x?M>jPphJ@+}M=B!bXQBjK%k6jw-1I*=S4*xnV z-;@^OgP)}vfIMX)^X!Z$j-&SnTNcNAt?UyPCI%lp*~^+UPd=C3v%I%@^LKQS$Q|k| zV7vt8PxVh#rmTP>4ZC~f_>Tk`NmP>U;Q_TgP}b8w<{tz1c|(3#KA+3?RL)q&Yq9e_ zw&-YT5JR|IV>w*9CG(wsQE2|>>VtodS^wJlhY`a1A0COnKJ>lbCtj(BrdnwM`+=nH zjfSRXmb|6Np5)#7A6yQoAvHUNPK}nmOsA2vyZeOez~3_F{IH{Lvt9t-_NpJCW%;;HQ`Tqq zCsG;Nn1mO#HJVcTse~FDEyPN%eO`nWGj#%O@j9I6g7*v{>^ptKv_W^rM>({P0?0*X z%fiaN(bKp10aa!!Zm`0mq4k!{gO{%wDXNm*FFNi zX|}I$_%Q1@XB9Gsku?&}+93$UBLTTv5uL@)9^UztGW{^>2_+|>F&P57uv0)1qa`IZ z2YGUYDD|Fp4zAD1Ix9xX099BVmBQQ6U*KaiMjnePcKf)jwJJiGd{xkKvjVVWF7iPkP$!u##pP_) zCa^a#Xjp2LpTW_h44iGfBzwAZEU?@aHbou|Nt`Wnm2NgN+>v1c%qXHxrhdqK+Ge3n z+y2Jfj#wkrhZHbg=xp2c@^8(?dY^(;-aAr){A0^%7x?}tT}qGZ`JCh8_<;m&D&D`M zo87kq5MG8ep8KC!4E=4x<1=N`tkccSKNGZA9tYhqG>dh^W9@!CFn^9{Nz9)CYi#6O zNnz1PJM8u)dn$c&4#}hy%{$>LEV4tkOra@JKm^~l$RH71h<22GqAhlXLxGvwm6O=f z`>1}(_he?~a+0sjzPmhM+^G7;(%I_JhZ0SrGH7qQVy}sOEgbZzB?`mk-M~ljE*?q0E z*}VA~>`e60r_Zjz4>~HZCW~_!%8Z0|eOyZ6t^gTQ)}U=d)IJFfhq3Mk4dRy5#^bha zf3JJ4fqXreLIR`$KI^Om0DHk@F>a+OH~K^BX(-9fbtzd>AC*Gju?qRB`BCJsHAh-G zh>%IsQ564P>+Z~9CdkCBOtaIs=HN>k4CoW_W-ZPGHD{oY3T@5Ya+tu5zS~f2WIqYj zsP=*GkZ1?%M(oTQ7PF&_) znxi6rAj#QmC^fa|xzrUV`_c*A$2tzJ$r5<~3G5sK`EqXGLb8)f~Qa$s=`)l70CN&r9wLnr^ zJ0}fr-Rv_N%!tOiFhq7rv4)bo4bvJn4SRn>Hr!fi;93(P44)4+>n55eyqgqkQ`-WS zUuh1nc$A*%1o>8@C@pEq?QeHM)9)3KAa1j!hHXgkv#@5UXh(cGI?R_@&7cpFBB6Fa z*zu@ED0D9|_2+4ts%1cwEz4P=Hg9Je@QLeT(Nt~f;!1E>`45Ak%Mb+*4HN8fqe2Qw z-g!Q^izq$%R2LX;>jAC=k+sCPv{UpUqU$A7Td6ZcrK_O1evbHsT6y5+xv7>Z3LTEV zUSKNBEHCZTKhP;NKzQyn(0Bw+dDW_w31X8|Lik{9nzOXGZGLf@;% z8g42kM=yf4^IFe|%TrvGP zUNH9zTpPbHEIGC?tq)Hr86<11f8z4|@)WGjU@6$W=48?(m1L+nozNV(oujo4&141S 
z64PttryFDNP1)9tr9T)P9S=;$C7f|&SMApUD zgS(}%=XKz_6V6J8g6(`&N1#412-+LC`VusKsY*BF*dfroX_iGdwixJhXi(_+HZuIq z&)oG=ylNt^ojgIZ)}c?&&f;H9mi|dI{7)^#{;n3_Xb0?537fgAmJepTRX#1HH#}E8 z+Bv@}zZTxj!T*F;#AawXCkcUS55u`a><3jF*jO9jLh;!Uuc>gg3ANj5v`nWYrlG)c zqXhkR#Tj*p7r=}kSMOZKVn!tsyF&!E9GTQ^8~r7k<&@*EseSU?wqX+Yw*xa%_AEqp?bi|r zQ$!E1m8UC$vQ9m-8&|pjN+-|fKn=#-xU8Ie*PM3@_hzx>N}bCRtvP=$9si?8#aZDy z->8LCdxQ1^f#+dIIbzyLx-F+~ga|N|kE=$==H+1A4QYG=T2(R;SayE|Kj1W8eez}R zvzli>%Ofx4F}}BT?WG;|-dcU{u;j+ao>KLg#k8Rr!&Wk&*Osx&C@zWmd*wy^Z|xL* zb1e;um=;FQ30qebmD^i1kRS|S%6LHgfv&P2Y3@t>mR1&F!9PU`S&FsgWb=SP2SCWT z+yeJhaSw1IOt^gh*FlxaO<_I@I6vMhdy@hJx9`?;Xw#)0$`xSs;ntjziT=>Z-%3XR zr2C7pza2Ik|6cMk>Oykah4C^MVMrVuj_*Ue}DcUU0(H#2!%09{H{g-E)?Q~h& zCAC4UCznxrCp}Y|tMS&Y9c|1&Nh*xeLxyn5k}Yf$P!u46?W6P<<>gN>jzZId?pSxC zo>bVrVCu=nY{LaZ`oS-7cQoxuXDrhpjv6`+5FXo@Uf$yDpO23I7G9VCCK>oe1u=C~ z)ms+uD)R!85jwN6>FR(|M0W1v8c3M*g!Z*B+RyEC`v(Ssci0huz~yf*pY7#Ude434 zt57T$v+J>SO0GP3ZR{_CW08Pvwg6N0blK3?lW=vi7{Tgd*k;co%;kNzAN-Djq1?S?gk9fp^{VIpp( zf^|3u%-zSjLFVo@&;`Mmd%l{am=5VeE8p*@jC1pf%xbIbS$!?HJ04BfzIv%|wD2)S zJ6r~5y$TNY_i~laxp48-3W8nWRLe$NT74&_&Y|e`SMA-)U8%h`Mjh6*2P*K zzkA(3P2*g$#$ej6uF9B-=BnI*ywDuKL+*`u+7HDjUx!xurIMpDMkYfo0yin7tU^tfIN+bVWXwy=G<}&T=F=?rW;j z>_$XSalzWQ8dP9)z^K{mq?~vDnID71pI>X5-c*xm?n+FL-m!$rcHF$f(Y9!aUrt(d zoM^&kx9)rmBD~%?o;My*@Upe5KCCundIDRGNt2%|ZhQ~IxZ6b*&TdV?N*PfB2;$oQ7&YNB*zD z!E$|zMEveAes6Dy@lRA_&JkaeD?S^f$B9J%NaIPr`@~aXZj2^f&5+UQK?&`U%Rqn$ zB9cnuWU^|%{VP`fdui>N)1Kc-$79{A%6qaHn{w%g!=+)OFCUr}-nR?*s06`yj=SZ+ zYnKhAkV?)VKNL_b9;n}ZpaMWq)DWTbK^>)L+fZ$Wk0+@+B>uF>|LA-x|Ml%SIv2 zfflTV2j_mB(dIWc1vf42Uz$IcMtt#(1TNw;1#V+DEu$l$U+bH0QbuoC1JiBy`YEAJ zTS7Yr`6eNIkO*gpqa=OKH|@se2dE3_(XWX!utupqjb+wA%wnG|CJwZZ-=*-^Sj+=3 zL8o*4b1!Ig>A?f$iE9TfYD*r1*_ad^z21$lt<`NxTZXNYl?+2o)mN+g$6bjDU`>6r zYVMrmm&kkE;L8aKNzOSmA!xQ8;U>?Ko#khbaa^RKMn%5!R_O4%5FlxBjGlMIa5)h9 zPAK`g${uZEUUIRPtsNfTbyU||bHY6cP=0SHAL91h#Va~%zS7LWqtbEP0y9V8Cp}dC zVlRoE5(&R|IjXDcb)mZm5WT2($Kt&kHcDlG`o|$7Yk5Xqz}W>HWoSdzD1xi@f&|sfdih$3~r35 
z#lU|ZS#OzjuQgEww1qY3^$knn+iu|m22k9z-hvpF!^th%BCBWD>iVZDN%G)s=e(qp zi(W)I^FUrljiMJqgWPVL;!?j7APPZ;BPGH$RRp18PO`l4oI!q@c^eBk_FRpxRPC1% zuy{=4a8eh#tUT)n1U~dVb-Aexkca+lM;Qpq7ug|X#4j!Z z^=+22Bl__XU6+SW=kD2k=0EpzuGuG1s!`8pmeY6>8GNbY5nSFrtj%f8SYF<+0GQ$m zmXr4~%G1iDwG94)2h=Sj#!j8Nyb}!5j(t{Q%`}Bh%Y&>7_(oWfkjG6;JCub%)p2}D zFIt2DqwQIR6&@Rc+KyoU7>3;DxX!(=1iHIO*2ZT=*FN*dGMkek9D3Du zgtV?Rr9)yHRP6tw29h3oiO%1mjjEp<{7{Klt$Ua>agz~K-qh4~@7uk{7~Z@t{&tnkRkSt;Eig8w!B-ivmJ%GgB8MMb~;A8Vh? zOtjd>-=9vAf>`k|dz9Y#UDxRD(c8QxGR$bl!0@3GH z?O6f3mz8+OMdA~MhZ*?ccxtD}VJW=!vP4hSW0k~ZZS$hlbl9*E2&YAmaAl>ZRp&v||Sou0<#1(kk;6=SeoW^nfQ?foHEi6%>IU*5>dyD(R) zUEfq$VqfSKbf_JIVS-adb|m1?L)BokGOyGlMSy#idw25KGnFP=O@JmL&h z|E(MK*}ulJ{~y-=SM3|rqXZ-tQ|BZnq_~;_@-izCflFylJ z%9qKahgL1`g)hmdtD|4x2d>7s51Mq(L1}}f_3k)l-UlpUe zOqry^HAcpV?c#}!$7zL_rVR!pG*S~}sUCswOh7TZ4FZj>{(OcmIz@U#wyoWRp?|NB z2|v$BOk$OFE)reXrc<5sk{!)VkTMXqAEmJJKYwX0( z9-B7PG*R^ZJ>NdvMyIQ|M}D43WHN;W0)X>0r%!!(QsZ9r7XFO`P{QC*hb(e{3wxughxw>X5D6neDaM4YQv)ci^5HDzTT3*BKG*eo$P_9*M2e*)ly z&yF{ShSYfolUHnG6B-sa9%i$Zt8SF922ISs)o%4O@aC@h49spnf2<70QC8moBjVZv{u znc-=>Gwm|Qhg`<{AgC+gw3NF+Ke004%Y0Mxu+<_8bp)69n+7VF9x67?+=Ze@P`j~ zFq&8Pr3;q*d$BJXwL~4t$imLA1U=eZyY7!f&lMU-5Vl@7R!3 zI$S>JMuavq%{me=6)anqt)!+73DxKX)FFenHQq9ZqptqhS=GKiByJ@OTP$8wh_)93v3_b2i@T%J|qTL2DU-ZvmA4%UK8(_xpD0zQ5i%!XEc zT69h);RN|^GdAweD~)59IQ_b>|MnA8t8t#k*}r+fa~U`M%7pSn`Q;XWLF9FSU0AiC zu!C}eXIl+$$boeb4%=zaTm zjG?A&7{bpK$uO;Yw!yhNyt@%K1ZxtRKm@9@f`nr`0TG} z|G)UQ`}eLnt@Cxi`Ks~o>N=M#Mt2S7XVM=j>z#3hL+rgWal=s6ia!1O(7^`kaXW^S zYUT!N3jKNNjLQnEI`B#J!oVjm`i zC%>Wa!Fenw`CVG-kn6gxKm)9N;67}Phz!{EUy41MSxjb?0lpYp%lF8@-iU{vXTmC( z?jKE^h0kx3q&(+~LfmX;4K+0L8S$&jz4t8w_HkrLP1YIF2dqKDiVWy`lVlWga89;zL!1e-iWYmKF?AE-O!3Z_ftt0*~dt*e%LhpxmHE$}@!#G63*#LOV$;|@X>FSkLQ^Pz zb{75G@+}pD~iQ4%=}2|!H|OagsSWKoo-b7+u?NWL5E_o=}hqsUR@ zB7Y_B7>&fQLysq%)${V$tDII{77Cv|k8(2`H!pM3n*(`Vx)w4(Rp{+>A<1sdFdR-Hp~X{BU!y__k`(iVwI9$%aw2A<{J-d;kuY?AA!IHBA=zsCIO&(`{=z+?Ba#7xuU=aODmUU zeCzwuQXQBY=b#;|N7#N-!${}_K1pnuA$1L#0G70BkLFCUc&#qInYHB7?4BeKP80mM 
zHIxJ#02&k)VxEryd?_(%J@NeYJz*(3(Y2&T*W^xYoDtX}J_1xS_~@^D>HmqW&3pa@@-!L6K-$X>t*cwLlYMb3mfsC%oAZYz%5YII&hZ{NeQ%gof5_ocjU` z)`2b3zWTmOwqR&0L{dM#HuI1XlG9|P?^u%4wmErq%J+8MiAS=(ra8^z z1-XhbISG5Y`lWJphShM(?2`k5H}fZ` zQOjD}eB|}S^84E`$rN&)pYeOASJYuuVNH;tMMkNhy!U*3Bva{W>@9d=*J#F%H!Xdo zL;b`d?A&2sd~z*bL#Pz?9fL1eCH@8i(Z7Z~TXhy~s_?$ko@y)X%M=xszMsS!fu_0O z)br8)s~mr6ej@N;^6iO}wwswouT=?xd-4j#zE*B;!&)p7>wz&}^ACGZ?5*`E4@qzl zPF)bFK|lRvEZaqYRI1ez`L)M=_ohxq_ zfmOA}?t~9+71|!nj18Nm@gg!(TA*@-YJy95x4X4jy#MaLbZ8gKEkp0VaXL?acB!-E zpQk#X4}RQ-W6E!L*87OJ)f8q0)USR z*h&QqT?)DUl4xyqQe|MpP#pZ&rUW+>=sm$oyLX%)Pr8K!q|M2uvDGb9W^P)M+YSQe zTWjcHW~%1(Qq;r*fzD*>m9#gdhn;!#@rTyRv;n`@5>*{=?xpxZg9`zc$+ssjgj|m8 z+&oLUJNJdD#qG$1{#e+W&)_CFl%eHY@pE|r66~eib#A1Ga2Mx#k9zH2qQ-xJ&o0Ir z`a=!LS&ml1x!yr}a(KU`=-7doxZEH1uz?q?W-@AYs&9U7$ynpkdolM}DS0ffO_uBO z)gmlQXrX@TBV4kQ@v+yu*M3mq!!^zr5#4Q`E4B}(V6!LQ%mEymfDnf*%%tvgAfhEV z{-wdE_n-m6v9MaR$n#ewUVOCPtw9^Vw!!0UcUtB&feQmha{frwt7=EOTLmk>POm&t z7ZJ0NpM7W<8>aocirV&5ycFrVEz20%jIwHC8&{`>6miz ztY;haLQv=UC_lh>QpJAO_NLMb3Oc7DunmOQLjPDAqQ*wgG3LjoSUPvjk(AuKnZJ&) zfQ7sX;601nh7R}%GL+$1|B;REfo_6D)mp^zt5$twhae(pL8*Xq;RdATp{2!frfZFM}rSv;Lzdy`Ow{sA5!kx{8T_fSjbZl&sRJP@ROLx4_=CLhry z9r{0Vy<`NRyE8E!oME{BY{(%l@mft&r@@&mYZ65*}IZWLaFMWql0ur zj}W42S^X~YdF1aql#y7+al0S4i5G1gTk}ha=_i(`S(H_RQE8anWL9$It{Sv$oJ7zc z<=fB8nMf-5COH}B!r`gN(T>sif|4Qp^V>_&lTM}bSA(OXH?ge^g6;oLd*2<_RJN|q zjH8Zy5CNq+RHeuO1_;#|=?DxZ)IgLVND^S^5I}4oMM9HKK$=KG5&{MY7*Oe5NM!q@UpQ=R${qpLWlZR^SD+?=`1TMO^nx0PBSyE0p>Iv_n}V;Wu86>_#E z)Z~;r&=o4fA}_5#Q?DLZT^n*G!J!V9X@2ab^vJ*>wyjD3;RY*i!X zTo)C#d+aY8?xi?j;PGN=W4ci0Ae^2#-wbNjEdvU&q~fd@_UH_WZ4Sa=3gE*My7B42 z=NFj4|F$$=eVHx$>jz1xm;sosWGU?`U<*^PeoNaZ;=_>noJl$B(;SG zD%ly};2h7O1~?c4KA!=bH7^+OWx;<|^PllxKhM(>W_?g?`0h3_Rxy4HS{AL9ZgF4f2Ocqw(TXDk!GzSoUY0oZ@_4cEi?SSesK zAil^Uk;|wm7Mq7QtHo&JXH7kGa?d#)FmdhJy?d9U($H_8b;rpLcq9@&m8b|PL&aUp z1By%TtG$Zl2sghS-kfN$o}~{m0s7zYyaAlN)C8txc?WjLSth`z&E*xt!W#N3Z9N)_ zX0-I0k4c%oK*uzTXj4(Db5WRT`c8My;}@GLODxTj2=jT7u&0U2r^F*)Fc4tDB%XSH 
zky}&L!lvjV`S!YJrn05SB^8qU515Q%j}X6V^&OI1Zm~>svF0tY2h(y9&D5SEgpWA7 z!#pRLAW(h_l*npI>}Z*09()~zQ!gf%daOD4M6iL0SupT;-a=AKF1x_eSqe|BHuA*X zO++(r#BC&&&$s>F4!GL(nK?Txw|D%8e3@`e&9KY5>(;YC*ae5G>Dk^wQJc_Zb!&zLc+gO?tb6=OtE1n)!JnJhIwt_$sW|PqNZP z7KMCj_c(21A2$AMLr&){_{g))E+HqPZ*2FA`*AN7)4zO@QZWiZ|0 zob?CGGakO*Q4yU*&TW$8Z|2|-W+^F~8!Ei;*7VpwG@AkmiK(m0QiCK)Xpk%@0c|&4 z{N7YP8``x#8})xr^k;{@YT@e1bgh`@x45-DHj4YObi;hSbp2oq9I(4pvv~}xi zx>@qG5bJO-ECoXL*U)$%x1u?E?e5|@(Uq+X?~SZk#uH!9ZWQ4H1kc*@sv}Ri7_=J} z^})J4C8sC_YK&d~SP;cuE8jJM#m#9UXEOj?gu`hSz}}<<_sWz2YGv@s;c|%%0)ARH zs_v8!FB3xIJ1&g=k0GbM3+*R3O4gTqO{))`}2c4L>yi0=C^N&t!nh=eB%+* z=+>^KFjyq_rd6;td5PGN?g0 zVEVRpTsUs+X%-q3ibg7Ac%2BWnMamG?=?G^^;bzxtaW4bpJ#P?`J2rNgju!Bpp}to z3^0j`acc_GL~cn0y2Ll0bl-)VA3m-SC%2A@BDY0cJ#&iYM0K-u(mUSkW)njhnDgaX z7vYaG!eBCBJI+rBe;J!|tJ1iNC8{cXU8B#?yXKKqzA1{{yH#35v+BmA(5D(csBEshz!i__zBYk4nf-kVI5qS7acS@5 z;X#ctiLZ+{o%Or?Reni6`Y5Z(`Q?x7=Gg96Tgel3>(wik7s9$KGS;_akvZU}z$wKv zjVfUd?}ww5@4e)ETO=eNH4VeZ;6`O*eVPLo7UdgzPVGQAGLxn`D6H}xYpZz*3Et!c zzoJT^ji(Wp9yQ@9QAImfx-nzXCaNUs@il=|+o;0g1E$=%ppoMox>d@Tz0T@RrvB99 zJzp+VaoGh@J>dD=2U%Fx=!mN1G_=obI}b7?%_D`QhX5SWDnu~Z8uYQ;xJRX@Q0GB@ zX>pcwHipm0Pe*h{D$r)CT z6!?f`(IgXYAwpE%$<2pr!dvesBUZLkCMmDNx9UQ6LNET_ME{77~H)z-3FViBRoDLAM_DBcyz81UsWDMyFx)hF+Z^lu%9)axGWF+Gy* zVCV=H9I6kwC`<#jyk;y_@{JubB(sBC|Y%(?>j z?S!4<)>33!NTWEN)^48Z#`iE+wl%gj3iPVHd=0@LzpMD}=KJ-&*fBAay{G*ET;{nz zbnC#1$i? 
z`k1Rxse9Qgms2&oS{2az`)*x1Zt)BcaE1f;S;&yfMJuDd_+1|-c7fCNvYOomo_RXd z4scmarqe)(ErXCk=Q_$%WIHOLKQK9B%QW(hmg}B)PMB-aB3Lj-*;6OZL*2`1>!^l0 zk+x!id46XN)6}UJt#4#Hu^}5z&J5c)z3>n(HY77|UNU#?Ql3k{$LpEbnK-R|*l0RY zM%}{=?LZN5C8iA%!+I&KMws;okrfk4l^O2bnM{+nW7!Ofo@?nktR@}&2tZ!rIAuxd z8U+_5MBZbjym~f0qi=CE_%p5kBgD_o691_;r-h`osrfr5&R#WlMjiu+m3pVMYsd{d z22`!{W#CW>3sEp8M^%4C!n!R9V)<0%R&CU@q5$!C@c z4k@%c+V2kuMa-BrA_KvQi@}Suk2i&?U){VIIB*O%FI7gr{K+bM$RXx4N2q^~xbN^kji;Rl!(SlYgrkR*0co6= z;rWSqHKS;WQlHDXQxj1{Qnw7r8fTNiN6zia0qGA~-S|QkX_uQQvnjXtgEktux8H&o zR#vl3vmG55(;+c0`nlMMz=+qytp3Zj_wzqlSr4VH9ET1==ftZae#D`?PjWq+_V7!- zVwk}Xr&T`ml0PHEYzOer``y^s_@lLolx#=yE25?SuNM=y<8x5Q$T)XA>JkIl0XPd| zP>Yz7RH&!-JBnbXGAC&rrL*0*fKxh!?XEx_Vkh>vZ*PrPBOfVz-nX}Nro0%)tOR>- z_u;?406+UePJC>g#W_Kn?3SgLCBmFpx0rlZ@ehe|&OdU^!NF5#fopCCQ(eRcsx^g& zK`U!~u^HT2hK zEo{>=aZ7fIiRmTd0#(Bc)&q_|C%1%g+sxvLMb)fL^`TdO$+g4XCF{$B&cDRiyEE7G^7Y^dW>2)I4K&r5y^w9BC*Pe|t z#ims67_}rN35IWTbYbM+yor0?tFM^QWR<4jq89(9Gquw9RlMsYdy8= z^L%VyZOrM`C`ICKapC0jSUwFu%3lo2Hj}(AvgPbfT(ikqvqu6@uI|337$E3;?{6RX z4YoRBk=T^ly3RY-!4SYm?WWm;zWvU*_34IRMf6)TUIjMM4uW`@TkSXBNwzZ__SA{b zRA=z61jdm;c}#BlZo1qldS%4^is9Z((MAx&x;1EJ;5j4E?-|Ct4tBLcGQE z2oK)1BOC;R7t+dKsI)#8{>tB*0TeR`gS=AyIpyfua4ln2o_jx?58Ba0YKdG|EM7@6~*^8~8eqrx=%DXp_y&Xhd(g*FER|kB|JYf28`0*p4L&+YG zpLh$g<_g$Iil5;$tyVJKvS(#r#6=u1{doBKGp2{qojY7iN67D`Iez&08vw}Rbg(zM zBWYozvkI+qNEJbs!P=xsBaiJ(uQ?As@M9Zezt~Y<>v04joB(Gh8Bxrstn%r`n>m;R zCb5xnUy^LAR=8Xl@DpjE5 z3>EJA?zkW0uiSWyVp{9~p+@~hgzd^$V-S}BI_ALA91V$-S0U$tN_q(0aQsv#4z!jS zaAN3v?U@~wu8QItzY$Q^+N~@3S_F~8Ba~3pYM%YqMV04X-sqXrp6^b#p%`%U56r+> zXya>qSEGPARcG1VCsJ=FKfCUC?6|nTVHd)(u0k|5jqUj|Su-1_?QI)`j&)x&MUC!& zaMRO2mz8bsJypN!TJnbN+Jv)gG&Z6ZO0S2FHyX)Y5q-xAYK_()8{{!eCYW(!28)o< zQe^X;CBVnx*hZu5qsiZa9$B&!$ml4xB7Ui%x@G_ue3NJ-B`3Ei#_2^a76y3Ix$j*f z_}>u3IrFJ^ z`|d@&#(20as0_xd`KZWQ!Q@#%bw6$aF60Ypo>$X*g_h4q4=Eclt&Ee2Q{j)A+?T}~ ziJzMNIn@#Kn7^1sz)#Ol&m5`Q8O;C(7<`3-ChxTU+zxDU^b9!Hu7yR&NE<6F-|j7% ztPrdfeA0)>4)&|Xloef#Hj6?d)uXZ;RpHDmtt7*-#&U62@lmUIv&xo1`5Ih~{STf} 
zx1!Qc49;oxohhBER-;ovR$bH?{k2nrypkc^F2*a}r?YRxN%J2OH!9QfY(AP`-RUNP zQWqi3x`9}^LSHd1-*eZlCTbGhM_Mef_IwRXMPt;orwl zHMdd^$j_hpl=FvK#JuCS<*R&ew5b4WY7Q~PE9&@j+bF%bmzG4l{TBjr7>#W7?8;o5 z%if>{l|ILUwPZUFuJ(i=$?_ylE69X|@mh}h_J^LaXgR2RK^;vp+!WZtfU(ve#8gZ@ z<#bH;#APBluL&FUlXSDU#0MnC!9`RIhCbnbhTgpRJC6|Me)~2>pwm5 zcV2&F<~keDxNJ?Xy?=EB@^q=ps^STQp~Gen+Jvpk(&8vSGi+F1exN$&P8+(*{daMLf`SF%7oi0%FoG|JRRW zINOI*e23(LS2~Lh=Y9l=D6p^k&D~3LepaNN4_HW-wkXgA9IIxL$9VXm;2daBLvU!$ z&*uqjD(`Ehd{w#wiuL}|E`O~-D$st4z<6ij7Vqxs$06mRt3x{~r~{@(^)E&oqy1j@ zpKX(W1p)?C3O{W=2N*66m6ny>jpYv8RzBI3ms?*ZXvVSeA+SYrIB3Tv1Pww;$&eU; zo}H?yH{>6jC&cJ;2S_dxMrx=F%*zg=gb8KPu30Z-`z@uv)M4s?+y zGUr$m8hY@8xj|&X68g~K_0ab6Y%;8crgE9S|_N%GBU2PF%(`` z(POu^p(2da^o>S+4yqzy%XA3}+xYc}&A)uyVX(Gux2`XBt?-KMi#b{^Nh))ZQasaM zyV;RL3S)_eIhReA#~SW3aL^Sh%-&IaXZxbHvew833WF5fROb{e1sKvN?#;O{`!0q#I{Ug)gH(2og)va?q`^kH?P?4;| zcSo3C-`?7H{;qUjmu(vVBj)9C+5UPD&ccFY+a|(UEKU7k{Qil|f@Zd}_E0EVkt~lX z-e@T4^VLvVqQxq=6+nL>pMk*0NIWk9&OL2dsr-FDc>%w!f>qpN#h}#mCWWoLfcV!B zRM0yrigPhbGKGZ|9?fbcBXX@4-RE^IpKaFk<*%2++2~Nw8)FS3MY2fk9-o$-2U`)?`Pw9lm|Xq1e%j-ir_oid<~lf8QzDZg)xFApB6a;_guZ(2`OM zv<*X5?E5PgRPKi#wdTByw4`Ep-n0#(#9JJM;%U|j-g&ts>Gz?A>O#O zbRyG`PHc)iQt+#grZnO4E%78{c)N_G;Jj}X!5Gfn+a;k+Xw_U?$hqfpvgekk^X)P_ zl5S8&J+jGA-neNDT4xzG)18D_O}(xzk;{x3a|I%SQqsS~VTD=XDL*x#tT{N)2xGZt z&)wAHXPQyV6t)JH#$}qZnHeUGgE#>0lC&7V$MKS->PdKia@Imppd+$bF;4$PJekeP z(HMKbT&BqInEJ*|j>Zd}LP<^=GP07?)M{wyhR{>z+MtTiP!jGjq$@u)BBW|U`r_=% zfis9a16z*hT5b`ir=??2UGA=$RCg=Zy=UfJp$48Rr!07eUf8+oCg1LHZY0_nDaEyv zHZe1{otMqc8}KN$_cf(i4j@Y|UDZGkXYcsDj9A4P=z`eB;O1rKRQMA@F~;d( zP0i}p18s8q#3D>G)L424!@~{D`xlmYHPYXermB2Y5$2Egddrz=>Vq4&t}}H1fGKCI zts?7wiP7-Y9SbxUnz-g$s!(>szLG6x64!KXyl7awS=ql!Y;=~UbwF28QuMp80dGZlG~c49Q16+gXQ;A9FYOFW4TA|>>S*~-Xw_s8Zgv+~DjK$oV^ zZ(AmGZ4qGdY`d$r{w;c)^bC!Mi$f1PGplGF8+)xsErNtuf+~)3f-D#`6>V9dfxGi~ zf^Dj$L4?2k4ZO1|#85W-m)~0Q36sCRi40`4~JO5UI?)eG|3QaUnjo8V8*8I7u~;IVrN9}GkDhgiQ7!d!=hBDPf z!lRq@R>OIPCm_t^DNny_jiqYat{=*ZL^X<^gL}7i@`i$9hLc3fBifn}iQk&-Pg}eS 
z#yfj$83F23xkxBIaHG>)pB|fdjESxafK3S5bSnsem9ObxGZSF&JCp7hv|;79@EV!P zGg8uv>X|lNug}%nF_Xh<*~|2DuUlCn9~o#uoj32lGW|HEkVFAf+TGod#jjNcc4CVB zgeD_KTU6Q|W)pYJe9fmvDG6XlZ;_i~27ChHkM8=lYe}d}6LC|u*wn$0fvh`<=1>_c z&s#0*x=A|b$6+z>*FvRH^)WfA2rX+bjC#37JfAfpTs4aMb?W>AMEyhYpkQX%chaEX-Ylgg16T5BaINDs!GIHHZnv+T_6% z9iV;`Ie+{ zRwQ&zS73b_X?Z>wi#>nzZv2lES-E_g7+g3r^E7>A;i5%^uyZ$Bg(qn!sz4|D>37dP zy`9D{zTWA_A|FFFj1W=P0?!~)`iA_F?nypQA=643?DOt{WT)|>j&z~=@V=~fv_kXO z0*!|5CmT-WxnZWpX~!3oND$_ zwn*s$wpM-v@O(|PSh$TsrU8ZX!DjhuU1v2FxA zVA^rH7_t5k(oOU^U^;h+TgUpTQX-+@`dH?(q-CSKXdu~uC-hDrtMAg4T_GOX@$`G|+*{H>+=x8a5T}FVcN0-mUYc>X=AO*c6YOtOhmEG)}wz$U)vWO2N&m z>S6o)TPzb4fBIP1SN^;NZLbnZ<)h{8@&~>JCz_jH-pPek5_26pZV?6V^>*_oQw*$x zK{HXW2Z!d;yx*dmIh-COm-0uOh&!QVDI!O?} zKQ&GUZ@w+sUopF{!Pt+vJmQ=|k=<_{typf0!Hq`ysN8K|*&aW45}eMRg1<(+0+G=- zQ3otnO#r~T2_V&(F?t&-G9V-NtYS6f-4sU)i|FwvFW#rwCQVk&j6>h=YbX>841?L% z2qTiS^Pu~8QGIL5vm_gG4e$Yg2flN&=V%S5{<50?e7v}3V?Z&|BrvjOnyY`*f?z^q z1#xsTT;+u4ue~^7THVU91%gvRn+{PMQ=8=*B@K;SQMcTrmXIo3|&pkBin@{6ses z4|>TWcno?bIR`nY)b#@8=cz~9>=QVX#I9Leijg!Bv{^-BBwh_L=s3etukMrOu!|l1 z3vEko)hb=WtoJ!={kK0+b^e>hb%<7__>k_k&jbCt9IFwRaILrPJe#`Th$WQfdqN!y zY;OhW$UyeB&mhTn*U0oXX6BQ>>+AoQ(Eq`p@O7e)H0L$#(@iDYt#zPB7WtO$y5<={ zrs~sB__h}y!u3OYPk)KsAeq=Z`kHw_k;w1s6km*jH z&=j!IKSH}0`=N1&vy266+;`)Atv7OXFCd~wN%kk2;MX*Q^6Cj={mfb#0|y9Gf&%CTqQQ~%ZA$8JmvMv>ngV-++;axTHaO-Fkjz&vLF%y9 zS+V9e@N-FKI~+Cr&^^s@V`$qef{El-84@KkKNm72nA%|b!)9I(-?zOJ45Gs^s$gu3 z-rzV={@E3(1|J`A&j6WG%$hn{QrI7lcfGJBfH30{vOl?f>bR@9(@)wGrd#OxzV7Ep z&vk-V@=jmh$|Eb1H|5=^T!%{e$JH}&`ocfg&C558U9vd2bLJ$@h5=8hMO~^1Q9Lhd zhj7i)pSN{3Hc8Ih+Ofojw^}#QEzc_Z!)@Qe$Q(ve*a_4+NV4&3zG{V0w@ZsS_o%wH zsl0e1PcYT57P5E~cl)|kScFJlheKQa^v3E81M)B2B=v5jdEoat>`qxI53&WrVYBLC zfe5)(Lxf~Wv{M8+_4M_a6I@?j6DbpF#Le~8iNYn|Idvme5o40zj4> z8!E+X$3vAY1^QLx=R-;+mM@9Oy2?nte!*D#ZDK6_b*>7yn%k#rPKKI@dBk~1*c+>9 zNLtNy$+3Jq7d*6&UD+3M>^sXiJyPPi4SAn-;(+Pl0h3udBz`e%Yv0Q&X5VCEcR$?cio)dAtcGLzsb@pg zH26e3*0%Q?J{&OV$KLbjcmvw)5nN87`9v#_7pBniS)mB+{CLp{$xPCSVd8*FoCQF55FY+%qk=8xiJ)Ro9= 
zd?I8_7$-%CdLf@l5b`A?X3T?shdAH?Q=BBv)vcRhve%^d;+IUMN1ZC7-XWdmSomc# zwm}ZxulQ9w=o{*r@bcV{CFr4z2u8mO^pjBk39dgHKmQ4e_&191M`Od^c=XSY`j6=U zv^w|~5q+nRLO^j%@2CuipI5O{v~q{2A9tSdqw1Q>r12F{p)QNx>$xfHKo>T72C^DB zvg-pa^mTGmt=a@nhFKmXahtn#Z!UUMlaj?GAz#;}rp;)OFu4r4UHRbe&UDnUvFr8& z$`zfzb;Wx*^ZG0n}Pk%=^$ z&JXa!6NRS>?M1m{gdUj-;Jf>#26)fqvM$WeL;8b5i*7J+6a_?kSiya$a&%EA6T1zW zq5DE?HF!jL4vQ_i#fz~6$#31dg^gnBy}|U=K3~S#R7J@F(~ZMQ^snK<+sMVc>+79# zz_e=;d%zTxFynTE>J2gJ1iB$7{2X`l$mKT(VYnEABC#}2Je?&Y@DAK80*1B5%ujwg zgwwb4VB$W!+eg^qfazW;A#$WeF1yT*yUH_z!Byf@5+rl9Va>hVHzStSz#ycjk3J8Y zK`VJuAfiogNJ1JnCl8oT%g-OttN%GFE_-v5%^}XJe=t(H02GD=hmM_Mj zdWRh?hU5*FjtcH#5XOK+4^6s^+1?45)?G2t{+3qh<^{drSF6tY`#j}^0Y|pbZ@cG( zM+4TKKS~$C5?^_qHEJv~jrBNXFj}y+gH0xL%cF`I%)4jM;ci^xduoIK$s|MyZQ1>@aH%W{Xu^v;dUwuV8@ zcw42OR19iNDyoEhpWGRXCg`?i&N$u4jtISx2!8@Cp%=4 zM(Fl74<#YGZ;Zkwbv31%E2a#avKj3~A}|19!d2G*%nELfi$tuw*PB-U8O5jNOY?9$ z)WwyB&gDp2mmPJk!w{lt#YYE{R-@i44B4_{Z@Q?S6vG`pj-Q?DWXn0OTe1sj?@m z{cEt1tmcPj$A^=HX%|VNgw?!gV(KEmFmpMlCOmxa|^$_I4$4>cLbNspemv% zLaGdgNFs%$iz?zLQ6q=HID&(O<+qDb*P0N<19yxv^IAJf9R?1V1_bcYYK+~)&NCkl z^-g~@#XgkupD_3bgdqQfB|cQ7|A_vOF#fT@|A_t{EAK struct saveDispatcher{ - static void save(Tensor &tensor,const std::string &path,int filebegin=0,int fileend=-1)=delete; + static void save(Tensor &tensor,const std::string &path,int filebegin=0)=delete; }; template - void save(Tensor &tensor,const std::string &path,int filebegin=0,int fileend=-1){ - saveDispatcher::save(tensor, path, filebegin, fileend); + void save(Tensor &tensor,const std::string &path,int filebegin=0){ + saveDispatcher::save(tensor, path, filebegin); } template struct loadDispatcher{ - static Tensor load(const std::string &path,int filebegin=0,int fileend=-1)=delete; + static Tensor load(const std::string &path,int filebegin=0)=delete; }; template - Tensor load(const std::string &path,int filebegin=0,int fileend=-1){ - return loadDispatcher::load(path, filebegin, fileend); + Tensor load(const std::string &path,int filebegin=0){ + return 
loadDispatcher::load(path, filebegin); } } diff --git a/excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp b/excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp index 56c86dbd..f1570693 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp @@ -12,46 +12,46 @@ namespace deepx::tensorfunc template struct reducemaxDispatcher { - static void reducemax(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; + static void reducemax(const Tensor &A, const std::vector &dims,Tensor &B,const bool keepdims=false) = delete; }; template - void reducemax(const Tensor &A, const int axis,const bool keepdims, Tensor &B) + void reducemax(const Tensor &A, const std::vector &dims,Tensor &B,const bool keepdims=false) { - reducemaxDispatcher::reducemax(A, axis, keepdims, B); + reducemaxDispatcher::reducemax(A, dims, B, keepdims); } template struct reduceminDispatcher { - static void reducemin(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; + static void reducemin(const Tensor &A, const std::vector &dims,Tensor &B,const bool keepdims=false) = delete; }; template - void reducemin(const Tensor &A, const int axis,const bool keepdims, Tensor &B) + void reducemin(const Tensor &A, const std::vector &dims,Tensor &B,const bool keepdims=false) { - reduceminDispatcher::reducemin(A, axis, keepdims, B); + reduceminDispatcher::reducemin(A, dims, B, keepdims); } template struct sumDispatcher { - static void reducesum(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; + static void reducesum(const Tensor &A, const std::vector &dims,Tensor &B,const bool keepdims=false) = delete; }; template - void sum(const Tensor &A, const int axis,const bool keepdims, Tensor &B) + void sum(const Tensor &A, const std::vector &dims,Tensor &B,const bool keepdims=false) { - sumDispatcher::sum(A, axis, keepdims, B); + sumDispatcher::sum(A, dims, B, keepdims); } template struct prodDispatcher 
{ - static void prod(const Tensor &A, const int axis,const bool keepdims, Tensor &B) = delete; + static void prod(const Tensor &A, const std::vector &dims,Tensor &B,const bool keepdims=false) = delete; }; template - void prod(const Tensor &A, const int axis,const bool keepdims, Tensor &B) + void prod(const Tensor &A, const std::vector &dims,Tensor &B,const bool keepdims=false) { - prodDispatcher::prod(A, axis, keepdims, B); + prodDispatcher::prod(A, dims, B, keepdims); } } #endif // DEEPX_TENSORFUNC_REDUCE_HPP diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp index bcd568b1..e4aa4080 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp @@ -11,7 +11,7 @@ #include "deepx/tensorfunc/io.hpp" namespace deepx::tensorfunc -{ +{ template struct printDispatcher { @@ -22,33 +22,42 @@ namespace deepx::tensorfunc // 统一分配CPU内存 unsigned char *host_data = new unsigned char[total_bytes]; - if (host_data == nullptr) { + if (host_data == nullptr) + { throw std::runtime_error("Failed to allocate host memory"); } - + // 统一复制数据到CPU cudaError_t err = cudaMemcpy(host_data, t.data, total_bytes, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { + if (err != cudaSuccess) + { delete[] host_data; throw std::runtime_error("Failed to copy data from device to host"); } // 对于half和bf16类型需要转换为float - if (t.shape.dtype == Precision::Float16 || t.shape.dtype == Precision::BFloat16) { - float* host_float = new float[t.shape.size]; - if (host_float == nullptr) { + if (t.shape.dtype == Precision::Float16 || t.shape.dtype == Precision::BFloat16) + { + float *host_float = new float[t.shape.size]; + if (host_float == nullptr) + { delete[] host_data; throw std::runtime_error("Failed to allocate host memory for float conversion"); } // 在CPU上进行类型转换 - if (t.shape.dtype == Precision::Float16) { - for(size_t i = 0; i < t.shape.size; i++) { - 
host_float[i] = __half2float(((half*)host_data)[i]); + if (t.shape.dtype == Precision::Float16) + { + for (size_t i = 0; i < t.shape.size; i++) + { + host_float[i] = __half2float(((half *)host_data)[i]); } - } else { // BFloat16 - for(size_t i = 0; i < t.shape.size; i++) { - host_float[i] = __bfloat162float(((nv_bfloat16*)host_data)[i]); + } + else + { // BFloat16 + for (size_t i = 0; i < t.shape.size; i++) + { + host_float[i] = __bfloat162float(((nv_bfloat16 *)host_data)[i]); } } @@ -56,7 +65,8 @@ namespace deepx::tensorfunc stdutil::print(t.shape.shape, host_float, Precision::Float32, f.empty() ? "%.4f" : f); delete[] host_float; } - else { + else + { // 其他类型直接打印 stdutil::print(t.shape.shape, host_data, t.shape.dtype, f); } @@ -64,6 +74,64 @@ namespace deepx::tensorfunc delete[] host_data; } }; -} + template + struct saveDispatcher + { + static void save(Tensor &tensor, const std::string &path, int filebegin = 0) + { + // 保存shape + std::string shapepath = path + ".shape"; + std::string shapedata = tensor.shape.toYaml(); + std::ofstream shape_fs(shapepath, std::ios::binary); + shape_fs.write(shapedata.c_str(), shapedata.size()); + shape_fs.close(); + + // 保存data + std::string datapath = path + ".data"; + std::ofstream data_fs(datapath, std::ios::binary | std::ios::in | std::ios::out); + + if (!data_fs.is_open()) + { + // 如果文件不存在,则创建新文件 + data_fs.open(datapath, std::ios::binary | std::ios::out); + } + data_fs.seekp(filebegin); + data_fs.write(reinterpret_cast(tensor.data), tensor.shape.size * sizeof(T)); + data_fs.close(); + } + }; + template + struct loadDispatcher + { + static Tensor load(const std::string &path, int filebegin = 0) + { + // 加载shape + std::string shapepath = path + ".shape"; + std::ifstream shape_fs(shapepath, std::ios::binary); + std::string shapedata((std::istreambuf_iterator(shape_fs)), std::istreambuf_iterator()); + + Shape shape; + shape.fromYaml(shapedata); + shape_fs.close(); + + // 加载data + Tensor tensor = New(shape); + std::string 
datapath = path + ".data"; + std::ifstream data_fs(datapath, std::ios::binary); + + if (!data_fs.is_open()) + { + throw std::runtime_error("无法打开数据文件: " + datapath); + } + + // 设置读取位置 + data_fs.seekg(filebegin); + data_fs.read(reinterpret_cast(tensor.data), shape.size * sizeof(T)); + data_fs.close(); + + return tensor; + } + }; +} #endif // DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce.hpp deleted file mode 100644 index 8a39fccf..00000000 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef DEEPX_TENSORFUNC_REDUCE_HPP -#define DEEPX_TENSORFUNC_REDUCE_HPP - -#include -#include -#include -#include - -#include "deepx/tensor.hpp" -#include "deepx/shape_reduce.hpp" -#include "deepx/tensorfunc/init.hpp" - -namespace deepx::tensorfunc -{ - - template - void sum(const Tensor &tensor, const std::vector &dims, Tensor &result); - - - template - void product(const Tensor &tensor, const std::vector &dims, Tensor &result); -} -#endif \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce_miaobyte.hpp new file mode 100644 index 00000000..7b2a9f58 --- /dev/null +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce_miaobyte.hpp @@ -0,0 +1,68 @@ +#ifndef DEEPX_TENSORFUNC_REDUCE_MIAOBYTE_HPP +#define DEEPX_TENSORFUNC_REDUCE_MIAOBYTE_HPP + +#include +#include +#include + +#include "deepx/tensor.hpp" +#include "deepx/shape_reduce.hpp" +#include "deepx/tensorfunc/authors.hpp" +#include + +#include "deepx/tensorfunc/reduce.hpp" + +namespace deepx::tensorfunc +{ + + template < typename T> + struct reducemaxDispatcher + { + static void reducemax(const Tensor &A, const std::vector &dims, Tensor &B,const bool keepdims) { + if (axis < 0) { + axis += A.shape.dim; + } + if (axis >= A.shape.dim) { + throw 
std::invalid_argument("Invalid axis for reducemax"); + } + + } + }; + + + template < typename T> + struct reduceminDispatcher + { + static void reducemin(const Tensor &A, const std::vector &dims, Tensor &B,const bool keepdims) { + if (axis < 0) { + axis += A.shape.dim; + } + if (axis >= A.shape.dim) { + throw std::invalid_argument("Invalid axis for reducemin"); + } + + } + }; + + + template + struct sumDispatcher + { + static void sum(const Tensor &tensor, const std::vector &dims, Tensor &result,const bool keepdims) + { + + + } + }; + + + template + struct prodDispatcher + { + static void prod(const Tensor &tensor, const std::vector &dims, Tensor &result,const bool keepdims) + { + + } + }; +} +#endif DEEPX_TENSORFUNC_REDUCE_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/file.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/file.hpp deleted file mode 100644 index 5bfea0e2..00000000 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/file.hpp +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef DEEPX_TENSORFUNC_FILE_HPP -#define DEEPX_TENSORFUNC_FILE_HPP - -#include -#include - -#include -namespace deepx::tensorfunc -{ - template - void save(Tensor &tensor,const std::string &path) - { - std::string shapepath = path + ".shape"; - std::string shapedata = tensor.shape.toYaml(); - std::ofstream shape_fs(shapepath, std::ios::binary); - shape_fs.write(shapedata.c_str(), shapedata.size()); - shape_fs.close(); - - std::string datapath = path + ".data"; - std::ofstream data_fs(datapath, std::ios::binary); - data_fs.write(reinterpret_cast(tensor.data), tensor.shape.size * sizeof(T)); - data_fs.close(); - } - template - Tensor load(const std::string &path) - { - - std::string shapepath = path + ".shape"; - std::ifstream shape_fs(shapepath, std::ios::binary); - std::string shapedata((std::istreambuf_iterator(shape_fs)), std::istreambuf_iterator()); - - Shape shape; - shape.fromYaml(shapedata); - shape_fs.close(); - - Tensor tensor=New(shape); 
- std::string datapath = path + ".data"; - std::ifstream data_fs(datapath, std::ios::binary); - - data_fs.read(reinterpret_cast(tensor.data), shape.size * sizeof(T)); - data_fs.close(); - - - return tensor; - } -} - -#endif \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp index 1a30bf40..e6feebcc 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp @@ -8,6 +8,7 @@ #include "stdutil/print.hpp" #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensorfunc/io.hpp" +#include "deepx/tensorfunc/new.hpp" namespace deepx::tensorfunc { @@ -34,5 +35,64 @@ namespace deepx::tensorfunc stdutil::print(t.shape.shape, t.data, t.shape.dtype, f); } }; + + template + struct saveDispatcher + { + static void save(Tensor &tensor, const std::string &path, int filebegin = 0) + { + // 保存shape + std::string shapepath = path + ".shape"; + std::string shapedata = tensor.shape.toYaml(); + std::ofstream shape_fs(shapepath, std::ios::binary); + shape_fs.write(shapedata.c_str(), shapedata.size()); + shape_fs.close(); + + // 保存data + std::string datapath = path + ".data"; + std::ofstream data_fs(datapath, std::ios::binary | std::ios::in | std::ios::out); + + if (!data_fs.is_open()) + { + // 如果文件不存在,则创建新文件 + data_fs.open(datapath, std::ios::binary | std::ios::out); + } + data_fs.seekp(filebegin); + data_fs.write(reinterpret_cast(tensor.data), tensor.shape.size * sizeof(T)); + data_fs.close(); + } + }; + template + struct loadDispatcher + { + static Tensor load(const std::string &path, int filebegin = 0) + { + // 加载shape + std::string shapepath = path + ".shape"; + std::ifstream shape_fs(shapepath, std::ios::binary); + std::string shapedata((std::istreambuf_iterator(shape_fs)), std::istreambuf_iterator()); + + Shape shape; + shape.fromYaml(shapedata); + shape_fs.close(); + + // 加载data + Tensor 
tensor = New(shape); + std::string datapath = path + ".data"; + std::ifstream data_fs(datapath, std::ios::binary); + + if (!data_fs.is_open()) + { + throw std::runtime_error("无法打开数据文件: " + datapath); + } + + // 设置读取位置 + data_fs.seekg(filebegin); + data_fs.read(reinterpret_cast(tensor.data), shape.size * sizeof(T)); + data_fs.close(); + + return tensor; + } + }; } #endif // DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce.hpp deleted file mode 100644 index aebd110a..00000000 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce.hpp +++ /dev/null @@ -1,188 +0,0 @@ -#ifndef DEEPX_TENSORFUNC_REDUCE_HPP -#define DEEPX_TENSORFUNC_REDUCE_HPP - -#include -#include -#include -#include -#include - -#include "deepx/tensor.hpp" -#include "deepx/shape_reduce.hpp" -#include "deepx/tensorfunc/init_miaobyte.hpp" - -namespace deepx::tensorfunc -{ - using namespace hwy::HWY_NAMESPACE; - - template - void sum(const Tensor &tensor, const std::vector &dims, Tensor &result) - { - constant(result,T(0)); - - std::vector sorted_dims = dims; - if (dims.size()==0){ - sorted_dims=arrange(tensor.shape.dim); - } - // 从大到小排序 - std::sort(sorted_dims.begin(), sorted_dims.end(), std::greater()); - std::vector sumMap = reduceDimMap(tensor.shape, sorted_dims); - // 如果dims的最后一个元素是tensor.shape.dim-1,则说明求和的数据不连续(不对齐),无法simd(需要不停跳跃) - - const ScalableTag _tag; - size_t minshape_1=Lanes(_tag); - // if (true) - if (sorted_dims.rbegin()[0] == tensor.shape.dim - 1 - ||tensor.shape.dim>sorted_dims.size() - ||tensor.shape[-1]>=minshape_1 - ) - { - tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &sumMap](const int idx_linear, const std::vector &indices, std::vector &newIndices) - { - // 计算输出索引 - - for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { - if (sumMap[i]==0) { - newIndices[j++]=indices[i]; - } - } - // 累加求和 - int 
outputIdx=result.shape.linearat(newIndices); -#pragma omp atomic - result.data[outputIdx]+=tensor.data[idx_linear]; }, result.shape.dim); - } - else - { - //这里有bug,todo - // 如果数据连续(对齐),则可以simd - tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &sumMap](const int idx_linear, const std::vector &indices, std::vector &newIndices) - { - // 计算输出索引 - for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { - if (sumMap[i]==0) { - newIndices[j++]=indices[i]; - } - } - int outputIdx = result.shape.linearat(newIndices); - - - int shape_last = tensor.shape[-1]; - const ScalableTag tag; - const size_t lanes = Lanes(tag); - size_t j = 0; - T sum=0; - // 前部分:处理到对齐 - while (j < shape_last && !IsAligned(tag, tensor.data + idx_linear + j)) - { - sum+=tensor.data[idx_linear + j]; - ++j; - } - - // 中间部分:SIMD - size_t aligned_end = shape_last - (shape_last % lanes); - auto sum_vec = Zero(tag); // 初始化累加向量为0 - for (; j + lanes <= aligned_end; j += lanes) - { - auto vec = Load(tag, tensor.data + idx_linear + j); - sum_vec = Add(sum_vec, vec); // 向量累加 - } - -// 将向量累加结果写回 - sum+= ReduceSum(tag, sum_vec); // 使用ReduceSum替代GetLane(SumOfLane()) - - - // 尾部分:处理剩余 - for (; j < shape_last; ++j) - { - sum+=tensor.data[idx_linear + j]; - } - #pragma omp atomic - result.data[outputIdx]+=sum; - - }, result.shape.dim); - } - } - - template - void product(const Tensor &tensor, const std::vector &dims, Tensor &result) - { - - std::vector sorted_dims = dims; - if (dims.size()==0){ - sorted_dims=arrange(tensor.shape.dim); - } - // 从大到小排序 - std::sort(sorted_dims.begin(), sorted_dims.end(), std::greater()); - std::vector sumMap = reduceDimMap(tensor.shape, sorted_dims); - // 如果dims的最后一个元素是tensor.shape.dim-1,则说明求和的数据不连续(不对齐),无法simd(需要不停跳跃) - constant(result,T(1)); - if (sorted_dims.at(sorted_dims.size() - 1) == tensor.shape.dim - 1&&tensor.shape.dim>sorted_dims.size()) - { - tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &sumMap](const int idx_linear, const std::vector 
&indices, std::vector &newIndices) - { - // 计算输出索引 - - for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { - if (sumMap[i]==0) { - newIndices[j++]=indices[i]; - } - } - // 累加求和 - int outputIdx=result.shape.linearat(newIndices); -#pragma omp atomic - result.data[outputIdx]*=tensor.data[idx_linear]; }, result.shape.dim); - } - else - { - // 如果数据连续(对齐),则可以simd - tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &sumMap](const int i, const std::vector &indices, std::vector &newIndices) - { - // 计算输出索引 - - for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { - if (sumMap[i]==0) { - newIndices[j++]=indices[i]; - } - } - // 累加求和 - int outputIdx = result.shape.linearat(newIndices); - - - int shape_last = tensor.shape[-1]; - const ScalableTag tag; - const size_t lanes = Lanes(tag); - size_t j = 0; - T product=1; - // 前部分:处理到对齐 - while (j < shape_last && !IsAligned(tag, tensor.data + i + j)) - { - product*=tensor.data[i + j]; - ++j; - } - - // 中间部分:SIMD - size_t aligned_end = shape_last - (shape_last % lanes); - auto product_vec = One(tag); // 初始化累乘向量为1 - for (; j + lanes <= aligned_end; j += lanes) - { - auto vec = Load(tag, tensor.data + i + j); - product_vec = Mul(product_vec, vec); // 向量累乘 - } - -// 将向量累乘结果写回 - product*= ReduceMul(tag, product_vec); - - - // 尾部分:处理剩余 - for (; j < shape_last; ++j) - { - product*=tensor.data[i + j]; - } - #pragma omp atomic - result.data[outputIdx]*=product; - - }, result.shape.dim); - } - } -} -#endif \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp new file mode 100644 index 00000000..3eb77628 --- /dev/null +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp @@ -0,0 +1,190 @@ +#ifndef DEEPX_TENSORFUNC_REDUCE_MIAOBYTE_HPP +#define DEEPX_TENSORFUNC_REDUCE_MIAOBYTE_HPP + +#include +#include +#include +#include +#include + +#include "deepx/tensor.hpp" +#include "deepx/shape_reduce.hpp" 
+#include "deepx/tensorfunc/reduce.hpp" +#include "deepx/tensorfunc/init_miaobyte.hpp" + +namespace deepx::tensorfunc +{ + using namespace hwy::HWY_NAMESPACE; + + template + struct sumDispatcher + { + static void sum(const Tensor &tensor, const std::vector &dims, Tensor &result,const bool keepdims) + { + constant(result, T(0)); + + std::vector sorted_dims = dims; + if (dims.size() == 0) + { + sorted_dims = arrange(tensor.shape.dim); + } + // 从大到小排序 + std::sort(sorted_dims.begin(), sorted_dims.end(), std::greater()); + std::vector sumMap = reduceDimMap(tensor.shape, sorted_dims); + // 如果dims的最后一个元素是tensor.shape.dim-1,则说明求和的数据不连续(不对齐),无法simd(需要不停跳跃) + + const ScalableTag _tag; + size_t minshape_1 = Lanes(_tag); + // if (true) + if (sorted_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > sorted_dims.size() || tensor.shape[-1] >= minshape_1) + { + tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &sumMap](const int idx_linear, const std::vector &indices, std::vector &newIndices) + { + // 计算输出索引 + + for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { + if (sumMap[i]==0) { + newIndices[j++]=indices[i]; + } + } + // 累加求和 + int outputIdx=result.shape.linearat(newIndices); +#pragma omp atomic + result.data[outputIdx]+=tensor.data[idx_linear]; }, result.shape.dim); + } + else + { + // 这里有bug,todo + // 如果数据连续(对齐),则可以simd + tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &sumMap](const int idx_linear, const std::vector &indices, std::vector &newIndices) + { + // 计算输出索引 + for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + { + if (sumMap[i] == 0) + { + newIndices[j++] = indices[i]; + } + } + int outputIdx = result.shape.linearat(newIndices); + + int shape_last = tensor.shape[-1]; + const ScalableTag tag; + const size_t lanes = Lanes(tag); + size_t j = 0; + T sum = 0; + // 前部分:处理到对齐 + while (j < shape_last && !IsAligned(tag, tensor.data + idx_linear + j)) + { + sum += tensor.data[idx_linear + j]; + ++j; + } + + // 中间部分:SIMD + 
size_t aligned_end = shape_last - (shape_last % lanes); + auto sum_vec = Zero(tag); // 初始化累加向量为0 + for (; j + lanes <= aligned_end; j += lanes) + { + auto vec = Load(tag, tensor.data + idx_linear + j); + sum_vec = Add(sum_vec, vec); // 向量累加 + } + + // 将向量累加结果写回 + sum += ReduceSum(tag, sum_vec); // 使用ReduceSum替代GetLane(SumOfLane()) + + // 尾部分:处理剩余 + for (; j < shape_last; ++j) + { + sum += tensor.data[idx_linear + j]; + } +#pragma omp atomic + result.data[outputIdx] += sum; }, result.shape.dim); + } + } + }; + + template + struct prodDispatcher + { + static void prod(const Tensor &tensor, const std::vector &dims, Tensor &result,const bool keepdims) + { + + std::vector sorted_dims = dims; + if (dims.size() == 0) + { + sorted_dims = arrange(tensor.shape.dim); + } + // 从大到小排序 + std::sort(sorted_dims.begin(), sorted_dims.end(), std::greater()); + std::vector sumMap = reduceDimMap(tensor.shape, sorted_dims); + // 如果dims的最后一个元素是tensor.shape.dim-1,则说明求和的数据不连续(不对齐),无法simd(需要不停跳跃) + constant(result, T(1)); + if (sorted_dims.at(sorted_dims.size() - 1) == tensor.shape.dim - 1 && tensor.shape.dim > sorted_dims.size()) + { + tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &sumMap](const int idx_linear, const std::vector &indices, std::vector &newIndices) + { + // 计算输出索引 + + for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { + if (sumMap[i]==0) { + newIndices[j++]=indices[i]; + } + } + // 累加求和 + int outputIdx=result.shape.linearat(newIndices); +#pragma omp atomic + result.data[outputIdx]*=tensor.data[idx_linear]; }, result.shape.dim); + } + else + { + // 如果数据连续(对齐),则可以simd + tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &sumMap](const int i, const std::vector &indices, std::vector &newIndices) + { + // 计算输出索引 + + for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + { + if (sumMap[i] == 0) + { + newIndices[j++] = indices[i]; + } + } + // 累加求和 + int outputIdx = result.shape.linearat(newIndices); + + int shape_last = tensor.shape[-1]; + 
const ScalableTag tag; + const size_t lanes = Lanes(tag); + size_t j = 0; + T product = 1; + // 前部分:处理到对齐 + while (j < shape_last && !IsAligned(tag, tensor.data + i + j)) + { + product *= tensor.data[i + j]; + ++j; + } + + // 中间部分:SIMD + size_t aligned_end = shape_last - (shape_last % lanes); + auto product_vec = One(tag); // 初始化累乘向量为1 + for (; j + lanes <= aligned_end; j += lanes) + { + auto vec = Load(tag, tensor.data + i + j); + product_vec = Mul(product_vec, vec); // 向量累乘 + } + + // 将向量累乘结果写回 + product *= ReduceMul(tag, product_vec); + + // 尾部分:处理剩余 + for (; j < shape_last; ++j) + { + product *= tensor.data[i + j]; + } +#pragma omp atomic + result.data[outputIdx] *= product; }, result.shape.dim); + } + } + }; +} +#endif \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp index 8a7f34ca..05a0df43 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp @@ -5,9 +5,8 @@ #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" -#include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/authors.hpp" -#include "deepx/tensorfunc/file.hpp" +#include "deepx/tensorfunc/io_miaobyte.hpp" using namespace deepx; using namespace deepx::tensorfunc; @@ -15,11 +14,11 @@ void test_tensor_new(){ Tensor tensor=New({2, 3}); constant(tensor,1); print(tensor); - save(tensor,"tensor"); + save(tensor,"tensor"); Tensor tensor2=New({2, 3}); constant(tensor2,2); print(tensor2); - save(tensor2,"tensor2"); + save(tensor2,"tensor2"); } void test_arange() { diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp index 492d4e86..b1d5dcb8 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp @@ -5,7 +5,7 @@ #include 
"deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensorfunc/io_miaobyte.hpp" -#include "deepx/tensorfunc/file.hpp" +#include "deepx/tensorfunc/authors.hpp" using namespace deepx; @@ -14,11 +14,11 @@ void test_tensor_range(){ Tensor tensor=New({2, 3}); constant(tensor,1); print(tensor); - save(tensor,"2_tensor_range.1"); + save(tensor,"2_tensor_range.1"); Tensor tensor2=New({2, 3}); constant(tensor2,2); print(tensor2); - save(tensor2,"2_tensor_range.2"); + save(tensor2,"2_tensor_range.2"); } int main(){ diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp index ec82a0c9..6657ecce 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp @@ -3,11 +3,11 @@ #include "deepx/tensor.hpp" #include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/new.hpp" -#include "deepx/tensorfunc/file.hpp" +#include "deepx/tensorfunc/authors.hpp" int main(){ deepx::Tensor t=deepx::tensorfunc::New({2, 3,4}); std::iota(t.data, t.data+t.shape.size, 0); deepx::tensorfunc::print(t); - deepx::tensorfunc::save(t,"3_tensor_print"); + deepx::tensorfunc::save(t,"3_tensor_print"); return 0; } \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp index 16072397..0b1b791e 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp @@ -12,8 +12,7 @@ #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensorfunc/authors.hpp" #include "deepx/shape_matmul.hpp" -#include "deepx/tensorfunc/file.hpp" - + using namespace deepx; using namespace deepx::tensorfunc; /* @@ -50,10 +49,10 @@ void test_tensor_matmul(){ void bench_tensor_matmul(int i) { Tensor tensor= New({i,i}); uniform(tensor,0,1); - 
save(tensor,"4_tensor_matmul"+std::to_string(i)+"tensor"); + save(tensor,"4_tensor_matmul"+std::to_string(i)+"tensor"); Tensor tensor2= New({i,i}); uniform(tensor2,0,1); - save(tensor2,"4_tensor_matmul"+std::to_string(i)+"tensor2"); + save(tensor2,"4_tensor_matmul"+std::to_string(i)+"tensor2"); Tensor tensor3= New(matmul_shape(tensor.shape, tensor2.shape).shape); std::cout<<("matmul ", i, "x", i); auto start = std::chrono::high_resolution_clock::now(); @@ -61,7 +60,7 @@ void bench_tensor_matmul(int i) { matmul(tensor, tensor2, tensor3); auto end=std::chrono::high_resolution_clock::now(); std::chrono::duration duration = end - start; - save(tensor3,"4_tensor_matmul"+std::to_string(i)+"result"); + save(tensor3,"4_tensor_matmul"+std::to_string(i)+"result"); std::cout << "time:" << duration.count() << " seconds" << std::endl; } diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp index c1ce026a..02fc55ef 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp @@ -5,14 +5,14 @@ #include #include "deepx/tensor.hpp" -#include "deepx/tensorfunc/reduce.hpp" +#include "deepx/tensorfunc/reduce_miaobyte.hpp" #include "stdutil/vector.hpp" #include "deepx/vector_combination.hpp" #include "deepx/shape_reduce.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensorfunc/io_miaobyte.hpp" -#include "deepx/tensorfunc/file.hpp" +#include "deepx/tensorfunc/authors.hpp" #include @@ -33,7 +33,7 @@ void test_sum() std::cout <<"sum(t,"<< comb <<")"<< std::endl; Shape sumshape=reduceShape(shape,comb); Tensor r = New(sumshape.shape); - sum(tensor, comb, r); + sum(tensor, comb,r); print(r); } /* @@ -59,8 +59,8 @@ void benchmark_sum(int i){ { Shape sShape = reduceShape(shape, comb); Tensor r=New(sShape.shape); - sum(tensor, comb,r); - save(r,"5_tensor_sum"+std::to_string(i)+"result"); + 
sum(tensor, comb,r); + save(r,"5_tensor_sum"+std::to_string(i)+"result"); } auto end=std::chrono::high_resolution_clock::now(); std::chrono::duration duration = end - start; From 1496b75bc1caeddf83537828bb9dec114637ce02 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Tue, 8 Apr 2025 22:20:03 +0800 Subject: [PATCH 4/5] doc: --- doc/excuter/deepx.op.drawio.svg | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 doc/excuter/deepx.op.drawio.svg diff --git a/doc/excuter/deepx.op.drawio.svg b/doc/excuter/deepx.op.drawio.svg new file mode 100644 index 00000000..f6fe1fd0 --- /dev/null +++ b/doc/excuter/deepx.op.drawio.svg @@ -0,0 +1,4 @@ + + + +
excuter 
ompsimd
tensorfunc<T>
tensorfunc 特化
TF
cpp-common
excuter 
cuda
TFfactory
List
init.hpp
elementwise.hpp
matmul.hpp

io.hpp

reduce.hpp

changeshape.hpp
authora
init_authora.hpp
elementwise_authora.hpp
matmul_authora.hpp

io_authora.hpp

reduce_authora.hpp

changeshape_authora.hpp
authorb
matmul_authorb.hpp

io_authorb.hpp

reduce_authorb.hpp

changeshape_authorb.hpp
TF
+ name
+ args
+ returns
template author 特化
%3CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20style%3D%22edgeStyle%3DorthogonalEdgeStyle%3Brounded%3D0%3BorthogonalLoop%3D1%3BjettySize%3Dauto%3Bhtml%3D1%3BentryX%3D0%3BentryY%3D0.123%3BentryDx%3D0%3BentryDy%3D0%3BentryPerimeter%3D0%3BexitX%3D1.006%3BexitY%3D0.145%3BexitDx%3D0%3BexitDy%3D0%3BexitPerimeter%3D0%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22-332%22%20y%3D%22283%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22-150%22%20y%3D%22280%22%20as%3D%22targetPoint%22%2F%3E%3CArray%20as%3D%22points%22%3E%3CmxPoint%20x%3D%22-332%22%20y%3D%22280%22%2F%3E%3C%2FArray%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22template%20author%20%E7%89%B9%E5%8C%96%22%20style%3D%22edgeLabel%3Bhtml%3D1%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3Bresizable%3D0%3Bpoints%3D%5B%5D%3B%22%20vertex%3D%221%22%20connectable%3D%220%22%20parent%3D%222%22%3E%3CmxGeometry%20x%3D%220.4298%22%20y%3D%221%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22-37%22%20y%3D%221%22%20as%3D%22offset%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphModel%3E
%3CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20style%3D%22edgeStyle%3DorthogonalEdgeStyle%3Brounded%3D0%3BorthogonalLoop%3D1%3BjettySize%3Dauto%3Bhtml%3D1%3BentryX%3D0%3BentryY%3D0.123%3BentryDx%3D0%3BentryDy%3D0%3BentryPerimeter%3D0%3BexitX%3D1.006%3BexitY%3D0.145%3BexitDx%3D0%3BexitDy%3D0%3BexitPerimeter%3D0%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22-332%22%20y%3D%22283%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22-150%22%20y%3D%22280%22%20as%3D%22targetPoint%22%2F%3E%3CArray%20as%3D%22points%22%3E%3CmxPoint%20x%3D%22-332%22%20y%3D%22280%22%2F%3E%3C%2FArray%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22template%20author%20%E7%89%B9%E5%8C%96%22%20style%3D%22edgeLabel%3Bhtml%3D1%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3Bresizable%3D0%3Bpoints%3D%5B%5D%3B%22%20vertex%3D%221%22%20connectable%3D%220%22%20parent%3D%222%22%3E%3CmxGeometry%20x%3D%220.4298%22%20y%3D%221%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22-37%22%20y%3D%221%22%20as%3D%22offset%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphModel%3E
template author 精度特化
Add:TF
+ run override
Sub:TF
+ run override
\ No newline at end of file From d2c88582022d7329ca60228273b2d4fc865e894c Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Tue, 8 Apr 2025 22:26:59 +0800 Subject: [PATCH 5/5] doc: --- doc/excuter/excuter.md | 9 ++++++++- src/deepx/tensorfunc/changeshape_miaobyte.cu | 1 - 2 files changed, 8 insertions(+), 2 deletions(-) delete mode 100644 src/deepx/tensorfunc/changeshape_miaobyte.cu diff --git a/doc/excuter/excuter.md b/doc/excuter/excuter.md index 700a60bd..0e604e04 100644 --- a/doc/excuter/excuter.md +++ b/doc/excuter/excuter.md @@ -60,9 +60,16 @@ todo #### 4.front对接测试 -1.先启动excuter +1.先启动excuter可执行文件, 位于excuter/op-mem-{cuda/ompsimd}/build,可执行文件名同excuter名 2.然后测试front中py的对应算子脚本(front/py/examples 目录) +可以按照顺序,依次测试 + +1_tensor + +2_ir + +3_functional diff --git a/src/deepx/tensorfunc/changeshape_miaobyte.cu b/src/deepx/tensorfunc/changeshape_miaobyte.cu deleted file mode 100644 index 0519ecba..00000000 --- a/src/deepx/tensorfunc/changeshape_miaobyte.cu +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file