diff --git a/.github/workflows/executor-cppcommon.yml b/.github/workflows/executor-deepxcore.yml similarity index 86% rename from .github/workflows/executor-cppcommon.yml rename to .github/workflows/executor-deepxcore.yml index da4b2be9..dcbe9ab1 100644 --- a/.github/workflows/executor-cppcommon.yml +++ b/.github/workflows/executor-deepxcore.yml @@ -2,10 +2,10 @@ name: Excuter/cppcommon Build on: push: paths: - - 'executor/cpp-common/**' + - 'executor/deepxcore/**' pull_request: paths: - - 'executor/cpp-common/**' + - 'executor/deepxcore/**' env: HIGHWAY_VERSION: 1.2.0 @@ -48,16 +48,16 @@ jobs: uses: actions/cache@v3 with: path: | - executor/cpp-common/build + executor/deepxcore/build ~/.ccache key: ${{ runner.os }}-build-${{ hashFiles('**/CMakeLists.txt') }} restore-keys: | ${{ runner.os }}-build- - # 构建 cpp-common 库 - - name: Build Common Library + # 构建 deepxcore 库 + - name: Build deepx core Library run: | - cd executor/cpp-common + cd executor/deepxcore mkdir -p build && cd build cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER_LAUNCHER=ccache .. cmake --build . --config Release -j$(nproc) diff --git a/.github/workflows/executor-memcuda.yml b/.github/workflows/executor-heapmemcuda.yml similarity index 96% rename from .github/workflows/executor-memcuda.yml rename to .github/workflows/executor-heapmemcuda.yml index a5b43356..bd3119da 100644 --- a/.github/workflows/executor-memcuda.yml +++ b/.github/workflows/executor-heapmemcuda.yml @@ -2,10 +2,10 @@ name: op/cuda-linux Build on: push: paths: - - 'executor/mem-cuda/**' + - 'executor/heapmem-cuda/**' pull_request: paths: - - 'executor/mem-cuda/**' + - 'executor/heapmem-cuda/**' env: CUDA_VERSION: "12.9.1" CUDA_MAJOR_VERSION: "12" @@ -69,7 +69,7 @@ jobs: # 构建 CUDA 执行器 apt install -y libhiredis-dev && \ - cd ../../mem-cuda && \ + cd ../../heapmem-cuda && \ mkdir -p build && cd build && \ cmake -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ diff --git a/.github/workflows/executor-op-cuda-linux.yml b/.github/workflows/executor-op-cuda-linux.yml index 6f7b81b8..41cfa4a6 100644 --- a/.github/workflows/executor-op-cuda-linux.yml +++ b/.github/workflows/executor-op-cuda-linux.yml @@ -61,8 +61,8 @@ jobs: cp -r include/* /usr/local/include/ && \ cd /workspace && \ - # 构建 common 库 - cd executor/cpp-common && \ + # 构建 deepxcore 库 + cd executor/deepxcore && \ mkdir -p build && cd build && \ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -GNinja .. && \ ninja && \ @@ -72,9 +72,7 @@ jobs: mkdir -p build && cd build && \ cmake -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \ -DCMAKE_CUDA_ARCHITECTURES=\"60;70;75;80;86\" \ - -DCUTLASS_DIR=/usr/local \ -GNinja .. && \ ninja " \ No newline at end of file diff --git a/.github/workflows/executor-op-ompsimd-linux.yml b/.github/workflows/executor-op-ompsimd-linux.yml index e94ff0ee..414be7ed 100644 --- a/.github/workflows/executor-op-ompsimd-linux.yml +++ b/.github/workflows/executor-op-ompsimd-linux.yml @@ -48,8 +48,8 @@ jobs: uses: actions/cache@v3 with: path: | - executor/op-mem-ompsimd/build - executor/cpp-common/build + executor/op-ompsimd/build + executor/deepxcore/build ~/.ccache key: ${{ runner.os }}-build-${{ hashFiles('**/CMakeLists.txt') }} restore-keys: | @@ -58,9 +58,6 @@ jobs: # Highway 库安装 - name: Install Highway (Ubuntu) run: | - # 安装依赖 - sudo apt-get install -y libgtest-dev - # 克隆 Highway git clone --depth 1 --branch ${HIGHWAY_VERSION} https://github.com/google/highway.git cd highway @@ -81,10 +78,10 @@ jobs: # 确保头文件正确安装 sudo cp -r ../hwy /usr/local/include/ - # 构建 cpp-common 库 - - name: Build Common Library + # 构建 deepxcore 库 + - name: Build deepxcore Library run: | - cd executor/cpp-common + cd executor/deepxcore mkdir -p build && cd build cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER_LAUNCHER=ccache .. cmake --build . --config Release -j$(nproc) @@ -92,7 +89,7 @@ jobs: # 构建执行器 - name: CMake Build run: | - cd executor/op-mem-ompsimd + cd executor/op-ompsimd mkdir -p build && cd build cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER_LAUNCHER=ccache .. cmake --build . --config Release -j$(nproc) \ No newline at end of file diff --git a/README.md b/README.md index bbda77d9..ab104c08 100644 --- a/README.md +++ b/README.md @@ -1,81 +1,64 @@ # deepx ## 一.deepx概述 -deepx一种以IR计算图为核心的原生分布式自动并行的训推一体化的深度学习框架,以IR计算图为核心,经过多层级等价替换,实现从简单的数学形式的计算图,自适应等价替换为分布式、并行、自动反向的工程系统架构。 +deepx一种以统一存算面为底座的深度学习计算框架,模型函数可以通过简单的数学形式进行表达为计算图,计算图可以写入到统一存算面的存储空间,被统一存算面的调度执行器自动执行,你也可以按照统一存算面的协议,实现自己的执行器,例如dataloader的数据处理执行器是训练用户必须自己实现的。 -deepx的分前中后端,分别是为前端表达侧,编译替换调度层,执行器层,遵守严格的进程间与代码组件的隔离,以保证分工明确,架构长久稳定。 +## 二.前端与统一存算面 -+ 前端/模型表达侧,交由算法工程师、用接近数学的表达方式,设计其数学计算过程。只表示为单线程的简洁数学表达过程,不涉及复杂的device类型、分布式等。 -+ 中端:编译替换与分布式调度层:注册了多轮不同类型的IR编译器,实现等价替换,可以以插件的形式增加自定义能力如定制kvcache,实现对计算图进行局部替换,获得新的能力。 -+ 执行器层:绑定具体的加速硬件,实现真正的tensor的储存、计算、网络通信,大规模并行化。 +deepx可以划分为前端与统一存算面(中端与后端),分别是为 ++ 前端 + * 模型表达api + * 统一存算面sdk ++ 统一存算面 + 中端:编译替换调度层。对数学表达式进行等价编译替换,以匹配最优的后端kernel。 + 后端:执行器层:遵守严格的进程间与代码组件的隔离,以保证分工明确,架构长久稳定。 -### 前端 +### 前端-模型表达侧 + 面向算法工程师、用接近数学的表达方式,设计其数学计算过程。只表示为单线程的简洁数学表达过程。不容许涉及复杂的device类型、分布式概念等。 + tensor相关的函数表达式,和pytorch/numpy的api风格接近 + deepx前端会输出计算图的IR序列,传递给统一存算面 -python sdk提供接近pytorch的API -也容许其他语言的sdk接入, +### 前端-统一存算面sdk + 面向算法和infra工程师,可以通过deepx存算协议标准,实现自己的调度器、执行器。 + dataloader程序负责向存算面注册新的input tensor序列,因此依赖deepx sdk -+ IR通信调度。不同于pytorch或其他py+bind c++这种单一进程的栈上函数调度执行的方式。deepx各个程序(如front的python sdk,back的计算图编译器优化器、executor如ompsimd)之间,通过IR实现网络通信调度,需要各自启动对应进程。 +### 统一存算面:通过统一的kv寻址空间,组织分散的gpu算力与tensor。 +统一存算面可以被看作一个原生的分布式tensor计算解释器。 +统一存算面把分布式的gpu集群,抽象为统一的计算与存储平面,具体而言,就是存储了tensor元信息、计算图。但存算面本身,目前只利用了redis的kv功能。tensor的真正存储依然在gpu显存、内存、磁盘上,真正的计算,也是由执行器进程通过gpu/cpu去进行计算。但统一存算面的作用在于,抽象了tensor相关的计算图与存储表示,为tensor编程,开辟了新的上层表达语言,从而可以完全和复杂的底层工程代码隔离。 -| 维度 | PyTorch类框架 | DeepX | -|--------------|-----------------------|-------------------------| -| 执行模式 | 单进程内函数栈调度 | 多进程分布式协同 | -| 通信方式 | 内存直接访问 | IR网络计算调度协议交换 | -| 组件耦合度 | 紧耦合(Python绑定C++)| 松耦合| -| tensor生命周期管理 | 由python侧控制 | 由deltensor这个IR指令,显示管理tensor| -### 中端:编译替换与分布式调度层 + + 统一寻址空间 + * 当前采用redis存储tensor元信息,配合heapmem进程,负责管理堆tensor的生命周期。 + + + 调度层:编译替换与分布式调度层:注册了多轮不同类型的IR编译器,实现等价替换,可以以插件的形式增加自定义能力如定制kvcache,实现对计算图进行局部替换,获得新的能力。 + * 算子注册: 收集当前已就绪的执行器的算子列表,收集算子时耗和存储占用信息。计算图编译器优化器:fusion算子,计算图节点消除,自动生成tensor拆分并行的计算子图并替代原节点 + * 反向传播推导引擎:深度学习模型,如pytorch框架,通常只需要定义模型前向过程,反向的计算图是通过pytorch自动实现的。deepx同理。 + * 执行调度器:负责数据并行,流水线并行(前向反向并行),模型并行 -+ 注册中心:收集当前已就绪的执行器的算子列表,收集算子时耗和空间占用信息 -+ 计算图编译器优化器:fusion算子,计算图节点消除,自动生成tensor拆分并行的计算子图并替代原节点 -+ 执行调度器:数据并行,流水线并行(前向反向并行),模型并行。 -+ front生成基础IR,编译器负责进行fusion成executor注册的高级算子。 + + 执行器层:绑定具体的加速硬件,实现真正的tensor的储存、计算、网络通信,大规模并行化。 + * heapmem-cuda:实现了nv平台的tensor生命周期管理,是统一寻址空间中的tensor的具体实现。 + 当我们在统一寻址空间删除一个key对应的tensor,实际的tensor会通过heapmem-cuda进程进行删除,创建同理。 + heapmem管理的tensor,通常是持久的权重,可能被很多个不同进程访问,。 + 相对应的,随着函数执行完毕自动回收的中间变量tensor,可以被称之为stacktensor,这些tensor交给op进程自行管理。 + * op-cuda:实现了nv平台的常用基础算子。[cuda](docs/executor/op-mem-cuda/list.md) +## 三.deepx T程序与cpu程序 -### 执行层 +为了把gpu上的tensor计算过程,区别于传统cpu上的程序,我们为tensor的分布式计算,定义一个新概念————T程序 -执行层包括op和mem两种执行器,但实际实现时,当前只设计了一个程序同时负责op和mem的管理。 +概念比较 -负责低级的算子计算操作,以IR为执行的核心单元 -``` -Op{args(args_grad),returns(returns_grad)|func run} -``` +|cpu程序| T程序 | +|---|---| +|数据区、代码区|kv存储管理| +|上层程序设:func和struct| deepxIR和tensor| +|cpu执行底层机器码/字节码| deepx执行器执行deepxIR| +|存储-堆 |kv存储tensor元信息,heapmem管理gpu、内存上的tensordata| +|线程栈|计算进程自行管理| -Op需要实现run方法 -关于executor,只要能按deepxIR序列执行,并返回结果,就可以接入deepx分布式调度框架,因此,从硬件、指令、加速库、高级框架包括训练、推理引擎,都可以稍作修改,就接入deepx体系。 - -当前的 - - -#### 默认执行器 -+ cpu执行器,已实现ompsimd。其支持的算子列表[ompsimd](docs/executor/op-mem-ompsimd/list.md) - -#### GPU执行器 -+ cuda执行器,其支持的算子列表[cuda](docs/executor/op-mem-cuda/list.md) - -欢迎大家提交cuda代码 - -+ rocm -+ apple -+ 其他硬件加速器 - -#### 张量计算框架or函数级执行器 - -DeepX可以集成现有的张量计算框架作为执行器,充分利用现有生态系统的优化能力: - -+ jax: - - 结合DeepX的分布式调度,使JAX代码自动获得分布式执行能力 - - 支持异构设备(GPU/TPU)加速的同时保持DeepX的分布式弹性扩展 - -+ LibTorch/aten: - - 可将PyTorch生态系统的算子作为DeepX执行器 - - 利用ATEN底层优化的同时享受DeepX分布式调度的优势 - -这种架构使得DeepX可以整合各类先进的计算框架作为执行引擎,同时提供统一的分布式调度和执行能力,为用户提供更灵活的选择和更高的性能。 - - ### 官方文档 [https://deepx.array2d.com](https://deepx.array2d.com) diff --git a/executor/cpp-common/test/CMakeLists.txt b/executor/cpp-common/test/CMakeLists.txt deleted file mode 100644 index 127aea0a..00000000 --- a/executor/cpp-common/test/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ - -add_executable(test_dtypes 0_dtypes.cpp) -target_link_libraries(test_dtypes deepx_common) - -add_executable(test_tf 1_tf.cpp) -target_link_libraries(test_tf deepx_common) - -add_executable(test_tfcheck 1_tfcheck.cpp) -target_link_libraries(test_tfcheck deepx_common) - -add_executable(test_saveload 2_saveload.cpp) -target_link_libraries(test_saveload deepx_common) \ No newline at end of file diff --git a/executor/cpp-common/CMakeLists.txt b/executor/deepxcore/CMakeLists.txt similarity index 68% rename from executor/cpp-common/CMakeLists.txt rename to executor/deepxcore/CMakeLists.txt index cd28bf54..df82063b 100644 --- a/executor/cpp-common/CMakeLists.txt +++ b/executor/deepxcore/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.15...3.29) -project(deepx-common LANGUAGES CXX ) +project(deepxcore LANGUAGES CXX ) # 设置 C++ 标准 set(CMAKE_CXX_STANDARD 17) @@ -14,10 +14,10 @@ include_directories(src) # 源文件 -file(GLOB_RECURSE DEEPX_COMMON_SOURCES "src/*.cpp") +file(GLOB_RECURSE DEEPX_CORE_SOURCES "src/*.cpp") -add_library(deepx_common SHARED - ${DEEPX_COMMON_SOURCES} +add_library(deepxcore SHARED + ${DEEPX_CORE_SOURCES} ) @@ -30,9 +30,9 @@ else() set(YAMLCPP_LIB yaml-cpp) endif() -target_link_libraries(deepx_common PUBLIC ${YAMLCPP_LIB}) +target_link_libraries(deepxcore PUBLIC ${YAMLCPP_LIB}) -target_include_directories(deepx_common PUBLIC +target_include_directories(deepxcore PUBLIC $ $ ) diff --git a/executor/cpp-common/src/client/udpserver.cpp b/executor/deepxcore/src/client/udpserver.cpp similarity index 100% rename from executor/cpp-common/src/client/udpserver.cpp rename to executor/deepxcore/src/client/udpserver.cpp diff --git a/executor/cpp-common/src/client/udpserver.hpp b/executor/deepxcore/src/client/udpserver.hpp similarity index 100% rename from executor/cpp-common/src/client/udpserver.hpp rename to executor/deepxcore/src/client/udpserver.hpp diff --git a/executor/cpp-common/src/client/unixsocketserver.cpp b/executor/deepxcore/src/client/unixsocketserver.cpp similarity index 100% rename from executor/cpp-common/src/client/unixsocketserver.cpp rename to executor/deepxcore/src/client/unixsocketserver.cpp diff --git a/executor/cpp-common/src/client/unixsocketserver.hpp b/executor/deepxcore/src/client/unixsocketserver.hpp similarity index 100% rename from executor/cpp-common/src/client/unixsocketserver.hpp rename to executor/deepxcore/src/client/unixsocketserver.hpp diff --git a/executor/cpp-common/src/client/worker.hpp b/executor/deepxcore/src/client/worker.hpp similarity index 100% rename from executor/cpp-common/src/client/worker.hpp rename to executor/deepxcore/src/client/worker.hpp diff --git a/executor/cpp-common/src/deepx/dtype.hpp b/executor/deepxcore/src/deepx/dtype.hpp similarity index 100% rename from executor/cpp-common/src/deepx/dtype.hpp rename to executor/deepxcore/src/deepx/dtype.hpp diff --git a/executor/cpp-common/src/deepx/mem/mem.hpp b/executor/deepxcore/src/deepx/mem/mem.hpp similarity index 100% rename from executor/cpp-common/src/deepx/mem/mem.hpp rename to executor/deepxcore/src/deepx/mem/mem.hpp diff --git a/executor/cpp-common/src/deepx/shape.cpp b/executor/deepxcore/src/deepx/shape.cpp similarity index 100% rename from executor/cpp-common/src/deepx/shape.cpp rename to executor/deepxcore/src/deepx/shape.cpp diff --git a/executor/cpp-common/src/deepx/shape.hpp b/executor/deepxcore/src/deepx/shape.hpp similarity index 100% rename from executor/cpp-common/src/deepx/shape.hpp rename to executor/deepxcore/src/deepx/shape.hpp diff --git a/executor/cpp-common/src/deepx/shape_changeshape.cpp b/executor/deepxcore/src/deepx/shape_changeshape.cpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_changeshape.cpp rename to executor/deepxcore/src/deepx/shape_changeshape.cpp diff --git a/executor/cpp-common/src/deepx/shape_changeshape.hpp b/executor/deepxcore/src/deepx/shape_changeshape.hpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_changeshape.hpp rename to executor/deepxcore/src/deepx/shape_changeshape.hpp diff --git a/executor/cpp-common/src/deepx/shape_matmul.cpp b/executor/deepxcore/src/deepx/shape_matmul.cpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_matmul.cpp rename to executor/deepxcore/src/deepx/shape_matmul.cpp diff --git a/executor/cpp-common/src/deepx/shape_matmul.hpp b/executor/deepxcore/src/deepx/shape_matmul.hpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_matmul.hpp rename to executor/deepxcore/src/deepx/shape_matmul.hpp diff --git a/executor/cpp-common/src/deepx/shape_range.cpp b/executor/deepxcore/src/deepx/shape_range.cpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_range.cpp rename to executor/deepxcore/src/deepx/shape_range.cpp diff --git a/executor/cpp-common/src/deepx/shape_reduce.cpp b/executor/deepxcore/src/deepx/shape_reduce.cpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_reduce.cpp rename to executor/deepxcore/src/deepx/shape_reduce.cpp diff --git a/executor/cpp-common/src/deepx/shape_reduce.hpp b/executor/deepxcore/src/deepx/shape_reduce.hpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_reduce.hpp rename to executor/deepxcore/src/deepx/shape_reduce.hpp diff --git a/executor/cpp-common/src/deepx/shape_tensorinit.cpp b/executor/deepxcore/src/deepx/shape_tensorinit.cpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_tensorinit.cpp rename to executor/deepxcore/src/deepx/shape_tensorinit.cpp diff --git a/executor/cpp-common/src/deepx/shape_tensorinit.hpp b/executor/deepxcore/src/deepx/shape_tensorinit.hpp similarity index 100% rename from executor/cpp-common/src/deepx/shape_tensorinit.hpp rename to executor/deepxcore/src/deepx/shape_tensorinit.hpp diff --git a/executor/cpp-common/src/deepx/tensor.hpp b/executor/deepxcore/src/deepx/tensor.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensor.hpp rename to executor/deepxcore/src/deepx/tensor.hpp diff --git a/executor/cpp-common/src/deepx/tensorbase.hpp b/executor/deepxcore/src/deepx/tensorbase.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorbase.hpp rename to executor/deepxcore/src/deepx/tensorbase.hpp diff --git a/executor/cpp-common/src/deepx/tensorfunc/authors.hpp b/executor/deepxcore/src/deepx/tensorfunc/authors.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorfunc/authors.hpp rename to executor/deepxcore/src/deepx/tensorfunc/authors.hpp diff --git a/executor/cpp-common/src/deepx/tensorfunc/changeshape.hpp b/executor/deepxcore/src/deepx/tensorfunc/changeshape.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorfunc/changeshape.hpp rename to executor/deepxcore/src/deepx/tensorfunc/changeshape.hpp diff --git a/executor/cpp-common/src/deepx/tensorfunc/elementwise.hpp b/executor/deepxcore/src/deepx/tensorfunc/elementwise.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorfunc/elementwise.hpp rename to executor/deepxcore/src/deepx/tensorfunc/elementwise.hpp diff --git a/executor/cpp-common/src/deepx/tensorfunc/init.hpp b/executor/deepxcore/src/deepx/tensorfunc/init.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorfunc/init.hpp rename to executor/deepxcore/src/deepx/tensorfunc/init.hpp diff --git a/executor/cpp-common/src/deepx/tensorfunc/io.hpp b/executor/deepxcore/src/deepx/tensorfunc/io.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorfunc/io.hpp rename to executor/deepxcore/src/deepx/tensorfunc/io.hpp diff --git a/executor/cpp-common/src/deepx/tensorfunc/matmul.hpp b/executor/deepxcore/src/deepx/tensorfunc/matmul.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorfunc/matmul.hpp rename to executor/deepxcore/src/deepx/tensorfunc/matmul.hpp diff --git a/executor/cpp-common/src/deepx/tensorfunc/reduce.hpp b/executor/deepxcore/src/deepx/tensorfunc/reduce.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorfunc/reduce.hpp rename to executor/deepxcore/src/deepx/tensorfunc/reduce.hpp diff --git a/executor/cpp-common/src/deepx/tensorfunc/tensorlife.hpp b/executor/deepxcore/src/deepx/tensorfunc/tensorlife.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tensorfunc/tensorlife.hpp rename to executor/deepxcore/src/deepx/tensorfunc/tensorlife.hpp diff --git a/executor/cpp-common/src/deepx/tf/tf.cpp b/executor/deepxcore/src/deepx/tf/tf.cpp similarity index 100% rename from executor/cpp-common/src/deepx/tf/tf.cpp rename to executor/deepxcore/src/deepx/tf/tf.cpp diff --git a/executor/cpp-common/src/deepx/tf/tf.hpp b/executor/deepxcore/src/deepx/tf/tf.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tf/tf.hpp rename to executor/deepxcore/src/deepx/tf/tf.hpp diff --git a/executor/cpp-common/src/deepx/tf/tffactory.cpp b/executor/deepxcore/src/deepx/tf/tffactory.cpp similarity index 100% rename from executor/cpp-common/src/deepx/tf/tffactory.cpp rename to executor/deepxcore/src/deepx/tf/tffactory.cpp diff --git a/executor/cpp-common/src/deepx/tf/tffactory.hpp b/executor/deepxcore/src/deepx/tf/tffactory.hpp similarity index 100% rename from executor/cpp-common/src/deepx/tf/tffactory.hpp rename to executor/deepxcore/src/deepx/tf/tffactory.hpp diff --git a/executor/cpp-common/src/deepx/vector_combination.cpp b/executor/deepxcore/src/deepx/vector_combination.cpp similarity index 100% rename from executor/cpp-common/src/deepx/vector_combination.cpp rename to executor/deepxcore/src/deepx/vector_combination.cpp diff --git a/executor/cpp-common/src/deepx/vector_combination.hpp b/executor/deepxcore/src/deepx/vector_combination.hpp similarity index 100% rename from executor/cpp-common/src/deepx/vector_combination.hpp rename to executor/deepxcore/src/deepx/vector_combination.hpp diff --git a/executor/cpp-common/src/stdutil/error.hpp b/executor/deepxcore/src/stdutil/error.hpp similarity index 100% rename from executor/cpp-common/src/stdutil/error.hpp rename to executor/deepxcore/src/stdutil/error.hpp diff --git a/executor/cpp-common/src/stdutil/fs.cpp b/executor/deepxcore/src/stdutil/fs.cpp similarity index 100% rename from executor/cpp-common/src/stdutil/fs.cpp rename to executor/deepxcore/src/stdutil/fs.cpp diff --git a/executor/cpp-common/src/stdutil/fs.hpp b/executor/deepxcore/src/stdutil/fs.hpp similarity index 100% rename from executor/cpp-common/src/stdutil/fs.hpp rename to executor/deepxcore/src/stdutil/fs.hpp diff --git a/executor/cpp-common/src/stdutil/num.cpp b/executor/deepxcore/src/stdutil/num.cpp similarity index 100% rename from executor/cpp-common/src/stdutil/num.cpp rename to executor/deepxcore/src/stdutil/num.cpp diff --git a/executor/cpp-common/src/stdutil/num.hpp b/executor/deepxcore/src/stdutil/num.hpp similarity index 100% rename from executor/cpp-common/src/stdutil/num.hpp rename to executor/deepxcore/src/stdutil/num.hpp diff --git a/executor/cpp-common/src/stdutil/print.hpp b/executor/deepxcore/src/stdutil/print.hpp similarity index 100% rename from executor/cpp-common/src/stdutil/print.hpp rename to executor/deepxcore/src/stdutil/print.hpp diff --git a/executor/cpp-common/src/stdutil/string.cpp b/executor/deepxcore/src/stdutil/string.cpp similarity index 100% rename from executor/cpp-common/src/stdutil/string.cpp rename to executor/deepxcore/src/stdutil/string.cpp diff --git a/executor/cpp-common/src/stdutil/string.hpp b/executor/deepxcore/src/stdutil/string.hpp similarity index 100% rename from executor/cpp-common/src/stdutil/string.hpp rename to executor/deepxcore/src/stdutil/string.hpp diff --git a/executor/cpp-common/src/stdutil/time.hpp b/executor/deepxcore/src/stdutil/time.hpp similarity index 100% rename from executor/cpp-common/src/stdutil/time.hpp rename to executor/deepxcore/src/stdutil/time.hpp diff --git a/executor/cpp-common/src/stdutil/vector.hpp b/executor/deepxcore/src/stdutil/vector.hpp similarity index 100% rename from executor/cpp-common/src/stdutil/vector.hpp rename to executor/deepxcore/src/stdutil/vector.hpp diff --git a/executor/cpp-common/test/0_dtypes.cpp b/executor/deepxcore/test/0_dtypes.cpp similarity index 100% rename from executor/cpp-common/test/0_dtypes.cpp rename to executor/deepxcore/test/0_dtypes.cpp diff --git a/executor/cpp-common/test/1_tf.cpp b/executor/deepxcore/test/1_tf.cpp similarity index 100% rename from executor/cpp-common/test/1_tf.cpp rename to executor/deepxcore/test/1_tf.cpp diff --git a/executor/cpp-common/test/1_tfcheck.cpp b/executor/deepxcore/test/1_tfcheck.cpp similarity index 100% rename from executor/cpp-common/test/1_tfcheck.cpp rename to executor/deepxcore/test/1_tfcheck.cpp diff --git a/executor/cpp-common/test/2_saveload.cpp b/executor/deepxcore/test/2_saveload.cpp similarity index 100% rename from executor/cpp-common/test/2_saveload.cpp rename to executor/deepxcore/test/2_saveload.cpp diff --git a/executor/deepxcore/test/CMakeLists.txt b/executor/deepxcore/test/CMakeLists.txt new file mode 100644 index 00000000..28cb4906 --- /dev/null +++ b/executor/deepxcore/test/CMakeLists.txt @@ -0,0 +1,12 @@ + +add_executable(test_dtypes 0_dtypes.cpp) +target_link_libraries(test_dtypes deepxcore) + +add_executable(test_tf 1_tf.cpp) +target_link_libraries(test_tf deepxcore) + +add_executable(test_tfcheck 1_tfcheck.cpp) +target_link_libraries(test_tfcheck deepxcore) + +add_executable(test_saveload 2_saveload.cpp) +target_link_libraries(test_saveload deepxcore) \ No newline at end of file diff --git a/executor/mem-cuda/CMakeLists.txt b/executor/heapmem-cuda/CMakeLists.txt similarity index 100% rename from executor/mem-cuda/CMakeLists.txt rename to executor/heapmem-cuda/CMakeLists.txt diff --git a/executor/mem-cuda/README.md b/executor/heapmem-cuda/README.md similarity index 77% rename from executor/mem-cuda/README.md rename to executor/heapmem-cuda/README.md index e5a88711..3b45efc6 100644 --- a/executor/mem-cuda/README.md +++ b/executor/heapmem-cuda/README.md @@ -1,4 +1,4 @@ -# mem-cuda 方案草案 +# heapmem-cuda 方案草案 本目录用于设计/实现单机多进程的 GPU Tensor 统一存储面(CUDA IPC),并通过 Redis 做 name → IPC handle 的集中注册与控制。 @@ -20,7 +20,7 @@ - `bytes`: int - `ipc_handle`: binary - `refcount`: int -无需设计tensor owner + ### 2) Redis 指令队列(List) 控制通道 list key: `tensor_lifecycle`。 @@ -39,13 +39,19 @@ - 仅限同机;需保证 device id 一致 - 跨 stream 写读需要显式同步(事件/流同步策略) -## 显存池方案 +## 是否有必要使用显存池 + +显存池通常用于子分配(suballoc),子分配和cuda ipc存在冲突。 + +堆tensor不会高频alloc/free + +但是,计算进程的栈tensor必须使用,现在有以下2个选项 -- **RMM (RAPIDS Memory Manager)** + **RMM (RAPIDS Memory Manager)** - 优点:成熟、支持 pool/async allocator、统计完善 - 适合:对稳定性与可观察性要求高的生产环境 -其他op计算进程可以使用CUB。 +**CUB** ## 目录结构(具体方案) ``` @@ -53,6 +59,9 @@ mem-cuda/ README.md doc/ src/ + registry/ + init.h #进程初始化时,向redis注册当前节点、节点所有gpu、和gpu显存大小 + cudastream.h # cudastream流和redis的list(lifecycle指令)结合 ipc/ # CUDA IPC 封装 ipc.h ipc.cpp @@ -74,6 +83,25 @@ mem-cuda/ - `runtime/`: 指令消费/路由与跨 stream 同步策略。 - `common/`: 状态码、JSON 解析、日志等公共工具聚合。 + +### 架构图 +``` mermaid +graph LR + subgraph 单机 + RM["Redis (元数据 + 指令队列)"] + HMC["heapmem-cuda 进程"] + CP["计算进程 (多进程)"] + GPU["GPU"] + end + + HMC -->|注册/读写 Hash| RM + CP -->|读写/推送 指令 list| RM + HMC -->|cudaMalloc / cudaIpcGetMemHandle| GPU + CP -->|cudaIpcOpenMemHandle| GPU + HMC -->|管理 ipc_handle / GC| CP + HMC -->|流/同步策略| GPU +``` + ## 后续工作清单(分阶段) - [ ] 阶段 0:确定目录与接口(完成本 README 细化) - [ ] 阶段 1:实现 `lifecycle/` diff --git a/executor/mem-cuda/src/CMakeLists.txt b/executor/heapmem-cuda/src/CMakeLists.txt similarity index 100% rename from executor/mem-cuda/src/CMakeLists.txt rename to executor/heapmem-cuda/src/CMakeLists.txt diff --git a/executor/mem-cuda/src/registry/CMakeLists.txt b/executor/heapmem-cuda/src/registry/CMakeLists.txt similarity index 100% rename from executor/mem-cuda/src/registry/CMakeLists.txt rename to executor/heapmem-cuda/src/registry/CMakeLists.txt diff --git a/executor/mem-cuda/src/registry/registry.cpp b/executor/heapmem-cuda/src/registry/registry.cpp similarity index 100% rename from executor/mem-cuda/src/registry/registry.cpp rename to executor/heapmem-cuda/src/registry/registry.cpp diff --git a/executor/mem-cuda/src/registry/registry.h b/executor/heapmem-cuda/src/registry/registry.h similarity index 100% rename from executor/mem-cuda/src/registry/registry.h rename to executor/heapmem-cuda/src/registry/registry.h diff --git a/executor/mem-cuda/src/runtime/lifecycle.cpp b/executor/heapmem-cuda/src/runtime/lifecycle.cpp similarity index 100% rename from executor/mem-cuda/src/runtime/lifecycle.cpp rename to executor/heapmem-cuda/src/runtime/lifecycle.cpp diff --git a/executor/mem-cuda/src/runtime/lifecycle.h b/executor/heapmem-cuda/src/runtime/lifecycle.h similarity index 100% rename from executor/mem-cuda/src/runtime/lifecycle.h rename to executor/heapmem-cuda/src/runtime/lifecycle.h diff --git a/executor/mem-cuda/src/runtime/sync.cpp b/executor/heapmem-cuda/src/runtime/sync.cpp similarity index 100% rename from executor/mem-cuda/src/runtime/sync.cpp rename to executor/heapmem-cuda/src/runtime/sync.cpp diff --git a/executor/mem-cuda/src/runtime/sync.h b/executor/heapmem-cuda/src/runtime/sync.h similarity index 100% rename from executor/mem-cuda/src/runtime/sync.h rename to executor/heapmem-cuda/src/runtime/sync.h diff --git a/executor/mem-cuda/test/CMakeLists.txt b/executor/heapmem-cuda/test/CMakeLists.txt similarity index 100% rename from executor/mem-cuda/test/CMakeLists.txt rename to executor/heapmem-cuda/test/CMakeLists.txt diff --git a/executor/op-cuda/CMakeLists.txt b/executor/op-cuda/CMakeLists.txt index f373c08e..f363c005 100644 --- a/executor/op-cuda/CMakeLists.txt +++ b/executor/op-cuda/CMakeLists.txt @@ -26,7 +26,7 @@ include_directories(src) # include_directories(${cutlass_SOURCE_DIR}/tools/util/include) -add_subdirectory(../cpp-common common) # 包含公共模块 +add_subdirectory(../deepxcore deepxcore) # 包含公共模块 # 源文件 file(GLOB_RECURSE DEEPX_SOURCES "src/deepx/*.cpp" "src/deepx/*.cu" "src/deepx/*.cuh") @@ -53,7 +53,7 @@ add_library(deepx SHARED target_link_libraries( deepx PUBLIC - deepx_common + deepxcore yaml-cpp CUDA::cublas ) diff --git a/executor/op-mem-ompsimd/.cursorignore b/executor/op-ompsimd/.cursorignore similarity index 100% rename from executor/op-mem-ompsimd/.cursorignore rename to executor/op-ompsimd/.cursorignore diff --git a/executor/op-mem-ompsimd/.cursorrules b/executor/op-ompsimd/.cursorrules similarity index 100% rename from executor/op-mem-ompsimd/.cursorrules rename to executor/op-ompsimd/.cursorrules diff --git a/executor/op-mem-ompsimd/.gitignore b/executor/op-ompsimd/.gitignore similarity index 100% rename from executor/op-mem-ompsimd/.gitignore rename to executor/op-ompsimd/.gitignore diff --git a/executor/op-mem-ompsimd/CMakeLists.txt b/executor/op-ompsimd/CMakeLists.txt similarity index 96% rename from executor/op-mem-ompsimd/CMakeLists.txt rename to executor/op-ompsimd/CMakeLists.txt index abc1a868..c2c80712 100644 --- a/executor/op-mem-ompsimd/CMakeLists.txt +++ b/executor/op-ompsimd/CMakeLists.txt @@ -12,7 +12,7 @@ set(CMAKE_BUILD_TYPE Debug) # 包含头文件目录 include_directories(src) -add_subdirectory(../cpp-common common) +add_subdirectory(../deepxcore deepxcore) # 源文件 @@ -53,7 +53,7 @@ add_library(deepx_ompsimd SHARED target_link_libraries( deepx_ompsimd PUBLIC - deepx_common + deepxcore yaml-cpp ${JEMALLOC_LIBRARIES} openblas diff --git a/executor/op-mem-ompsimd/Dockerfile b/executor/op-ompsimd/Dockerfile similarity index 100% rename from executor/op-mem-ompsimd/Dockerfile rename to executor/op-ompsimd/Dockerfile diff --git a/executor/op-mem-ompsimd/dockerbuild.sh b/executor/op-ompsimd/dockerbuild.sh similarity index 100% rename from executor/op-mem-ompsimd/dockerbuild.sh rename to executor/op-ompsimd/dockerbuild.sh diff --git a/executor/op-mem-ompsimd/log.md b/executor/op-ompsimd/log.md similarity index 100% rename from executor/op-mem-ompsimd/log.md rename to executor/op-ompsimd/log.md diff --git a/executor/op-mem-ompsimd/src/client/main.cpp b/executor/op-ompsimd/src/client/main.cpp similarity index 100% rename from executor/op-mem-ompsimd/src/client/main.cpp rename to executor/op-ompsimd/src/client/main.cpp diff --git a/executor/op-mem-ompsimd/src/client/tfs.cpp b/executor/op-ompsimd/src/client/tfs.cpp similarity index 100% rename from executor/op-mem-ompsimd/src/client/tfs.cpp rename to executor/op-ompsimd/src/client/tfs.cpp diff --git a/executor/op-mem-ompsimd/src/client/tfs.hpp b/executor/op-ompsimd/src/client/tfs.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/client/tfs.hpp rename to executor/op-ompsimd/src/client/tfs.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/dtype_ompsimd.hpp b/executor/op-ompsimd/src/deepx/dtype_ompsimd.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/dtype_ompsimd.hpp rename to executor/op-ompsimd/src/deepx/dtype_ompsimd.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/mem/mem_ompsimd.hpp b/executor/op-ompsimd/src/deepx/mem/mem_ompsimd.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/mem/mem_ompsimd.hpp rename to executor/op-ompsimd/src/deepx/mem/mem_ompsimd.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_cblas.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/elementwise_cblas.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_cblas.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/elementwise_cblas.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/equal.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/equal.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/equal.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/equal.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/highway.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/highway.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/highway.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/highway.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/init_miaobyte.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/init_miaobyte.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/init_miaobyte.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/init_miaobyte.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/new_mempool.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/new_mempool.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/new_mempool.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/new_mempool.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp b/executor/op-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp rename to executor/op-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tf/arg.hpp b/executor/op-ompsimd/src/deepx/tf/arg.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tf/arg.hpp rename to executor/op-ompsimd/src/deepx/tf/arg.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tf/changeshape.hpp b/executor/op-ompsimd/src/deepx/tf/changeshape.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tf/changeshape.hpp rename to executor/op-ompsimd/src/deepx/tf/changeshape.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tf/elementwise.hpp b/executor/op-ompsimd/src/deepx/tf/elementwise.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tf/elementwise.hpp rename to executor/op-ompsimd/src/deepx/tf/elementwise.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tf/init.hpp b/executor/op-ompsimd/src/deepx/tf/init.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tf/init.hpp rename to executor/op-ompsimd/src/deepx/tf/init.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tf/io.hpp b/executor/op-ompsimd/src/deepx/tf/io.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tf/io.hpp rename to executor/op-ompsimd/src/deepx/tf/io.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tf/matmul.hpp b/executor/op-ompsimd/src/deepx/tf/matmul.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tf/matmul.hpp rename to executor/op-ompsimd/src/deepx/tf/matmul.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tf/reduce.hpp b/executor/op-ompsimd/src/deepx/tf/reduce.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tf/reduce.hpp rename to executor/op-ompsimd/src/deepx/tf/reduce.hpp diff --git a/executor/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp b/executor/op-ompsimd/src/deepx/tf/tensorlife.hpp similarity index 100% rename from executor/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp rename to executor/op-ompsimd/src/deepx/tf/tensorlife.hpp diff --git a/executor/op-mem-ompsimd/test/op/1_mem.cpp b/executor/op-ompsimd/test/op/1_mem.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/op/1_mem.cpp rename to executor/op-ompsimd/test/op/1_mem.cpp diff --git a/executor/op-mem-ompsimd/test/op/CMakeLists.txt b/executor/op-ompsimd/test/op/CMakeLists.txt similarity index 100% rename from executor/op-mem-ompsimd/test/op/CMakeLists.txt rename to executor/op-ompsimd/test/op/CMakeLists.txt diff --git a/executor/op-mem-ompsimd/test/tensorfunc/1_shape.cpp b/executor/op-ompsimd/test/tensorfunc/1_shape.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/1_shape.cpp rename to executor/op-ompsimd/test/tensorfunc/1_shape.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/2_shape_combintion.cpp b/executor/op-ompsimd/test/tensorfunc/2_shape_combintion.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/2_shape_combintion.cpp rename to executor/op-ompsimd/test/tensorfunc/2_shape_combintion.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/2_tensor_equal.cpp b/executor/op-ompsimd/test/tensorfunc/2_tensor_equal.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/2_tensor_equal.cpp rename to executor/op-ompsimd/test/tensorfunc/2_tensor_equal.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp b/executor/op-ompsimd/test/tensorfunc/2_tensor_new.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/2_tensor_new.cpp rename to executor/op-ompsimd/test/tensorfunc/2_tensor_new.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp b/executor/op-ompsimd/test/tensorfunc/2_tensor_range.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/2_tensor_range.cpp rename to executor/op-ompsimd/test/tensorfunc/2_tensor_range.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/2_tensor_range.py b/executor/op-ompsimd/test/tensorfunc/2_tensor_range.py similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/2_tensor_range.py rename to executor/op-ompsimd/test/tensorfunc/2_tensor_range.py diff --git a/executor/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp b/executor/op-ompsimd/test/tensorfunc/3_tensor_print.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/3_tensor_print.cpp rename to executor/op-ompsimd/test/tensorfunc/3_tensor_print.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/4_tensor_add.cpp b/executor/op-ompsimd/test/tensorfunc/4_tensor_add.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/4_tensor_add.cpp rename to executor/op-ompsimd/test/tensorfunc/4_tensor_add.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp b/executor/op-ompsimd/test/tensorfunc/4_tensor_matmul.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/4_tensor_matmul.cpp rename to executor/op-ompsimd/test/tensorfunc/4_tensor_matmul.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/4_tensor_max.cpp b/executor/op-ompsimd/test/tensorfunc/4_tensor_max.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/4_tensor_max.cpp rename to executor/op-ompsimd/test/tensorfunc/4_tensor_max.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/4_tensor_mul.cpp b/executor/op-ompsimd/test/tensorfunc/4_tensor_mul.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/4_tensor_mul.cpp rename to executor/op-ompsimd/test/tensorfunc/4_tensor_mul.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/4_tensor_sub.cpp b/executor/op-ompsimd/test/tensorfunc/4_tensor_sub.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/4_tensor_sub.cpp rename to executor/op-ompsimd/test/tensorfunc/4_tensor_sub.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp b/executor/op-ompsimd/test/tensorfunc/5_tensor_sum.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp rename to executor/op-ompsimd/test/tensorfunc/5_tensor_sum.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/6_tensor_broadcast.cpp b/executor/op-ompsimd/test/tensorfunc/6_tensor_broadcast.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/6_tensor_broadcast.cpp rename to executor/op-ompsimd/test/tensorfunc/6_tensor_broadcast.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp b/executor/op-ompsimd/test/tensorfunc/7_tensor_transpose.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp rename to executor/op-ompsimd/test/tensorfunc/7_tensor_transpose.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp b/executor/op-ompsimd/test/tensorfunc/8_tensor_concat.cpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp rename to executor/op-ompsimd/test/tensorfunc/8_tensor_concat.cpp diff --git a/executor/op-mem-ompsimd/test/tensorfunc/CMakeLists.txt b/executor/op-ompsimd/test/tensorfunc/CMakeLists.txt similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/CMakeLists.txt rename to executor/op-ompsimd/test/tensorfunc/CMakeLists.txt diff --git a/executor/op-mem-ompsimd/test/tensorfunc/tensorutil.hpp b/executor/op-ompsimd/test/tensorfunc/tensorutil.hpp similarity index 100% rename from executor/op-mem-ompsimd/test/tensorfunc/tensorutil.hpp rename to executor/op-ompsimd/test/tensorfunc/tensorutil.hpp