From 3bb450145eb3fadba025b229339d8195aa120914 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Fri, 28 Mar 2025 18:25:41 +0800 Subject: [PATCH] =?UTF-8?q?front&excuter:=E8=81=94=E5=90=88=E8=B0=83?= =?UTF-8?q?=E8=AF=95mul,mulscalar,div,divscalar,rdivscalar?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/excuter/op-mem-cuda/list.md | 29 +- doc/excuter/op-mem-ompsimd/list.md | 29 +- .../src/deepx/tensorfunc/elementwise.hpp | 6 +- excuter/op-mem-cuda/src/client/tfs.cpp | 64 +++- .../tensorfunc/elementwise_miaobyte_basic.cu | 148 ++++++- .../tensorfunc/elementwise_miaobyte_basic.cuh | 159 +++++++- .../tensorfunc/elementwise_miaobyte_basic.hpp | 71 ++++ .../src/deepx/tf/elementwise_basic.hpp | 362 +++++++++++++++++- excuter/op-mem-ompsimd/src/client/tfs.cpp | 65 +++- .../src/deepx/tf/elementwise.hpp | 169 ++++++++ .../examples/2_ir/2_elementwise_operator.dot | 64 ++++ .../2_ir/2_elementwise_operator.dot.svg | 302 +++++++++++++++ .../examples/2_ir/2_elementwise_operator.py | 14 +- 13 files changed, 1414 insertions(+), 68 deletions(-) create mode 100644 front/py/examples/2_ir/2_elementwise_operator.dot create mode 100644 front/py/examples/2_ir/2_elementwise_operator.dot.svg diff --git a/doc/excuter/op-mem-cuda/list.md b/doc/excuter/op-mem-cuda/list.md index 1d63988d..27bdd297 100644 --- a/doc/excuter/op-mem-cuda/list.md +++ b/doc/excuter/op-mem-cuda/list.md @@ -4,18 +4,23 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| addscalar | miaobyte | addscalar(tensor A, var b)->(tensor C) | T3=T1+scalar | addscalar(tensor A, var b)->(tensor C) | -| add | cublas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | -| add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | -| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() 
| uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | -| subscalar | miaobyte | subscalar(tensor A, var b)->(tensor C) | T3=T1-scalar | subscalar(tensor A, var b)->(tensor C) | -| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | -| constant | miaobyte | constant(tensor t, var value)->() | constant(T1) | constant(tensor t, var value)->() | -| print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | -| print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | -| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(vector shape)->(tensor tensor1) | -| newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(var shape)->(tensor tensor1) | -| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | | matmul | cublas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | +| rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | +| div | miaobyte | div(tensor A, tensor B)->(tensor C) | T3=T1/T2 | div(tensor A, tensor B)->(tensor C) | | sub | miaobyte | sub(tensor A, tensor B)->(tensor C) | T3=T1-T2 | sub(tensor A, tensor B)->(tensor C) | | argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | +| mulscalar | miaobyte | mulscalar(tensor A, var b)->(tensor C) | T3=T1*scalar | mulscalar(tensor A, var b)->(tensor C) | +| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | +| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(vector shape)->(tensor tensor1) | +| newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 = 
zeros(shape) | newtensor(var shape)->(tensor tensor1) |
+| print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() |
+| print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() |
+| divscalar | miaobyte | divscalar(tensor A, var scalar)->(tensor C) | T3=T1/scalar | divscalar(tensor A, var scalar)->(tensor C) |
+| constant | miaobyte | constant(tensor t, var value)->() | constant(T1) | constant(tensor t, var value)->() |
+| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() |
+| subscalar | miaobyte | subscalar(tensor A, var b)->(tensor C) | T3=T1-scalar | subscalar(tensor A, var b)->(tensor C) |
+| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() |
+| add | cublas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) |
+| add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) |
+| addscalar | miaobyte | addscalar(tensor A, var b)->(tensor C) | T3=T1+scalar | addscalar(tensor A, var b)->(tensor C) |
+| mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) |
diff --git a/doc/excuter/op-mem-ompsimd/list.md b/doc/excuter/op-mem-ompsimd/list.md
index 0b7d18d3..6e878c3a 100644
--- a/doc/excuter/op-mem-ompsimd/list.md
+++ b/doc/excuter/op-mem-ompsimd/list.md
@@ -5,19 +5,24 @@
| Operation | Author | Func Def | Math Formula | IR Instruction |
|-----------|--------|------------|--------------|----------------|
| concat | none | concat()->() | Tresult = concat([T1, T2...], axis=3) | concat()->() |
-| addscalar | miaobyte | addscalar(tensor a, var scalar)->(tensor c) | T3=T1+scalar | addscalar(tensor a, var scalar)->(tensor c) |
-| add | cblas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | 
-| add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | -| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | -| subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | -| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | -| constant | miaobyte | constant(tensor t, var value)->() | constant(T1,value) | constant(tensor t, var value)->() | -| print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | -| print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | -| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor tensor1) | -| newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(var shape)->(tensor tensor1) | -| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | | matmul | cblas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | | matmul | miaobyte | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | +| rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | +| div | miaobyte | div(tensor A, tensor B)->(tensor C) | T3=T1/T2 | div(tensor A, tensor B)->(tensor C) | | sub | miaobyte | sub(tensor a, tensor b)->(tensor c) | T3=T1-T2 | sub(tensor a, tensor b)->(tensor c) | | argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | +| mulscalar | miaobyte | mulscalar(tensor A, var b)->(tensor C) | T3=T1*scalar | mulscalar(tensor A, var b)->(tensor 
C) | +| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | +| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor tensor1) | +| newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(var shape)->(tensor tensor1) | +| print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | +| print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | +| divscalar | miaobyte | divscalar(tensor A, var scalar)->(tensor C) | T3=T1/scalar | divscalar(tensor A, var scalar)->(tensor C) | +| constant | miaobyte | constant(tensor t, var value)->() | constant(T1,value) | constant(tensor t, var value)->() | +| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | +| subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | +| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | +| add | cblas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | +| add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | +| addscalar | miaobyte | addscalar(tensor a, var scalar)->(tensor c) | T3=T1+scalar | addscalar(tensor a, var scalar)->(tensor c) | +| mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | diff --git a/excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp b/excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp index e05506f7..4ee525c3 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp @@ -150,13 +150,13 @@ namespace 
deepx::tensorfunc template struct rdivscalarDispatcher { - static void rdivscalar(const Tensor &input, const T value, Tensor &output) = delete; + static void rdivscalar(const T value, const Tensor &input, Tensor &output) = delete; }; template - void rdivscalar(const Tensor &input, const T value, Tensor &output) + void rdivscalar(const T value, const Tensor &input, Tensor &output) { - rdivscalarDispatcher::rdivscalar(input, value, output); + rdivscalarDispatcher::rdivscalar(value, input, output); } template diff --git a/excuter/op-mem-cuda/src/client/tfs.cpp b/excuter/op-mem-cuda/src/client/tfs.cpp index fa83af24..27361136 100644 --- a/excuter/op-mem-cuda/src/client/tfs.cpp +++ b/excuter/op-mem-cuda/src/client/tfs.cpp @@ -143,24 +143,52 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); - - // opfactory.add_op(Sub_cblas()); - // opfactory.add_op(Sub_cblas()); - - // opfactory.add_op(Mul_miaobyte()); - // opfactory.add_op(Mul_miaobyte()); - - // opfactory.add_op(Mulscalar_miaobyte()); - // opfactory.add_op(Mulscalar_miaobyte()); - - // opfactory.add_op(Div_miaobyte()); - // opfactory.add_op(Div_miaobyte()); - - // opfactory.add_op(Divscalar_miaobyte()); - // opfactory.add_op(Divscalar_miaobyte()); - - // opfactory.add_op(RDivscalar_miaobyte()); - // opfactory.add_op(RDivscalar_miaobyte()); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("B", DataCategory::Tensor, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("b", DataCategory::Var, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("B", DataCategory::Tensor, Precision::Any), + }), + vector( + { + Param("C", 
DataCategory::Tensor, Precision::Any), + }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("scalar", DataCategory::Var, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("scalar", DataCategory::Var, Precision::Any), + Param("A", DataCategory::Tensor, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); + // opfactory.add_op(Sqrt_miaobyte()); // opfactory.add_op(Sqrt_miaobyte()); diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu index f4836cd6..6d8e73ae 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu @@ -22,7 +22,6 @@ namespace deepx::tensorfunc template __global__ void add_kernel(const int16_t* A, const int16_t* B, int16_t* C,const int size); template __global__ void add_kernel(const int8_t* A, const int8_t* B, int8_t* C,const int size); - template void launch_add(int numBlocks, int blockSize,const T* a, const T* b, T* c,const int size) { @@ -133,6 +132,153 @@ namespace deepx::tensorfunc template void launch_subscalar(const int numBlocks, const int blockSize, const int32_t* a, const int32_t scalar, int32_t* c, const int size); template void launch_subscalar(const int numBlocks, const int blockSize, const int16_t* a, const int16_t scalar, int16_t* c, const int size); template void launch_subscalar(const int numBlocks, const int blockSize, const int8_t* a, const int8_t scalar, int8_t* c, const int size); + + template + __global__ void mul_kernel(const T* A, const T* B, T* C,const int size){ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + C[idx] = A[idx] * B[idx]; + } + } + template __global__ void mul_kernel(const 
double* A, const double* B, double* C,const int size); + template __global__ void mul_kernel(const float* A, const float* B, float* C,const int size); + template __global__ void mul_kernel(const half* A, const half* B, half* C,const int size); + template __global__ void mul_kernel(const nv_bfloat16* A, const nv_bfloat16* B, nv_bfloat16* C,const int size); + template __global__ void mul_kernel(const int64_t* A, const int64_t* B, int64_t* C,const int size); + template __global__ void mul_kernel(const int32_t* A, const int32_t* B, int32_t* C,const int size); + template __global__ void mul_kernel(const int16_t* A, const int16_t* B, int16_t* C,const int size); + template __global__ void mul_kernel(const int8_t* A, const int8_t* B, int8_t* C,const int size); + + template + void launch_mul(const int numBlocks, const int blockSize, const T* a, const T* b, T* c, const int size) { + mul_kernel<<>>(a, b, c, size); + } + template void launch_mul(const int numBlocks, const int blockSize, const double* a, const double* b, double* c, const int size); + template void launch_mul(const int numBlocks, const int blockSize, const float* a, const float* b, float* c, const int size); + template void launch_mul(const int numBlocks, const int blockSize, const half* a, const half* b, half* c, const int size); + template void launch_mul(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16* b, nv_bfloat16* c, const int size); + template void launch_mul(const int numBlocks, const int blockSize, const int64_t* a, const int64_t* b, int64_t* c, const int size); + template void launch_mul(const int numBlocks, const int blockSize, const int32_t* a, const int32_t* b, int32_t* c, const int size); + template void launch_mul(const int numBlocks, const int blockSize, const int16_t* a, const int16_t* b, int16_t* c, const int size); + template void launch_mul(const int numBlocks, const int blockSize, const int8_t* a, const int8_t* b, int8_t* c, const int size); + + template + 
__global__ void mulscalar_kernel(const T* A, const T scalar, T* C,const int size){ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + C[idx] = A[idx] * scalar; + } + } + template __global__ void mulscalar_kernel(const double* A, const double scalar, double* C,const int size); + template __global__ void mulscalar_kernel(const float* A, const float scalar, float* C,const int size); + template __global__ void mulscalar_kernel(const half* A, const half scalar, half* C,const int size); + template __global__ void mulscalar_kernel(const nv_bfloat16* A, const nv_bfloat16 scalar, nv_bfloat16* C,const int size); + template __global__ void mulscalar_kernel(const int64_t* A, const int64_t scalar, int64_t* C,const int size); + template __global__ void mulscalar_kernel(const int32_t* A, const int32_t scalar, int32_t* C,const int size); + template __global__ void mulscalar_kernel(const int16_t* A, const int16_t scalar, int16_t* C,const int size); + template __global__ void mulscalar_kernel(const int8_t* A, const int8_t scalar, int8_t* C,const int size); + + template + void launch_mulscalar(const int numBlocks, const int blockSize, const T* a, const T scalar, T* c, const int size) { + mulscalar_kernel<<>>(a, scalar, c, size); + } + template void launch_mulscalar(const int numBlocks, const int blockSize, const double* a, const double scalar, double* c, const int size); + template void launch_mulscalar(const int numBlocks, const int blockSize, const float* a, const float scalar, float* c, const int size); + template void launch_mulscalar(const int numBlocks, const int blockSize, const half* a, const half scalar, half* c, const int size); + template void launch_mulscalar(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16 scalar, nv_bfloat16* c, const int size); + template void launch_mulscalar(const int numBlocks, const int blockSize, const int64_t* a, const int64_t scalar, int64_t* c, const int size); + template void 
launch_mulscalar(const int numBlocks, const int blockSize, const int32_t* a, const int32_t scalar, int32_t* c, const int size); + template void launch_mulscalar(const int numBlocks, const int blockSize, const int16_t* a, const int16_t scalar, int16_t* c, const int size); + template void launch_mulscalar(const int numBlocks, const int blockSize, const int8_t* a, const int8_t scalar, int8_t* c, const int size); + + template + __global__ void div_kernel(const T* A, const T* B, T* C,const int size){ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + C[idx] = A[idx] / B[idx]; + } + } + template __global__ void div_kernel(const double* A, const double* B, double* C,const int size); + template __global__ void div_kernel(const float* A, const float* B, float* C,const int size); + template __global__ void div_kernel(const half* A, const half* B, half* C,const int size); + template __global__ void div_kernel(const nv_bfloat16* A, const nv_bfloat16* B, nv_bfloat16* C,const int size); + template __global__ void div_kernel(const int64_t* A, const int64_t* B, int64_t* C,const int size); + template __global__ void div_kernel(const int32_t* A, const int32_t* B, int32_t* C,const int size); + template __global__ void div_kernel(const int16_t* A, const int16_t* B, int16_t* C,const int size); + template __global__ void div_kernel(const int8_t* A, const int8_t* B, int8_t* C,const int size); + + template + void launch_div(const int numBlocks, const int blockSize, const T* a, const T* b, T* c, const int size) { + div_kernel<<>>(a, b, c, size); + } + template void launch_div(const int numBlocks, const int blockSize, const double* a, const double* b, double* c, const int size); + template void launch_div(const int numBlocks, const int blockSize, const float* a, const float* b, float* c, const int size); + template void launch_div(const int numBlocks, const int blockSize, const half* a, const half* b, half* c, const int size); + template void launch_div(const int 
numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16* b, nv_bfloat16* c, const int size); + template void launch_div(const int numBlocks, const int blockSize, const int64_t* a, const int64_t* b, int64_t* c, const int size); + template void launch_div(const int numBlocks, const int blockSize, const int32_t* a, const int32_t* b, int32_t* c, const int size); + template void launch_div(const int numBlocks, const int blockSize, const int16_t* a, const int16_t* b, int16_t* c, const int size); + template void launch_div(const int numBlocks, const int blockSize, const int8_t* a, const int8_t* b, int8_t* c, const int size); + + template + __global__ void divscalar_kernel(const T* A, const T scalar, T* C,const int size){ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + C[idx] = A[idx] / scalar; + } + } + template __global__ void divscalar_kernel(const double* A, const double scalar, double* C,const int size); + template __global__ void divscalar_kernel(const float* A, const float scalar, float* C,const int size); + template __global__ void divscalar_kernel(const half* A, const half scalar, half* C,const int size); + template __global__ void divscalar_kernel(const nv_bfloat16* A, const nv_bfloat16 scalar, nv_bfloat16* C,const int size); + template __global__ void divscalar_kernel(const int64_t* A, const int64_t scalar, int64_t* C,const int size); + template __global__ void divscalar_kernel(const int32_t* A, const int32_t scalar, int32_t* C,const int size); + template __global__ void divscalar_kernel(const int16_t* A, const int16_t scalar, int16_t* C,const int size); + template __global__ void divscalar_kernel(const int8_t* A, const int8_t scalar, int8_t* C,const int size); + + template + void launch_divscalar(const int numBlocks, const int blockSize, const T* a, const T scalar, T* c, const int size) { + divscalar_kernel<<>>(a, scalar, c, size); + } + template void launch_divscalar(const int numBlocks, const int blockSize, const 
double* a, const double scalar, double* c, const int size); + template void launch_divscalar(const int numBlocks, const int blockSize, const float* a, const float scalar, float* c, const int size); + template void launch_divscalar(const int numBlocks, const int blockSize, const half* a, const half scalar, half* c, const int size); + template void launch_divscalar(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16 scalar, nv_bfloat16* c, const int size); + template void launch_divscalar(const int numBlocks, const int blockSize, const int64_t* a, const int64_t scalar, int64_t* c, const int size); + template void launch_divscalar(const int numBlocks, const int blockSize, const int32_t* a, const int32_t scalar, int32_t* c, const int size); + template void launch_divscalar(const int numBlocks, const int blockSize, const int16_t* a, const int16_t scalar, int16_t* c, const int size); + template void launch_divscalar(const int numBlocks, const int blockSize, const int8_t* a, const int8_t scalar, int8_t* c, const int size); + + template + __global__ void rdivscalar_kernel(const T scalar, const T* A, T* C,const int size){ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + C[idx] = scalar / A[idx]; + } + } + template __global__ void rdivscalar_kernel(const double scalar, const double* A, double* C,const int size); + template __global__ void rdivscalar_kernel(const float scalar, const float* A, float* C,const int size); + template __global__ void rdivscalar_kernel(const half scalar, const half* A, half* C,const int size); + template __global__ void rdivscalar_kernel(const nv_bfloat16 scalar, const nv_bfloat16* A, nv_bfloat16* C,const int size); + template __global__ void rdivscalar_kernel(const int64_t scalar, const int64_t* A, int64_t* C,const int size); + template __global__ void rdivscalar_kernel(const int32_t scalar, const int32_t* A, int32_t* C,const int size); + template __global__ void rdivscalar_kernel(const int16_t 
scalar, const int16_t* A, int16_t* C,const int size); + template __global__ void rdivscalar_kernel(const int8_t scalar, const int8_t* A, int8_t* C,const int size); + + template + void launch_rdivscalar(const int numBlocks, const int blockSize, const T scalar, const T* a, T* c, const int size) { + rdivscalar_kernel<<>>(scalar, a, c, size); + } + template void launch_rdivscalar(const int numBlocks, const int blockSize, const double scalar, const double* a, double* c, const int size); + template void launch_rdivscalar(const int numBlocks, const int blockSize, const float scalar, const float* a, float* c, const int size); + template void launch_rdivscalar(const int numBlocks, const int blockSize, const half scalar, const half* a, half* c, const int size); + template void launch_rdivscalar(const int numBlocks, const int blockSize, const nv_bfloat16 scalar, const nv_bfloat16* a, nv_bfloat16* c, const int size); + template void launch_rdivscalar(const int numBlocks, const int blockSize, const int64_t scalar, const int64_t* a, int64_t* c, const int size); + template void launch_rdivscalar(const int numBlocks, const int blockSize, const int32_t scalar, const int32_t* a, int32_t* c, const int size); + template void launch_rdivscalar(const int numBlocks, const int blockSize, const int16_t scalar, const int16_t* a, int16_t* c, const int size); + template void launch_rdivscalar(const int numBlocks, const int blockSize, const int8_t scalar, const int8_t* a, int8_t* c, const int size); + + } #endif // DEEPX_TENSORFUNC_ELEMENTWISE_MIAO_BYTE_BASIC_CUH diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cuh b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cuh index 966cfa1c..2457a510 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cuh +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cuh @@ -134,7 +134,164 @@ namespace deepx::tensorfunc template <> void launch_subscalar(const 
int numBlocks, const int blockSize, const int8_t* a, const int8_t scalar, int8_t* c,const int size); - + + // mul + template + __global__ void mul_kernel(const T* A, const T* B, T* C,const int size); + + template + void launch_mul(const int numBlocks, const int blockSize, const T* a, const T* b, T* c,const int size); + + template <> + void launch_mul(const int numBlocks, const int blockSize, const double* a, const double* b, double* c,const int size); + + template <> + void launch_mul(const int numBlocks, const int blockSize, const float* a, const float* b, float* c,const int size); + + template <> + void launch_mul(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16* b, nv_bfloat16* c,const int size); + + template <> + void launch_mul<__half>(const int numBlocks, const int blockSize, const __half* a, const __half* b, __half* c,const int size); + + template <> + void launch_mul(const int numBlocks, const int blockSize, const int64_t* a, const int64_t* b, int64_t* c,const int size); + + template <> + void launch_mul(const int numBlocks, const int blockSize, const int32_t* a, const int32_t* b, int32_t* c,const int size); + + template <> + void launch_mul(const int numBlocks, const int blockSize, const int16_t* a, const int16_t* b, int16_t* c,const int size); + + template <> + void launch_mul(const int numBlocks, const int blockSize, const int8_t* a, const int8_t* b, int8_t* c,const int size); + + // mulscalar + template + __global__ void mulscalar_kernel(const T* A, const T scalar, T* C,const int size); + + template + void launch_mulscalar(const int numBlocks, const int blockSize, const T* a, const T scalar, T* c,const int size); + + template <> + void launch_mulscalar(const int numBlocks, const int blockSize, const double* a, const double scalar, double* c,const int size); + + template <> + void launch_mulscalar(const int numBlocks, const int blockSize, const float* a, const float scalar, float* c,const int size); + + template <> + void 
launch_mulscalar(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16 scalar, nv_bfloat16* c,const int size); + + template <> + void launch_mulscalar<__half>(const int numBlocks, const int blockSize, const __half* a, const __half scalar, __half* c,const int size); + + template <> + void launch_mulscalar(const int numBlocks, const int blockSize, const int64_t* a, const int64_t scalar, int64_t* c,const int size); + + template <> + void launch_mulscalar(const int numBlocks, const int blockSize, const int32_t* a, const int32_t scalar, int32_t* c,const int size); + + template <> + void launch_mulscalar(const int numBlocks, const int blockSize, const int16_t* a, const int16_t scalar, int16_t* c,const int size); + + template <> + void launch_mulscalar(const int numBlocks, const int blockSize, const int8_t* a, const int8_t scalar, int8_t* c,const int size); + + // div + template + __global__ void div_kernel(const T* A, const T* B, T* C,const int size); + + template + void launch_div(const int numBlocks, const int blockSize, const T* a, const T* b, T* c,const int size); + + template <> + void launch_div(const int numBlocks, const int blockSize, const double* a, const double* b, double* c,const int size); + + template <> + void launch_div(const int numBlocks, const int blockSize, const float* a, const float* b, float* c,const int size); + + template <> + void launch_div(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16* b, nv_bfloat16* c,const int size); + + template <> + void launch_div<__half>(const int numBlocks, const int blockSize, const __half* a, const __half* b, __half* c,const int size); + + template <> + void launch_div(const int numBlocks, const int blockSize, const int64_t* a, const int64_t* b, int64_t* c,const int size); + + template <> + void launch_div(const int numBlocks, const int blockSize, const int32_t* a, const int32_t* b, int32_t* c,const int size); + + template <> + void launch_div(const int 
numBlocks, const int blockSize, const int16_t* a, const int16_t* b, int16_t* c,const int size); + + template <> + void launch_div(const int numBlocks, const int blockSize, const int8_t* a, const int8_t* b, int8_t* c,const int size); + + // divscalar + template + __global__ void divscalar_kernel(const T* A, const T scalar, T* C,const int size); + + template + void launch_divscalar(const int numBlocks, const int blockSize, const T* a, const T scalar, T* c,const int size); + + template <> + void launch_divscalar(const int numBlocks, const int blockSize, const double* a, const double scalar, double* c,const int size); + + template <> + void launch_divscalar(const int numBlocks, const int blockSize, const float* a, const float scalar, float* c,const int size); + + template <> + void launch_divscalar(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16 scalar, nv_bfloat16* c,const int size); + + template <> + void launch_divscalar<__half>(const int numBlocks, const int blockSize, const __half* a, const __half scalar, __half* c,const int size); + + template <> + void launch_divscalar(const int numBlocks, const int blockSize, const int64_t* a, const int64_t scalar, int64_t* c,const int size); + + template <> + void launch_divscalar(const int numBlocks, const int blockSize, const int32_t* a, const int32_t scalar, int32_t* c,const int size); + + template <> + void launch_divscalar(const int numBlocks, const int blockSize, const int16_t* a, const int16_t scalar, int16_t* c,const int size); + + template <> + void launch_divscalar(const int numBlocks, const int blockSize, const int8_t* a, const int8_t scalar, int8_t* c,const int size); + + // rdivscalar + template + __global__ void rdivscalar_kernel(const T scalar, const T* A, T* C,const int size); + + template + void launch_rdivscalar(const int numBlocks, const int blockSize, const T scalar, const T* a, T* c,const int size); + + template <> + void launch_rdivscalar(const int numBlocks, const int 
blockSize, const double scalar, const double* a, double* c,const int size); + + template <> + void launch_rdivscalar(const int numBlocks, const int blockSize, const float scalar, const float* a, float* c,const int size); + + template <> + void launch_rdivscalar(const int numBlocks, const int blockSize, const nv_bfloat16 scalar, const nv_bfloat16* a, nv_bfloat16* c,const int size); + + template <> + void launch_rdivscalar<__half>(const int numBlocks, const int blockSize, const __half scalar, const __half* a, __half* c,const int size); + + template <> + void launch_rdivscalar(const int numBlocks, const int blockSize, const int64_t scalar, const int64_t* a, int64_t* c,const int size); + + template <> + void launch_rdivscalar(const int numBlocks, const int blockSize, const int32_t scalar, const int32_t* a, int32_t* c,const int size); + + template <> + void launch_rdivscalar(const int numBlocks, const int blockSize, const int16_t scalar, const int16_t* a, int16_t* c,const int size); + + template <> + void launch_rdivscalar(const int numBlocks, const int blockSize, const int8_t scalar, const int8_t* a, int8_t* c,const int size); + + + } #endif // DEEPX_TENSORFUNC_ELEMENTWISE_MIAO_BYTE_BASIC_CUH diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.hpp index 0500dd60..72d0c32b 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.hpp @@ -69,6 +69,77 @@ namespace deepx::tensorfunc launch_subscalar(numBlocks, blockSize, A.data, scalar, C.data, A.shape.size); } }; + + template + struct mulDispatcher + { + static void mul(const Tensor &A, const Tensor &B, Tensor &C) + { + if (A.shape.size != B.shape.size || A.shape.size != C.shape.size) { + throw TensorShapeError("mul"); + } + const int blockSize = A.shape.size > 256 ? 
256 : A.shape.size; + int numBlocks = (A.shape.size + blockSize - 1) / blockSize; + launch_mul(numBlocks, blockSize, A.data, B.data, C.data, A.shape.size); + } + }; + + template + struct mulscalarDispatcher + { + static void mulscalar(const Tensor &A, const T scalar, Tensor &C) + { + if (A.shape.size != C.shape.size) { + throw TensorShapeError("mulscalar"); + } + const int blockSize = A.shape.size > 256 ? 256 : A.shape.size; + int numBlocks = (A.shape.size + blockSize - 1) / blockSize; + launch_mulscalar(numBlocks, blockSize, A.data, scalar, C.data, A.shape.size); + } + }; + + template + struct divDispatcher + { + static void div(const Tensor &A, const Tensor &B, Tensor &C) + { + if (A.shape.size != B.shape.size || A.shape.size != C.shape.size) { + throw TensorShapeError("div"); + } + const int blockSize = A.shape.size > 256 ? 256 : A.shape.size; + int numBlocks = (A.shape.size + blockSize - 1) / blockSize; + launch_div(numBlocks, blockSize, A.data, B.data, C.data, A.shape.size); + } + }; + + template + struct divscalarDispatcher + { + static void divscalar(const Tensor &A, const T scalar, Tensor &C) + { + if (A.shape.size != C.shape.size) { + throw TensorShapeError("divscalar"); + } + const int blockSize = A.shape.size > 256 ? 256 : A.shape.size; + int numBlocks = (A.shape.size + blockSize - 1) / blockSize; + launch_divscalar(numBlocks, blockSize, A.data, scalar, C.data, A.shape.size); + } + }; + + template + struct rdivscalarDispatcher + { + static void rdivscalar(const T scalar, const Tensor &A, Tensor &C) + { + if (A.shape.size != C.shape.size) { + throw TensorShapeError("rdivscalar"); + } + const int blockSize = A.shape.size > 256 ? 
256 : A.shape.size; + int numBlocks = (A.shape.size + blockSize - 1) / blockSize; + launch_rdivscalar(numBlocks, blockSize, scalar, A.data, C.data, A.shape.size); + } + }; + } #endif // DEEPX_TENSORFUNC_ELEMENTWISE_MIAO_BYTE_BASIC_HPP diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp index c0910a99..91fa6326 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp @@ -299,7 +299,367 @@ namespace deepx::tf } }; - + template + class Mul : public TF + { + public: + Mul(const vector &args, const vector &returns) + { + this->name = "mul"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + Mul(string text) + { + this->parse(text); + this->author = Author::name(); + if (this->name != "mul") + { + throw std::runtime_error("Invalid name: " + this->name); + } + } + string math_formula() const override + { + return "T3=T1*T2"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision b_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != b_type || a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float16: + 
tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::BFloat16: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + + template + class MulScalar : public TF + { + public: + MulScalar(const vector &args, const vector &returns) + { + this->name = "mulscalar"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + MulScalar(string text) + { + this->parse(text); + this->author = Author::name(); + if (this->name != "mulscalar") + { + throw std::runtime_error("Invalid name: " + this->name); + } + } + string math_formula() const override + { + return "T3=T1*scalar"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != c_type) + { + error = "Type 
mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float16: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::BFloat16: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + + template + class Div : public TF + { + public: + Div(const vector &args, const vector &returns) + { + this->name = "div"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + Div(string text) + { + this->parse(text); + this->author = Author::name(); + if (this->name != "div") + { + throw std::runtime_error("Invalid name: " + this->name); + } + } + string math_formula() const override + { + 
return "T3=T1/T2"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision b_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != b_type || a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float16: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::BFloat16: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), 
*mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + + template + class DivScalar : public TF + { + public: + DivScalar(const vector &args, const vector &returns) + { + this->name = "divscalar"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + DivScalar(string text) + { + this->parse(text); + this->author = Author::name(); + if (this->name != "divscalar") + { + throw std::runtime_error("Invalid name: " + this->name); + } + } + string math_formula() const override + { + return "T3=scalar/T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::divscalar( *mem->gettensor(this->args[0].textvalue),this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::divscalar( *mem->gettensor(this->args[0].textvalue),this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float16: + tensorfunc::divscalar( *mem->gettensor(this->args[0].textvalue),this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::BFloat16: + tensorfunc::divscalar( *mem->gettensor(this->args[0].textvalue),this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::divscalar( *mem->gettensor(this->args[0].textvalue),this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case 
Precision::Int32: + tensorfunc::divscalar( *mem->gettensor(this->args[0].textvalue),this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::divscalar( *mem->gettensor(this->args[0].textvalue),this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::divscalar( *mem->gettensor(this->args[0].textvalue),this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + + template + class RDivScalar : public TF + { + public: + RDivScalar(const vector &args, const vector &returns) + { + this->name = "rdivscalar"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + RDivScalar(string text) + { + this->parse(text); + this->author = Author::name(); + if (this->name != "rdivscalar") + { + throw std::runtime_error("Invalid name: " + this->name); + } + } + string math_formula() const override + { + return "T3=scalar/T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float16: + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), 
*mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::BFloat16: + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; }; #endif // DEEPX_TF_ELEMENTWISE_BASIC_HPP diff --git a/excuter/op-mem-ompsimd/src/client/tfs.cpp b/excuter/op-mem-ompsimd/src/client/tfs.cpp index 4eab0c4d..59dfba65 100644 --- a/excuter/op-mem-ompsimd/src/client/tfs.cpp +++ b/excuter/op-mem-ompsimd/src/client/tfs.cpp @@ -150,29 +150,56 @@ namespace deepx::tf { Param("c", DataCategory::Tensor, Precision::Any), }))); - // opfactory.add_op(Addscalar_miaobyte()); - // opfactory.add_op(Addscalar_miaobyte()); - // opfactory.add_op(Sub_miaobyte()); - // opfactory.add_op(Sub_miaobyte()); - - // opfactory.add_op(Sub_cblas()); - // opfactory.add_op(Sub_cblas()); - - // opfactory.add_op(Mul_miaobyte()); - // opfactory.add_op(Mul_miaobyte()); - - // opfactory.add_op(Mulscalar_miaobyte()); - // opfactory.add_op(Mulscalar_miaobyte()); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("B", DataCategory::Tensor, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, 
Precision::Any), + }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("b", DataCategory::Var, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); + + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("B", DataCategory::Tensor, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("scalar", DataCategory::Var, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); - // opfactory.add_op(Div_miaobyte()); - // opfactory.add_op(Div_miaobyte()); - // opfactory.add_op(Divscalar_miaobyte()); - // opfactory.add_op(Divscalar_miaobyte()); - // opfactory.add_op(RDivscalar_miaobyte()); - // opfactory.add_op(RDivscalar_miaobyte()); + tffactory.add_tf(std::make_shared>(vector( + { + Param("scalar", DataCategory::Var, Precision::Any), + Param("A", DataCategory::Tensor, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); // opfactory.add_op(Sqrt_miaobyte()); // opfactory.add_op(Sqrt_miaobyte()); diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp index 7a69c776..622463d5 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp @@ -349,6 +349,175 @@ namespace deepx::tf } }; + template + class Div : public TF + { + public: + Div(vector args, vector returns) + { + this->name = "div"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "T3=T1/T2"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + 
{ + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision b_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != b_type || a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + + template + class DivScalar : public TF + { + public: + DivScalar(vector args, vector returns) + { + this->name = "divscalar"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "T3=T1/scalar"; + } + shared_ptr clone() const override + { + return 
make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + + template + class RDivScalar : public TF + { + public: + RDivScalar(vector args, vector returns) + { + this->name = "rdivscalar"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "T3=scalar/T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = 
mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::rdivscalar( this->getvar(0, mem),*mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::rdivscalar(this->getvar(0, mem),*mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::rdivscalar(this->getvar(0, mem),*mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::rdivscalar(this->getvar(0, mem),*mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::rdivscalar(this->getvar(0, mem),*mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::rdivscalar(this->getvar(0, mem),*mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + } #endif diff --git a/front/py/examples/2_ir/2_elementwise_operator.dot b/front/py/examples/2_ir/2_elementwise_operator.dot new file mode 100644 index 00000000..b39fa214 --- /dev/null +++ b/front/py/examples/2_ir/2_elementwise_operator.dot @@ -0,0 +1,64 @@ +// Computational Graph +digraph { + rankdir=TB + node [shape=record] + 134854829346096 [label="t1 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854521156512 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box 
style=filled] + 134854521844832 [label="var_1 +0" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462386816 [label="t2 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462387008 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 134854462386624 [label="var_2 +1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462387248 [label=add color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 134854462387056 [label="t3 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462387344 [label="t4 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462387680 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 134854462387632 [label="var_3 +0.5" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462387488 [label=add color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 134854462387776 [label="t5 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462388016 [label="t6 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462388400 [label=div color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 134854462388256 [label=rdivscalar color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 134854462388352 [label="var_4 +0.05" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462388688 
[label="t7 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462388832 [label=mulscalar color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 134854462388880 [label="var_5 +2.5" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854462388736 [label=mul color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 134854462389168 [label="t8 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 134854521156512 -> 134854829346096 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854521844832 -> 134854521156512 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462387008 -> 134854462386816 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462386624 -> 134854462387008 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854829346096 -> 134854462387248 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462386816 -> 134854462387248 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462387248 -> 134854462387056 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462387680 -> 134854462387344 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462387632 -> 134854462387680 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462387344 -> 134854462387488 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462387056 -> 134854462387488 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462387488 -> 134854462387776 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462388400 -> 134854462388016 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854829346096 -> 134854462388400 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462386816 -> 134854462388400 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462388352 -> 134854462388256 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462386816 -> 134854462388256 [arrowsize=0.8 color=gray40 penwidth=1.2] + 
134854462388256 -> 134854462388688 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462388832 -> 134854462388688 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462388688 -> 134854462388832 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462388880 -> 134854462388832 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462388688 -> 134854462388736 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462386816 -> 134854462388736 [arrowsize=0.8 color=gray40 penwidth=1.2] + 134854462388736 -> 134854462389168 [arrowsize=0.8 color=gray40 penwidth=1.2] +} diff --git a/front/py/examples/2_ir/2_elementwise_operator.dot.svg b/front/py/examples/2_ir/2_elementwise_operator.dot.svg new file mode 100644 index 00000000..1c50be16 --- /dev/null +++ b/front/py/examples/2_ir/2_elementwise_operator.dot.svg @@ -0,0 +1,302 @@ + + + + + + +%3 + + + +134854829346096 + +t1 +(3, 4, 5) + + + +134854462387248 + +add + + + +134854829346096->134854462387248 + + + + + +134854462388400 + +div + + + +134854829346096->134854462388400 + + + + + +134854521156512 + +constant + + + +134854521156512->134854829346096 + + + + + +134854521844832 + +var_1 +0 + + + +134854521844832->134854521156512 + + + + + +134854462386816 + +t2 +(3, 4, 5) + + + +134854462386816->134854462387248 + + + + + +134854462386816->134854462388400 + + + + + +134854462388256 + +rdivscalar + + + +134854462386816->134854462388256 + + + + + +134854462388736 + +mul + + + +134854462386816->134854462388736 + + + + + +134854462387008 + +constant + + + +134854462387008->134854462386816 + + + + + +134854462386624 + +var_2 +1 + + + +134854462386624->134854462387008 + + + + + +134854462387056 + +t3 +(3, 4, 5) + + + +134854462387248->134854462387056 + + + + + +134854462387488 + +add + + + +134854462387056->134854462387488 + + + + + +134854462387344 + +t4 +(3, 4, 5) + + + +134854462387344->134854462387488 + + + + + +134854462387680 + +constant + + + +134854462387680->134854462387344 + + + + + +134854462387632 + +var_3 +0.5 + + + 
+134854462387632->134854462387680 + + + + + +134854462387776 + +t5 +(3, 4, 5) + + + +134854462387488->134854462387776 + + + + + +134854462388016 + +t6 +(3, 4, 5) + + + +134854462388400->134854462388016 + + + + + +134854462388688 + +t7 +(3, 4, 5) + + + +134854462388256->134854462388688 + + + + + +134854462388352 + +var_4 +0.05 + + + +134854462388352->134854462388256 + + + + + +134854462388832 + +mulscalar + + + +134854462388688->134854462388832 + + + + + +134854462388688->134854462388736 + + + + + +134854462388832->134854462388688 + + + + + +134854462388880 + +var_5 +2.5 + + + +134854462388880->134854462388832 + + + + + +134854462389168 + +t8 +(3, 4, 5) + + + +134854462388736->134854462389168 + + + + + diff --git a/front/py/examples/2_ir/2_elementwise_operator.py b/front/py/examples/2_ir/2_elementwise_operator.py index e1e4502e..80ecd6f6 100644 --- a/front/py/examples/2_ir/2_elementwise_operator.py +++ b/front/py/examples/2_ir/2_elementwise_operator.py @@ -8,7 +8,13 @@ torch_t4 = torch.full((3, 4, 5), 0.5) torch_t5 = torch_t4 + torch_t3 print(torch_t5) +torch_t6 = torch_t1 / torch_t2 +print(torch_t6) +torch_t7=0.05/torch_t2*2.5 +print(torch_t7) +torch_t8=torch_t7.mul(torch_t2) +print(torch_t8) ############-------DEEPX-------################ import deepx @@ -20,7 +26,13 @@ t4=deepx.full([3,4,5],value=0.5,name='t4') t5=t4.add(t3,out='t5') print(t5) - +t6=t1.div(t2,out='t6') +print(t6) +t7=t2.rdiv(0.05,out='t7') +t7.mul_(2.5) +print(t7) +t8=t7.mul(t2,out='t8') +print(t8) import os script_name = os.path.splitext(os.path.basename( os.path.abspath(__file__)))[0] # 获取不带后缀的脚本名 str=t3.graph.to_dot()