diff --git a/doc/excuter/op-mem-cuda/list.md b/doc/excuter/op-mem-cuda/list.md index 80886ea9..eab757a0 100644 --- a/doc/excuter/op-mem-cuda/list.md +++ b/doc/excuter/op-mem-cuda/list.md @@ -4,6 +4,7 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| +| broadcastTo | miaobyte | broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector new_shape)->(tensor B) | | concat | miaobyte | concat(listtensor tensors, var axis)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var axis)->(tensor result) | | transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor A, vector dim_order)->(tensor C) | | reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | diff --git a/doc/excuter/op-mem-ompsimd/list.md b/doc/excuter/op-mem-ompsimd/list.md index 32aadf9d..f018d192 100644 --- a/doc/excuter/op-mem-ompsimd/list.md +++ b/doc/excuter/op-mem-ompsimd/list.md @@ -4,15 +4,16 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| +| broadcastTo | miaobyte | broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector new_shape)->(tensor B) | | concat | miaobyte | concat(listtensor tensors, var axis)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var axis)->(tensor result) | -| transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor A, vector dim_order)->(tensor C) | +| transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T1.transpose(dimorder=[1,0])->T2 | transpose(tensor A, vector dim_order)->(tensor C) | | add | cblas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | comparescalar | miaobyte | comparescalar(tensor A, var scalar)->(tensor mask) | mask=compare(T1,scalar) | comparescalar(tensor A, var scalar)->(tensor mask) | | uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | | addscalar | miaobyte | addscalar(tensor a, var scalar)->(tensor c) | T3=T1+scalar | addscalar(tensor a, var scalar)->(tensor c) | | log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | -| reshape | miaobyte | reshape(tensor A, vector shape)->() | T2=T1.reshape(shape) | reshape(tensor A, vector shape)->() | +| reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | | arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | | divscalar | miaobyte | divscalar(tensor A, var scalar)->(tensor C) | T3=T1/scalar | divscalar(tensor A, var scalar)->(tensor C) | | print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | diff --git a/excuter/cpp-common/src/deepx/shape.cpp b/excuter/cpp-common/src/deepx/shape.cpp index ebcb4533..c126cc58 100644 --- a/excuter/cpp-common/src/deepx/shape.cpp +++ 
b/excuter/cpp-common/src/deepx/shape.cpp @@ -64,7 +64,7 @@ namespace deepx } int Shape::linearat(const std::vector &indices) const{ int idx=0; - for(int i=0;i broadcastMap(const std::vector &a, const std::vector &b); - void fromBroadcastIndices(const std::vector &broadcastMap, const std::vector &broadcastIndices, std::vector &oldIndices ); - + } #endif // DEEPX_OP_CPU_SHAPE_HPP \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp b/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp index 1ed42cd6..42040543 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp @@ -68,6 +68,20 @@ namespace deepx::tensorfunc splitDispatcher::split(A, axis, num_outputs, B); } + template + struct broadcastToDispatcher + { + static void broadcastTo(const Tensor &A, const vector &new_shape, Tensor &B) = delete; + }; + + template + void broadcastTo(const Tensor &A, const vector &new_shape, Tensor &B) + { + broadcastToDispatcher::broadcastTo(A, new_shape, B); + } + + + template struct expandDispatcher { diff --git a/excuter/op-mem-cuda/src/client/tfs.cpp b/excuter/op-mem-cuda/src/client/tfs.cpp index 07368d1b..5a4e5540 100644 --- a/excuter/op-mem-cuda/src/client/tfs.cpp +++ b/excuter/op-mem-cuda/src/client/tfs.cpp @@ -361,6 +361,15 @@ namespace deepx::tf { Param("result", DataCategory::Tensor, Precision::Any), }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("new_shape", DataCategory::Vector, Precision::Int32), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); } // // reduce // void register_reduce(OpFactory &opfactory) diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/broadcast.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/broadcast.hpp deleted file mode 100644 index f2465561..00000000 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/broadcast.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef DEEPX_TENSORFUNC_BROADCAST_HPP -#define DEEPX_TENSORFUNC_BROADCAST_HPP - -#include "deepx/tensor.hpp" -#include "deepx/tensorfunc/new.hpp" -#include "deepx/shape_broadcast.hpp" -namespace deepx::tensorfunc -{ - template - void broadcast(const Tensor &tensor, Tensor &result); - -} -#endif \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu index 82c893c0..92940d90 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu @@ -7,6 +7,8 @@ #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensorfunc/tensor_cuda.cuh" #include "deepx/tensorfunc/vector_cuda.cuh" +#include "deepx/shape_broadcast.hpp" + namespace deepx::tensorfunc { // transpose @@ -150,39 +152,37 @@ namespace deepx::tensorfunc currentTensorIndices.copyFromDevice(outputIndices.data, dim); currentTensorIndices[axis] = concatIdxCurrentTensor; - int idxCurrentTensor = linearAt(inputStrides+tensorIdx*dim, dim, currentTensorIndices.data); + int idxCurrentTensor = linearAt(inputStrides + tensorIdx * dim, dim, currentTensorIndices.data); int idx = linearAt(outputStrides, dim, outputIndices.data); outputData[idx] = tensorsData[tensorIdx][idxCurrentTensor]; } } - template void launch_concat( - const T **tensorsData, - const int *inputStrides, - T *outputData, - const int *outputStrides, - const int dim, - const int outputLen, - const int axis, - 
const int numTensors, - const int *shapeAtAxis) - { + const T **tensorsData, + const int *inputStrides, + T *outputData, + const int *outputStrides, + const int dim, + const int outputLen, + const int axis, + const int numTensors, + const int *shapeAtAxis) + { auto [numBlocks, blockSize] = BestDims(outputLen); - //output + // output cudaVector outputStrides_d(outputStrides, dim, cudaMemcpyHostToDevice); - //input - //datas - cudaVector tensorsDataList(tensorsData, numTensors, cudaMemcpyHostToDevice); - //strides - cudaVector inputStrides_d(inputStrides, numTensors*dim, cudaMemcpyHostToDevice); - + // input + // datas + cudaVector tensorsDataList(tensorsData, numTensors, cudaMemcpyHostToDevice); + // strides + cudaVector inputStrides_d(inputStrides, numTensors * dim, cudaMemcpyHostToDevice); - //shapeAtAxis + // shapeAtAxis cudaVector shapeAtAxis_d(shapeAtAxis, numTensors, cudaMemcpyHostToDevice); int powDim = nextPowerOf2(dim); @@ -227,5 +227,116 @@ namespace deepx::tensorfunc template void launch_concat(const int16_t **tensorsData, const int *inputStrides, int16_t *outputData, const int *outputStrides, const int dim, const int len, const int axis, const int numTensors, const int *shapeAtAxis); template void launch_concat(const int8_t **tensorsData, const int *inputStrides, int8_t *outputData, const int *outputStrides, const int dim, const int len, const int axis, const int numTensors, const int *shapeAtAxis); + // broadcastTo + __host__ __device__ void fromBroadcastIndices(const BroadcastMap *broadcastMap, const int *broadcastIndices, const int broadcastIndicesDim, int *indices) + { + for (int i = 0, j = 0; i < broadcastIndicesDim; ++i) + { + switch (broadcastMap[i]) + { + case xTox: + indices[j++] = broadcastIndices[i]; + break; + case nullTo1: + break; + case xTo1: + indices[j++] = 0; + break; + } + } + } + + template + __global__ void broadcastTo_kernel(const T *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + T *output, const int *outputStrides, const int outputDim, const int outputlen) + { + const int grid_stride = gridDim.x * blockDim.x; + int thread_id = blockIdx.x * blockDim.x + threadIdx.x; + for (; thread_id < outputlen; thread_id += grid_stride) + { + int output_indices[DIM]; + linearTo(outputStrides, outputDim, output_indices, thread_id); + int input_indices[DIM]; + fromBroadcastIndices(broadcastMap, output_indices, outputDim, input_indices); + int inputIdx = linearAt(inputStrides, inputDim, input_indices); + int outputIdx = linearAt(outputStrides, outputDim, output_indices); + output[outputIdx] = input[inputIdx]; + } + } + + template + void launch_broadcastTo(const T *input, const int *inputStrides, const int intputDim, + const BroadcastMap *broadcastMap, + T *output, const int *outputStrides, const int outputDim, const int outputlen){ + + auto [numBlocks, blockSize] = BestDims(outputlen); + + // output + cudaVector outputStrides_d(outputStrides, outputDim, cudaMemcpyHostToDevice); + + // broadcastMap + cudaVector broadcastMap_d(broadcastMap, outputDim, cudaMemcpyHostToDevice); + + // input + cudaVector inputStrides_d(inputStrides, intputDim, cudaMemcpyHostToDevice); + + + int powDim = nextPowerOf2(outputDim); + // 根据计算出的2的幂次选择对应的模板实例 + switch (powDim) + { + case 1: + broadcastTo_kernel<1, T><<>>(input, inputStrides_d.data, intputDim, broadcastMap_d.data, output, outputStrides_d.data, outputDim, outputlen); + break; + case 2: + broadcastTo_kernel<2, T><<>>(input, inputStrides_d.data, intputDim, broadcastMap_d.data, output, 
outputStrides_d.data, outputDim, outputlen); + break; + case 4: + broadcastTo_kernel<4, T><<>>(input, inputStrides_d.data, intputDim, broadcastMap_d.data, output, outputStrides_d.data, outputDim, outputlen); + break; + case 8: + broadcastTo_kernel<8, T><<>>(input, inputStrides_d.data, intputDim, broadcastMap_d.data, output, outputStrides_d.data, outputDim, outputlen); + break; + case 16: + broadcastTo_kernel<16, T><<>>(input, inputStrides_d.data, intputDim, broadcastMap_d.data, output, outputStrides_d.data, outputDim, outputlen); + break; + case 32: + broadcastTo_kernel<32, T><<>>(input, inputStrides_d.data, intputDim, broadcastMap_d.data, output, outputStrides_d.data, outputDim, outputlen); + break; + case 64: + broadcastTo_kernel<64, T><<>>(input, inputStrides_d.data, intputDim, broadcastMap_d.data, output, outputStrides_d.data, outputDim, outputlen); + break; + case 128: + broadcastTo_kernel<128, T><<>>(input, inputStrides_d.data, intputDim, broadcastMap_d.data, output, outputStrides_d.data, outputDim, outputlen); + break; + default: + throw std::runtime_error("dim too large, max support 128"); + } + } + template void launch_broadcastTo(const double *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + double *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_broadcastTo(const float *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + float *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_broadcastTo(const nv_bfloat16 *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + nv_bfloat16 *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_broadcastTo<__half>(const __half *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + __half *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_broadcastTo(const int64_t *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + int64_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_broadcastTo(const int32_t *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + int32_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_broadcastTo(const int16_t *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + int16_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_broadcastTo(const int8_t *input, const int *inputStrides, const int inputDim, + const BroadcastMap *broadcastMap, + int8_t *output, const int *outputStrides, const int outputDim, const int outputlen); } #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh index 9e9a8629..e44874ea 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh @@ -1,45 +1,47 @@ #ifndef DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_CUH #define DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_CUH - -#include + +#include #include + +#include "deepx/shape_broadcast.hpp" #include 
"deepx/tensorfunc/cuda.hpp" #include "deepx/tensorfunc/authors.hpp" namespace deepx::tensorfunc { - //transpose + // transpose template - __global__ void transpose_kernel(const T* input, const int* inputStrides, T* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + __global__ void transpose_kernel(const T *input, const int *inputStrides, T *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); template - void launch_transpose(const int numBlocks, const int blockSize, const T* input, const int* inputStrides, T* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + void launch_transpose(const int numBlocks, const int blockSize, const T *input, const int *inputStrides, T *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); template <> - void launch_transpose(const int numBlocks, const int blockSize, const double* input, const int* inputStrides, double* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + void launch_transpose(const int numBlocks, const int blockSize, const double *input, const int *inputStrides, double *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); template <> - void launch_transpose(const int numBlocks, const int blockSize, const float* input, const int* inputStrides, float* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + void launch_transpose(const int numBlocks, const int blockSize, const float *input, const int *inputStrides, float *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); - template <> - void launch_transpose(const int numBlocks, const int blockSize, const nv_bfloat16* input, const int* inputStrides, nv_bfloat16* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + template <> + void launch_transpose(const int numBlocks, const int blockSize, const nv_bfloat16 *input, const int *inputStrides, nv_bfloat16 *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); template <> - void launch_transpose<__half>(const int numBlocks, const int blockSize, const __half* input, const int* inputStrides, __half* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + void launch_transpose<__half>(const int numBlocks, const int blockSize, const __half *input, const int *inputStrides, __half *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); template <> - void launch_transpose(const int numBlocks, const int blockSize, const int64_t* input, const int* inputStrides, int64_t* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + void launch_transpose(const int numBlocks, const int blockSize, const int64_t *input, const int *inputStrides, int64_t *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); template <> - void launch_transpose(const int numBlocks, const int blockSize, const int32_t* input, const int* inputStrides, int32_t* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + void launch_transpose(const int numBlocks, const int blockSize, const int32_t *input, const int *inputStrides, int32_t *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); template <> - void launch_transpose(const int numBlocks, const int blockSize, const 
int16_t* input, const int* inputStrides, int16_t* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + void launch_transpose(const int numBlocks, const int blockSize, const int16_t *input, const int *inputStrides, int16_t *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); template <> - void launch_transpose(const int numBlocks, const int blockSize, const int8_t* input, const int* inputStrides, int8_t* output, const int* outputStrides, const int dim, const int len, const int* dimOrder); + void launch_transpose(const int numBlocks, const int blockSize, const int8_t *input, const int *inputStrides, int8_t *output, const int *outputStrides, const int dim, const int len, const int *dimOrder); - template + template __global__ void concat_kernel(const T **tensorsData, const int *inputStrides, T *outputData, @@ -76,7 +78,60 @@ namespace deepx::tensorfunc template <> void launch_concat(const int8_t **tensorsData, const int *inputStrides, int8_t *outputData, const int *outputStrides, const int dim, const int len, const int axis, const int numTensors, const int *shapeAtAxis); + + + __host__ __device__ void fromBroadcastIndices(const BroadcastMap *broadcastMap, const int *broadcastIndices, const int broadcastIndicesDim, int *indices); - + // broadcastTo + template + __global__ void broadcastTo_kernel( + const T *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + T *output, const int *outputStrides,const int outputDim,const int outputlen); + + template + void launch_broadcastTo(const T *input, const int *inputStrides,const int intputDim, + const BroadcastMap *broadcastMap, + T *output, const int *outputStrides,const int outputDim,const int outputlen); + + template <> + void launch_broadcastTo(const double *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + double *output, const int *outputStrides,const int outputDim,const int outputlen); + + template <> + void launch_broadcastTo(const float *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + float *output, const int *outputStrides,const int outputDim,const int outputlen); + + template <> + void launch_broadcastTo(const nv_bfloat16 *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + nv_bfloat16 *output, const int *outputStrides,const int outputDim,const int outputlen); + + template <> + void launch_broadcastTo<__half>(const __half *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + __half *output, const int *outputStrides,const int outputDim,const int outputlen); + + template <> + void launch_broadcastTo(const int64_t *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + int64_t *output, const int *outputStrides,const int outputDim,const int outputlen); + + template <> + void launch_broadcastTo(const int32_t *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + int32_t *output, const int *outputStrides,const int outputDim,const int outputlen); + + template <> + void launch_broadcastTo(const int16_t *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + int16_t *output, const int *outputStrides,const int outputDim,const int outputlen); + + template <> + void launch_broadcastTo(const int8_t *input, const int *inputStrides,const int inputDim, + const BroadcastMap *broadcastMap, + int8_t *output, 
const int *outputStrides,const int outputDim,const int outputlen); } #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp index fb972636..1faf58e7 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -9,6 +9,8 @@ #include "deepx/tensorfunc/changeshape_miaobyte.cuh" #include "deepx/tensorfunc/cuda.hpp" #include "deepx/shape_concat.hpp" +#include "deepx/shape_broadcast.hpp" +#include "stdutil/error.hpp" namespace deepx::tensorfunc { template @@ -94,5 +96,23 @@ namespace deepx::tensorfunc axis, tensors.size(), shapeAtAxis.data()); }; }; + + + template + struct broadcastToDispatcher + { + static void broadcastTo(const Tensor &A, const vector &new_shape, Tensor &B) + { + auto A_broadcastShape = broadcastShape(A.shape.shape, new_shape); + if (A_broadcastShape.empty()||A_broadcastShape!=new_shape) + { + throw TensorShapeError("Broadcast shape mismatch"); + } + auto bmap = broadcastMap(A.shape.shape, new_shape); + launch_broadcastTo(A.data, A.shape.strides.data(), A.shape.dim, + bmap.data(), + B.data, B.shape.strides.data(), B.shape.dim, B.shape.size); + } + }; } #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp index 34046a1c..b375ce71 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp @@ -5,7 +5,6 @@ #include #include - #include "deepx/tensorfunc/changeshape_miaobyte.hpp" namespace deepx::tf @@ -99,7 +98,7 @@ namespace deepx::tf Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; vector dim_order = this->getvector(1, -1); Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; - + if (input_type != output_type) { error = "Type mismatch: " + precision_str(input_type) + " != " + precision_str(output_type); @@ -139,7 +138,7 @@ namespace deepx::tf } }; - template + template class Concat : public TF { public: @@ -163,7 +162,7 @@ namespace deepx::tf { vector tensor_names = this->getvector(0, true); Precision input_type = mem->gettensor(tensor_names[0]).get()->shape.dtype; - int axis = this->getvar(1, mem, false); + int axis = this->getvar(1, mem, true); switch (input_type) { case Precision::Float64: @@ -193,7 +192,7 @@ namespace deepx::tf std::vector *> input; for (int i = 0; i < tensor_names.size(); i++) { - input.push_back(mem->gettensor(tensor_names[i]).get()); + input.push_back(mem->gettensor(tensor_names[i]).get()); } auto output = mem->gettensor(this->returns[0].textvalue).get(); concat(input, axis, *output); @@ -205,7 +204,7 @@ namespace deepx::tf for (int i = 0; i < tensor_names.size(); i++) { input.push_back(mem->gettensor(tensor_names[i]).get()); - } + } auto output = mem->gettensor(this->returns[0].textvalue).get(); concat(input, axis, *output); break; @@ -263,5 +262,68 @@ namespace deepx::tf }; }; + template + class BroadcastTo : public TF + { + public: + BroadcastTo(const vector &args, const vector &returns) + { + this->name = "broadcastTo"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + string math_formula() const override + { + return "T2 = T1.broadcastTo(new_shape=[4,3,2])"; + } + shared_ptr clone() const 
override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + vector new_shape = this->getvector(1, true); + Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (input_type != output_type) + { + error = "Type mismatch: " + precision_str(input_type) + " != " + precision_str(output_type); + return 1; + } + switch (input_type) + { + case Precision::Float64: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float16: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::BFloat16: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported type: " + precision_str(input_type); + return 1; + } + return 0; + } + }; } #endif // DEEPX_TF_CHANGESHAPE_HPP diff --git a/excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp b/excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp index 66f5dc39..33ccab95 100644 --- a/excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp +++ b/excuter/op-mem-cuda/test/tensorfunc/2_changeshape.cpp @@ -30,6 +30,15 @@ void test_concat() concat({&a,&b,&c},1,d); print(d,"%.0f"); } + +void test_broadcastTo() +{ + Tensor a=New({3,2}); + arange(a, 1.0f, 1.0f); + Tensor b=New({4,3,2}); + broadcastTo(a, b.shape.shape, b); + print(b,"%.0f"); +} int main(int argc, char **argv) { int casearg=atoi(argv[1]); @@ -41,6 +50,9 @@ int main(int argc, char **argv) case 1: test_concat(); break; + case 2: + test_broadcastTo(); + break; } return 0; } \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/client/tfs.cpp b/excuter/op-mem-ompsimd/src/client/tfs.cpp index 9c92fe74..0489f169 100644 --- a/excuter/op-mem-ompsimd/src/client/tfs.cpp +++ b/excuter/op-mem-ompsimd/src/client/tfs.cpp @@ -352,6 +352,15 @@ namespace deepx::tf { Param("result", DataCategory::Tensor, Precision::Any), }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("new_shape", DataCategory::Vector, Precision::Int32), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); } // // reduce // void register_reduce(OpFactory &opfactory) diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp index ce786bab..f62146ac 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -5,10 +5,10 @@ 
#include #include "deepx/tensor.hpp" -#include "deepx/tensorfunc/new.hpp" -#include "deepx/tensorfunc/changeshape.hpp" #include "deepx/shape_concat.hpp" #include "deepx/shape_broadcast.hpp" +#include "deepx/tensorfunc/new.hpp" +#include "deepx/tensorfunc/changeshape.hpp" #include "deepx/tensorfunc/authors.hpp" namespace deepx::tensorfunc { @@ -100,89 +100,129 @@ namespace deepx::tensorfunc } }; - template - void split(const Tensor &tensor, const int axis, std::vector *> &results) + vector fromBroadcastIndices(const vector &broadcastMap, const vector &broadcastIndices) { - tensor.shape.rangeParallel(axis, [&](const int idx, const std::vector &indices) - { - int splitIdxCurrentTensor=indices[axis]; - int tensorIdx=0; - while (tensorIdx < results.size() ) { - if (splitIdxCurrentTensorshape[axis]) { - break; - }else{ - splitIdxCurrentTensor-=results[tensorIdx]->shape[axis]; - tensorIdx++; - } - } - std::vector currentTensorIndices=indices; - currentTensorIndices[axis]=splitIdxCurrentTensor; - results[tensorIdx]->shape.linearat(currentTensorIndices); - int idxCurrentTensor=results[tensorIdx]->shape.linearat(currentTensorIndices); - int copylen=results[tensorIdx]->shape.strides[axis]; - std::copy(tensor.data+idxCurrentTensor,tensor.data+idxCurrentTensor+copylen,results[tensorIdx]->data+idx); }); + vector srcindices; + for (int i = 0, j = 0; i < broadcastMap.size(); ++i) + { + switch (broadcastMap[i]) + { + case xTox: + srcindices.push_back(broadcastIndices[i]); + break; + case nullTo1: + break; + case xTo1: + srcindices.push_back(0); + break; + } + } + return srcindices; } - // 扩展张量维度 - 将形状中为1的维度扩展到目标维度 template - void expand(const Tensor &input, Tensor &output) + struct broadcastToDispatcher { - // 检查输入和目标形状的兼容性 - if (input.shape.dim != output.shape.dim) + static void broadcastTo(const Tensor &A, const vector &new_shape, Tensor &B) { - throw std::invalid_argument("expand维度不匹配: 输入维度 " + - std::to_string(input.shape.dim) + - ", 目标维度 " + - std::to_string(output.shape.dim) + - "请先前dim补1的方式reshape"); - } - - for (size_t i = 0; i < input.shape.dim; ++i) - { - if (input.shape[i] != output.shape[i] && input.shape[i] != 1) + auto A_broadcastShape = broadcastShape(A.shape.shape, new_shape); + if (A_broadcastShape.empty()||A_broadcastShape!=new_shape) { - throw std::invalid_argument("维度 " + std::to_string(i) + - " 不能被扩展: " + - std::to_string(input.shape[i]) + - " 到 " + - std::to_string(output.shape[i])); + throw TensorShapeError("Broadcast shape mismatch"); } + auto bmap = broadcastMap(A.shape.shape, new_shape); + + B.shape.rangeParallel(B.shape.dim, [&](const int idx, const std::vector &bindices) + { + vector aindices=fromBroadcastIndices(bmap, bindices); + B.data[idx] = A.data[A.shape.linearat(aindices)]; + }); } + }; - // 创建扩展映射 - std::vector bm = broadcastMap(input.shape.shape, output.shape.shape); + // template + // void split(const Tensor &tensor, const int axis, std::vector *> &results) + // { + // tensor.shape.rangeParallel(axis, [&](const int idx, const std::vector &indices) + // { + // int splitIdxCurrentTensor=indices[axis]; + // int tensorIdx=0; + // while (tensorIdx < results.size() ) { + // if (splitIdxCurrentTensorshape[axis]) { + // break; + // }else{ + // splitIdxCurrentTensor-=results[tensorIdx]->shape[axis]; + // tensorIdx++; + // } + // } + // std::vector currentTensorIndices=indices; + // currentTensorIndices[axis]=splitIdxCurrentTensor; + // results[tensorIdx]->shape.linearat(currentTensorIndices); + // int idxCurrentTensor=results[tensorIdx]->shape.linearat(currentTensorIndices); + 
// int copylen=results[tensorIdx]->shape.strides[axis]; + // std::copy(tensor.data+idxCurrentTensor,tensor.data+idxCurrentTensor+copylen,results[tensorIdx]->data+idx); }); + // } - // 找到最后一个需要扩展的维度 - int last_expand_dim = -1; - for (int i = input.shape.dim - 1; i >= 0; --i) - { - if (input.shape[i] != output.shape.shape[i]) - { - last_expand_dim = i; - break; - } - } + // // 扩展张量维度 - 将形状中为1的维度扩展到目标维度 + // template + // void expand(const Tensor &input, Tensor &output) + // { + // // 检查输入和目标形状的兼容性 + // if (input.shape.dim != output.shape.dim) + // { + // throw std::invalid_argument("expand维度不匹配: 输入维度 " + + // std::to_string(input.shape.dim) + + // ", 目标维度 " + + // std::to_string(output.shape.dim) + + // "请先前dim补1的方式reshape"); + // } - // 如果最后几个维度不需要扩展,可以连续复制 - if (last_expand_dim < output.shape.dim - 1) - { - int copy_len = output.shape.strides[last_expand_dim + 1]; - output.shape.rangeParallel(last_expand_dim + 1, [&bm, &output, &input, copy_len](int idx_linear, const std::vector &indices, std::vector &oldIndices) - { - fromBroadcastIndices(bm, indices, oldIndices); - int idx_old = input.shape.linearat(oldIndices); - std::copy(input.data + idx_old, - input.data + idx_old + copy_len, - output.data + idx_linear); }, input.shape.dim); - } - else - { - output.shape.rangeParallel(output.shape.dim, [&bm, &output, &input](int idx_linear, const std::vector &indices, std::vector &oldIndices) - { - fromBroadcastIndices(bm, indices, oldIndices); - int idx_old = input.shape.linearat(oldIndices); - output.data[idx_linear] = input.data[idx_old]; }, input.shape.dim); - } - } + // for (size_t i = 0; i < input.shape.dim; ++i) + // { + // if (input.shape[i] != output.shape[i] && input.shape[i] != 1) + // { + // throw std::invalid_argument("维度 " + std::to_string(i) + + // " 不能被扩展: " + + // std::to_string(input.shape[i]) + + // " 到 " + + // std::to_string(output.shape[i])); + // } + // } + + // // 创建扩展映射 + // std::vector bm = broadcastMap(input.shape.shape, output.shape.shape); + + // // 找到最后一个需要扩展的维度 + // int last_expand_dim = -1; + // for (int i = input.shape.dim - 1; i >= 0; --i) + // { + // if (input.shape[i] != output.shape.shape[i]) + // { + // last_expand_dim = i; + // break; + // } + // } + + // // 如果最后几个维度不需要扩展,可以连续复制 + // if (last_expand_dim < output.shape.dim - 1) + // { + // int copy_len = output.shape.strides[last_expand_dim + 1]; + // output.shape.rangeParallel(last_expand_dim + 1, [&bm, &output, &input, copy_len](int idx_linear, const std::vector &indices, std::vector &oldIndices) + // { + // fromBroadcastIndices(bm, indices, oldIndices); + // int idx_old = input.shape.linearat(oldIndices); + // std::copy(input.data + idx_old, + // input.data + idx_old + copy_len, + // output.data + idx_linear); }, input.shape.dim); + // } + // else + // { + // output.shape.rangeParallel(output.shape.dim, [&bm, &output, &input](int idx_linear, const std::vector &indices, std::vector &oldIndices) + // { + // fromBroadcastIndices(bm, indices, oldIndices); + // int idx_old = input.shape.linearat(oldIndices); + // output.data[idx_linear] = input.data[idx_old]; }, input.shape.dim); + // } + // } } #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp index 28fc4caf..e66dd538 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp @@ -156,7 +156,7 @@ namespace deepx::tf { vector tensor_names = 
this->getvector(0, true); Precision input_type = mem->gettensor(tensor_names[0]).get()->shape.dtype; - int axis = this->getvar(1, mem, false); + int axis = this->getvar(1, mem, true); switch (input_type) { case Precision::Float64: @@ -234,6 +234,64 @@ namespace deepx::tf }; }; + template + class BroadcastTo : public TF + { + public: + BroadcastTo(const vector &args, const vector &returns) + { + this->name = "broadcastTo"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + string math_formula() const override + { + return "T2 = T1.broadcastTo(new_shape=[4,3,2])"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + vector new_shape = this->getvector(1, true); + Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (input_type != output_type) + { + error = "Type mismatch: " + precision_str(input_type) + " != " + precision_str(output_type); + return 1; + } + switch (input_type) + { + case Precision::Float64: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + broadcastTo(*mem->gettensor(this->args[0].textvalue), new_shape, *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported type: " + precision_str(input_type); + return 1; + } + return 0; + } + }; + // class Split : public TF // { // public: @@ -269,67 +327,7 @@ namespace deepx::tf // } // }; - // template - // class Transpose : public Op - // { - // public: - // Transpose() - // { - // this->init("transpose", "any", {}, {}, false, {}, {}); - // } - // Transpose(vector args, vector returns, bool require_grad = false, vector args_grad = {}, vector returns_grad = {}) - // { - // this->init("transpose", "any", args, returns, require_grad, args_grad, returns_grad); - // } - // Transpose(initializer_list args, initializer_list returns, bool require_grad = false, initializer_list args_grad = {}, initializer_list returns_grad = {}) - // { - // this->init("transpose", "any", args, returns, require_grad, args_grad, returns_grad); - // } - // void forward(mem::Mem &mem) override - // { - // auto input = mem.gettensor(this->args[0]).get(); - // vector dimOrder; - // if (this->args.size() == 2 && !is_integer(this->args[1])) - // { - // dimOrder = mem.getvector(this->args[1]); - // } - // else if (this->args.size() > 2) - // { - // for (int i = 1; i < this->args.size(); i++) - // { - // dimOrder.push_back(atoi(this->args[i].c_str())); - // } - // } - // auto output = mem.gettensor(this->returns[0]).get(); - // tensorfunc::transpose(*input, *output, dimOrder); - // } - // void backward(mem::Mem &mem) override - // { - // auto input_grad = mem.gettensor(this->args_grad[0]).get(); - // vector dimOrder; - // if 
(this->args.size() == 2 && !is_integer(this->args[1])) - // { - // dimOrder = mem.getvector(this->args[1]); - // } - // else if (this->args.size() > 2) - // { - // for (int i = 1; i < this->args.size(); i++) - // { - // dimOrder.push_back(atoi(this->args[i].c_str())); - // } - // } - // auto output_grad = mem.gettensor(this->returns_grad[0]).get(); - // tensorfunc::transpose(*output_grad, *input_grad, dimOrder); - // } - // void funcdef() override - // { - // this->init("transpose", "float32", {"T1", "1", "0"}, {"T2"}, false, {}, {}); - // } - // string math_formula() const override - // { - // return "T2 = transpose(T1, dimorder=[1,0])"; - // } - // }; + // template // class Expand : public Op diff --git a/front/py/deepx/nn/functional/__init__.py b/front/py/deepx/nn/functional/__init__.py index c5fc2a8c..46dac20d 100644 --- a/front/py/deepx/nn/functional/__init__.py +++ b/front/py/deepx/nn/functional/__init__.py @@ -13,7 +13,7 @@ "add","sub","mul","div","sqrt","pow","exp","log","rsqrt", "matmul", "max","min","sum","prod","mean", - "reshape","permute","transpose", + "reshape","permute","transpose","concat","broadcast_to", "relu","sigmoid","swish", ] \ No newline at end of file diff --git a/front/py/deepx/nn/functional/changeshape.py b/front/py/deepx/nn/functional/changeshape.py index 294c1dc1..bf4b7f53 100644 --- a/front/py/deepx/nn/functional/changeshape.py +++ b/front/py/deepx/nn/functional/changeshape.py @@ -1,5 +1,5 @@ from typing import Union,Tuple -from deepx.tensor import Tensor +from deepx.tensor import Tensor,Shape from deepx.nn.deepxir import DeepxIR from deepx.scheduler import send from deepx.autograd import OpNode,Function,Context @@ -8,18 +8,14 @@ def _A_v_elementwiseop_C( a:Tensor, b: list[int] , op:str=None, - out:Union[Tensor,str]="",author='miaobyte')->Tensor: + out:Tensor=None, + author='miaobyte')->Tensor: g=a.graph opnode = g.add_op(op) opnode.add_input(a.node) opnode.add_input(g.add_vector("",b)) - outtensor=None - if isinstance(out,str): - outtensor=Tensor(shape=b, dtype=a.dtype, device=a.device) - outtensor.addtograph(out) - else: - outtensor=out + outtensor=out outtensor.node.add_input(opnode) if g.eager: ir=DeepxIR(op, [a.node.name,b], [outtensor.node.name],author) @@ -30,9 +26,16 @@ def _A_v_elementwiseop_C( class Reshape(Function): @staticmethod def forward(ctx:Context,t:Tensor,shape:list[int],out,author='miaobyte'): - ctx.save_data('oldshape',t.shape) - ctx.save_tensors('t',t) - return _A_v_elementwiseop_C(t,shape,"reshape",out,author) + if ctx.requires_grad: + ctx.save_data('oldshape',t.shape) + ctx.save_tensors('t',t) + outtensor=out + if isinstance(out,str): + outshape=shape + outtensor=Tensor(shape=outshape, dtype=t.dtype, device=t.device) + outtensor.addtograph(out) + outtensor._shape=Shape(shape) + return _A_v_elementwiseop_C(t,shape,"reshape",outtensor,author) @staticmethod def backward(ctx:Context,out_grad): @@ -40,19 +43,28 @@ def backward(ctx:Context,out_grad): t=ctx.get_tensor('t') return _A_v_elementwiseop_C(out_grad,oldshape,"reshape",t.node.name,author) -def reshape(t:Tensor,shape:list[int],out:Union[Tensor,str]='')->Tensor: +def reshape(t:Tensor,shape:list[int],out:Union[Tensor,str]='',author='miaobyte',requires_grad:bool=False)->Tensor: if t.shape==shape: return t - return Reshape.apply(t,shape,out) + return Reshape.apply(t,shape,out,author,requires_grad=requires_grad) OpNode.register("transpose") class Permute(Function): @staticmethod - def forward(ctx:Context,t:Tensor,dimorder:list[int],out:Union[Tensor,str]='',author='miaobyte')->Tensor: - 
ctx.save_data('dimorder',dimorder) - ctx.save_tensor('t',t) - return _A_v_elementwiseop_C(t,dimorder,"transpose",out,author) + def forward(ctx:Context, + t:Tensor, + dimorder:list[int], + out:Union[Tensor,str]='', + author='miaobyte')->Tensor: + if ctx.requires_grad: + ctx.save_data('dimorder',dimorder) + outtensor=out + if isinstance(out,str): + outshape = [t.shape[dim] for dim in dimorder] + outtensor=Tensor(shape=outshape, dtype=t.dtype, device=t.device) + outtensor.addtograph(out) + return _A_v_elementwiseop_C(t,dimorder,"transpose",outtensor,author) @staticmethod def backward(ctx:Context,in_grad,out_grad,author='miaobyte'): @@ -62,51 +74,121 @@ def backward(ctx:Context,in_grad,out_grad,author='miaobyte'): inverse_dimorder[j] = i return _A_v_elementwiseop_C(out_grad,inverse_dimorder,"transpose",in_grad,author) -def permute(t:Tensor,dimorder:list[int],out:Union[Tensor,str]='')->Tensor: +def permute(t:Tensor, + dimorder:list[int], + out:Union[Tensor,str]='', + requires_grad:bool=False, + author='miaobyte')->Tensor: if t.dim!=len(dimorder): raise ValueError(f"shape参数不合法,当前输入维度数:{len(dimorder)},张量维度数:{t.dim}") dimorder = [d % t.ndim for d in dimorder] - return Permute.apply(t,dimorder,out) + return Permute.apply(t,dimorder,out,requires_grad=requires_grad) -def transpose(t: Tensor,out:Union[Tensor,str]='')->Tensor: +def transpose(t:Tensor,out:Union[Tensor,str]='',requires_grad:bool=False,author='miaobyte')->Tensor: dimorder = list(range(t.ndim)) dimorder[-1],dimorder[-2]=dimorder[-2],dimorder[-1] - return Permute.apply(t,dimorder,out) - + return Permute.apply(t,dimorder,out,author,requires_grad=requires_grad) -# def broadcast_shape(shape_a: tuple, shape_b: tuple) -> tuple: -# """计算两个形状的广播后形状""" -# # 获取形状的长度 -# len_a, len_b = len(shape_a), len(shape_b) + +OpNode.register("concat") +class Concat(Function): + @staticmethod + def forward(ctx:Context, + tensors:list[Tensor], + dim:int, + out:Union[Tensor,str]='', + author='miaobyte')->Tensor: + if ctx.requires_grad: + ctx.save_data('dim',dim) + outtensor=out + if isinstance(out,str): + outshape=list(tensors[0].shape) + outshape[dim]=sum(t.shape[dim] for t in tensors) + outtensor=Tensor(shape=outshape, dtype=tensors[0].dtype, device=tensors[0].device) + outtensor.addtograph(out) + + g=tensors[0].graph + opnode = g.add_op("concat") + for t in tensors: + opnode.add_input(t.node) + opnode.add_input(g.add_var("",dim)) + + outtensor.node.add_input(opnode) + if g.eager: + ir=DeepxIR("concat", [[t.node.name for t in tensors], dim], [outtensor.node.name],author) + send(ir) + return outtensor -# # 创建结果形状 -# result_shape = [] + @staticmethod + def backward(ctx:Context,out_grad,author='miaobyte'): + dim=ctx.get_data('dim') + return _A_v_elementwiseop_C(out_grad,dim,"concat",t.node.name,author) + +def concat(t:Tensor,dim:int,out:Union[Tensor,str]='',requires_grad:bool=False,author='miaobyte')->Tensor: + return Concat.apply(t,dim,out,author,requires_grad=requires_grad) + +def broadcast_shape(shape_a: tuple[int], shape_b: tuple[int]) -> tuple[int]: + """计算两个形状的广播后形状""" + # 获取形状的长度 + len_a, len_b = len(shape_a), len(shape_b) -# # 从右往左对齐并计算每个维度 -# for i in range(1, min(len_a, len_b) + 1): -# dim_a = shape_a[-i] -# dim_b = shape_b[-i] + # 创建结果形状 + result_shape = [] + + # 从右往左对齐并计算每个维度 + for i in range(1, min(len_a, len_b) + 1): + dim_a = shape_a[-i] + dim_b = shape_b[-i] -# if dim_a == 1 or dim_b == 1: -# # 广播规则:如果一个维度为1,取另一个维度的值 -# result_shape.insert(0, max(dim_a, dim_b)) -# elif dim_a == dim_b: -# # 维度相同,保持不变 -# result_shape.insert(0, dim_a) -# else: -# # 
维度不同且都不为1,无法广播 -# raise ValueError(f"无法广播的形状:{shape_a} 和 {shape_b}") + if dim_a == 1 or dim_b == 1: + # 广播规则:如果一个维度为1,取另一个维度的值 + result_shape.insert(0, max(dim_a, dim_b)) + elif dim_a == dim_b: + # 维度相同,保持不变 + result_shape.insert(0, dim_a) + else: + # 维度不同且都不为1,无法广播 + raise ValueError(f"无法广播的形状:{shape_a} 和 {shape_b}") + + # 添加较长形状中多出的前导维度 + if len_a > len_b: + result_shape = list(shape_a[:len_a - len_b]) + result_shape + elif len_b > len_a: + result_shape = list(shape_b[:len_b - len_a]) + result_shape -# # 添加较长形状中多出的前导维度 -# if len_a > len_b: -# result_shape = list(shape_a[:len_a - len_b]) + result_shape -# elif len_b > len_a: -# result_shape = list(shape_b[:len_b - len_a]) + result_shape + return tuple(result_shape) + +OpNode.register("broadcastTo") +class BroadcastTo(Function): + @staticmethod + def forward(ctx:Context, + t:Tensor, + new_shape:tuple[int], + out:Union[Tensor,str]='',author='miaobyte')->Tensor: + bshape=broadcast_shape(t.shape,new_shape) + if bshape!=new_shape: + raise ValueError(f"广播失败:{t.shape} 无法广播为 {new_shape} ") + + if ctx.requires_grad: + ctx.save_data('new_shape',new_shape) + outtensor=out + if isinstance(out,str): + outshape=new_shape + outtensor=Tensor(shape=outshape, dtype=t.dtype, device=t.device) + outtensor.addtograph(out) + return _A_v_elementwiseop_C(t,new_shape,"broadcastTo",outtensor,author) -# return tuple(result_shape) + #todo: 反向传播 + @staticmethod + def backward(ctx:Context,out_grad,author='miaobyte'): + new_shape=ctx.get_data('new_shape') + return _A_v_elementwiseop_C(out_grad,new_shape,"broadcastTo",t.node.name,author) +def broadcast_to(t:Tensor,new_shape:tuple[int],out:Union[Tensor,str]='',requires_grad:bool=False,author='miaobyte')->Tensor: + return BroadcastTo.apply(t,new_shape,out,author,requires_grad=requires_grad) + # def unsqueeze(t:Tensor,dim:int)->Tensor: # # 确保dim是有效的 diff --git a/front/py/deepx/scheduler/client/udpconn.py b/front/py/deepx/scheduler/client/udpconn.py index 6a12c26a..a25b0963 100644 --- a/front/py/deepx/scheduler/client/udpconn.py +++ b/front/py/deepx/scheduler/client/udpconn.py @@ -3,7 +3,7 @@ import select class UDPConn: - def __init__(self, endpoint: str = "localhost:9090"): + def __init__(self, endpoint: str = "localhost:8080"): # 解析endpoint self._host, port_str = endpoint.split(':') self._port = int(port_str) diff --git a/front/py/deepx/tensor/changeshape.py b/front/py/deepx/tensor/changeshape.py index b20d6bb4..782e6fe7 100644 --- a/front/py/deepx/tensor/changeshape.py +++ b/front/py/deepx/tensor/changeshape.py @@ -14,33 +14,45 @@ def reshape_(self,*shape)->Tensor: return result @tensor_method -def transpose(self,*axes,out:Union[Tensor,str]=''): +def permute(self,*axes,out:Union[Tensor,str]=''): + from deepx.nn.functional import permute as permute_func + result=permute_func(self,axes,out) + return result + +@tensor_method +def permute_(self,*axes): + from deepx.nn.functional import permute as permute_func + permute_func(self,axes,self) + return self + +@tensor_method +def transpose(self,out:Union[Tensor,str]=''): from deepx.nn.functional import transpose as transpose_func - result=transpose_func(self,axes,False,out) + result=transpose_func(self,out) return result @tensor_method -def transpose_(self,*axes): +def transpose_(self): from deepx.nn.functional import transpose as transpose_func - transpose_func(self,axes,self) + transpose_func(self,self) return self +@tensor_method +def broadcastshape(self,other:Tensor)->tuple[int]: + from deepx.nn.functional import broadcastshape as broadcastshape_func + 
result=broadcastshape_func(self.shape,other.shape) + return result + +@tensor_method +def broadcast_to(self,shape:tuple[int],out:Union[Tensor,str]='')->Tensor: + from deepx.nn.functional import broadcast_to as broadcast_to_func + result=broadcast_to_func(self,shape,out) + return result + + # @tensor_method # def expand(self,shape:tuple)->Tensor: # from deepx.nn.functional import expand as expand_func # result=expand_func(self,shape,False) # return result - -# @tensor_method -# def broadcastshape(self,other:Tensor)->tuple[int]: -# from deepx.nn.functional import broadcastshape as broadcastshape_func -# result=broadcastshape_func(self.shape,other.shape) -# return result - -# @tensor_method -# def broadcast_to(self,shape:tuple,out:Union[Tensor,str]='')->Tensor: -# from deepx.nn.functional import broadcast_to as broadcast_to_func -# result=broadcast_to_func(self,shape,out) -# return result - diff --git a/front/py/deepx/tensor/tensor.py b/front/py/deepx/tensor/tensor.py index 9b586961..7c91ab88 100644 --- a/front/py/deepx/tensor/tensor.py +++ b/front/py/deepx/tensor/tensor.py @@ -59,8 +59,11 @@ def __del__(self): # shape @property - def shape(self): - return self._shape.shape + def shape(self,dim:int=None): + if dim is None: + return self._shape.shape + else: + return self._shape.shape[dim] @property def Shape(self): return self._shape diff --git a/front/py/examples/2_ir/4_changeshape_broadcast.dot b/front/py/examples/2_ir/4_changeshape_broadcast.dot index 6c895185..d58501d9 100644 --- a/front/py/examples/2_ir/4_changeshape_broadcast.dot +++ b/front/py/examples/2_ir/4_changeshape_broadcast.dot @@ -2,40 +2,34 @@ digraph { rankdir=TB node [shape=record] - 128096220888320 [label="a + 139260210680960 [label="a (4, 2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220884576 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 128096220884528 [label="var_1 -1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220884624 [label="b + 139258236846528 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 139260210690752 [label="vector_1 +(4, 2, 3)" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] + 139258235360000 [label="b (2, 1)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220884432 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 128096220884480 [label="var_2 -1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220884336 [label="tensor_3 -(1, 2, 1)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220884048 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 128096220884096 [label="vector_1 -[1, 2, 1]" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220883952 [label="b.broadcasted + 139258235359616 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 139258235359856 [label="vector_2 +(2, 1)" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] + 139258235360048 [label="b.broadcasted 
(4, 2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220883664 [label=expand color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 128096220883712 [label="vector_2 + 139258235360096 [label=broadcastTo color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 139258235360576 [label="vector_3 (4, 2, 3)" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220883472 [label=add color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 128096220883376 [label="tensor_5 + 139258235360240 [label=add color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 139258235360480 [label="tensor_4 (4, 2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 128096220884576 -> 128096220888320 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220884528 -> 128096220884576 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220884432 -> 128096220884624 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220884480 -> 128096220884432 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220884048 -> 128096220884336 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220884624 -> 128096220884048 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220884096 -> 128096220884048 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220883664 -> 128096220883952 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220884336 -> 128096220883664 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220883712 -> 128096220883664 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220888320 -> 128096220883472 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220883952 -> 128096220883472 [arrowsize=0.8 color=gray40 penwidth=1.2] - 128096220883472 -> 128096220883376 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258236846528 -> 139260210680960 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139260210680960 -> 139258236846528 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139260210690752 -> 139258236846528 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258235359616 -> 139258235360000 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258235360000 -> 139258235359616 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258235359856 -> 139258235359616 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258235360096 -> 139258235360048 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258235360000 -> 139258235360096 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258235360576 -> 139258235360096 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139260210680960 -> 139258235360240 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258235360048 -> 139258235360240 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139258235360240 -> 139258235360480 [arrowsize=0.8 color=gray40 penwidth=1.2] } diff --git a/front/py/examples/2_ir/4_changeshape_broadcast.dot.svg b/front/py/examples/2_ir/4_changeshape_broadcast.dot.svg index a9e3c6b8..293e8a9a 100644 --- a/front/py/examples/2_ir/4_changeshape_broadcast.dot.svg +++ b/front/py/examples/2_ir/4_changeshape_broadcast.dot.svg @@ -4,181 +4,155 @@ - - + + %3 - - + + -128096220888320 - -a -(4, 2, 3) - - - -128096220883472 - -add - - - -128096220888320->128096220883472 - - +139260210680960 + +a +(4, 2, 3) - + -128096220884576 - -constant +139258236846528 + +reshape + + + +139260210680960->139258236846528 + + + + + +139258235360240 + +add - + 
+ +139260210680960->139258235360240 + + + + -128096220884576->128096220888320 - - +139258236846528->139260210680960 + + - + -128096220884528 - -var_1 -1 +139260210690752 + +vector_1 +(4, 2, 3) - - -128096220884528->128096220884576 - - + + +139260210690752->139258236846528 + + - + -128096220884624 - -b -(2, 1) +139258235360000 + +b +(2, 1) - - -128096220884048 - -reshape + + +139258235359616 + +reshape - - -128096220884624->128096220884048 - - + + +139258235360000->139258235359616 + + - - -128096220884432 - -constant + + +139258235360096 + +broadcastTo - - -128096220884432->128096220884624 - - + + +139258235360000->139258235360096 + + - + + +139258235359616->139258235360000 + + + + -128096220884480 - -var_2 -1 +139258235359856 + +vector_2 +(2, 1) - - -128096220884480->128096220884432 - - + + +139258235359856->139258235359616 + + - + -128096220884336 - -tensor_3 -(1, 2, 1) +139258235360048 + +b.broadcasted +(4, 2, 3) - - -128096220883664 - -expand - - - -128096220884336->128096220883664 - - + + +139258235360048->139258235360240 + + - - -128096220884048->128096220884336 - - + + +139258235360096->139258235360048 + + - + -128096220884096 - -vector_1 -[1, 2, 1] +139258235360576 + +vector_3 +(4, 2, 3) - - -128096220884096->128096220884048 - - + + +139258235360576->139258235360096 + + - - -128096220883952 - -b.broadcasted -(4, 2, 3) + + +139258235360480 + +tensor_4 +(4, 2, 3) - + -128096220883952->128096220883472 - - - - - -128096220883664->128096220883952 - - - - - -128096220883712 - -vector_2 -(4, 2, 3) - - - -128096220883712->128096220883664 - - - - - -128096220883376 - -tensor_5 -(4, 2, 3) - - - -128096220883472->128096220883376 - - +139258235360240->139258235360480 + + diff --git a/front/py/examples/2_ir/4_changeshape_broadcast.py b/front/py/examples/2_ir/4_changeshape_broadcast.py index 42283682..523e4941 100644 --- a/front/py/examples/2_ir/4_changeshape_broadcast.py +++ b/front/py/examples/2_ir/4_changeshape_broadcast.py @@ -1,23 +1,27 @@ +#######====PYTORCH======######## + + +import torch +a=torch.arange(4*2*3).reshape(4,2,3) +b=torch.arange(2*1).reshape(2,1) +bb_torch = torch.broadcast_to(b, (4,2,3)) +print(bb_torch) +c_torch=a+bb_torch +print(c_torch) + ########====DEEPX====######## -from deepx import Tensor,ones,broadcast_to +from deepx import Tensor,arange,broadcast_to -a=ones( 4,2,3 ,name="a") -b=ones( 2,1 ,name='b') +a=arange(end=4*2*3 ,name="a").reshape_(4,2,3) +b=arange(end=2*1 ,name='b').reshape_(2,1) bb=b.broadcast_to( a.shape,out="b.broadcasted") print(bb) c=a+bb - print(c) + import os script_name = os.path.splitext(os.path.basename( os.path.abspath(__file__)))[0] # 获取不带后缀的脚本名 str=b.graph.to_dot() str.render(script_name+".dot", format='svg') - -########====pytorch====######## -import torch -a=torch.ones(4,2,3) -b=torch.ones(2,1) -bb=b.expand(4,2,3) -c=a+bb -print(c) + diff --git a/front/py/examples/2_ir/4_changeshape_concat.dot b/front/py/examples/2_ir/4_changeshape_concat.dot new file mode 100644 index 00000000..a78082a7 --- /dev/null +++ b/front/py/examples/2_ir/4_changeshape_concat.dot @@ -0,0 +1,36 @@ +// Computational Graph +digraph { + rankdir=TB + node [shape=record] + 127589636296416 [label="t1 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 127587660992560 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 127587662470000 [label="var_1 +1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 127587662471056 
[label="t2 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 127587660992704 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 127587660992848 [label="var_2 +1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 127587660992368 [label="t3 +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 127587660992992 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 127587660993232 [label="var_3 +1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 127587660993184 [label="t +(3, 12, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 127587660993616 [label=concat color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 127587660993568 [label="var_4 +1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 127587660992560 -> 127589636296416 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587662470000 -> 127587660992560 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587660992704 -> 127587662471056 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587660992848 -> 127587660992704 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587660992992 -> 127587660992368 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587660993232 -> 127587660992992 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587660993616 -> 127587660993184 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127589636296416 -> 127587660993616 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587662471056 -> 127587660993616 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587660992368 -> 127587660993616 [arrowsize=0.8 color=gray40 penwidth=1.2] + 127587660993568 -> 127587660993616 [arrowsize=0.8 color=gray40 penwidth=1.2] +} diff --git a/front/py/examples/2_ir/4_changeshape_concat.dot.svg b/front/py/examples/2_ir/4_changeshape_concat.dot.svg new file mode 100644 index 00000000..5388daf1 --- /dev/null +++ b/front/py/examples/2_ir/4_changeshape_concat.dot.svg @@ -0,0 +1,159 @@ + + + + + + +%3 + + + +127589636296416 + +t1 +(3, 4, 5) + + + +127587660993616 + +concat + + + +127589636296416->127587660993616 + + + + + +127587660992560 + +constant + + + +127587660992560->127589636296416 + + + + + +127587662470000 + +var_1 +1 + + + +127587662470000->127587660992560 + + + + + +127587662471056 + +t2 +(3, 4, 5) + + + +127587662471056->127587660993616 + + + + + +127587660992704 + +constant + + + +127587660992704->127587662471056 + + + + + +127587660992848 + +var_2 +1 + + + +127587660992848->127587660992704 + + + + + +127587660992368 + +t3 +(3, 4, 5) + + + +127587660992368->127587660993616 + + + + + +127587660992992 + +constant + + + +127587660992992->127587660992368 + + + + + +127587660993232 + +var_3 +1 + + + +127587660993232->127587660992992 + + + + + +127587660993184 + +t +(3, 12, 5) + + + +127587660993616->127587660993184 + + + + + +127587660993568 + +var_4 +1 + + + +127587660993568->127587660993616 + + + + + diff --git a/front/py/examples/2_ir/4_changeshape_concat.py b/front/py/examples/2_ir/4_changeshape_concat.py new file mode 100644 index 00000000..af068303 --- /dev/null +++ b/front/py/examples/2_ir/4_changeshape_concat.py @@ -0,0 +1,28 @@ +############-------PyTorch-------################ + +import torch +torch_t1 = 
torch.ones(3, 4,5, dtype=torch.float32) +torch_t2 = torch.ones(3, 4,5, dtype=torch.float32) +torch_t3 = torch.ones(3, 4,5, dtype=torch.float32) + +torch_t = torch.concat([torch_t1, torch_t2, torch_t3], dim=1) +print(torch_t) + + +############-------DEEPX-------################ + +from deepx import Tensor,zeros, ones, concat + +print() + +t1 = ones([3,4,5],dtype='float32',name='t1') +t2=ones([3,4,5],dtype='float32',name='t2') +t3=ones([3,4,5],dtype='float32',name='t3') + +t=concat([t1,t2,t3],dim=1,out='t') +print(t) + +import os +script_name = os.path.splitext(os.path.basename( os.path.abspath(__file__)))[0] # 获取不带后缀的脚本名 +str=t.graph.to_dot() +str.render(script_name+".dot", format='svg') \ No newline at end of file diff --git a/front/py/examples/2_ir/4_changeshape_reshape.dot b/front/py/examples/2_ir/4_changeshape_reshape.dot index 8db2ca57..9102e255 100644 --- a/front/py/examples/2_ir/4_changeshape_reshape.dot +++ b/front/py/examples/2_ir/4_changeshape_reshape.dot @@ -2,32 +2,32 @@ digraph { rankdir=TB node [shape=record] - 138483665701232 [label="t1 + 135050279734960 [label="t1 (3, 4)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 138481406184944 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 138481407714656 [label="var_1 + 135048020341280 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 135048021923184 [label="var_1 1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] - 138481406184992 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 138481407705776 [label="vector_1 + 135048020341328 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 135048021923520 [label="vector_1 (3, 2, 2)" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 138481406185280 [label="tensor_2 + 135048020341664 [label="tensor_2 (3, 2, 2)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 138481406185328 [label="tensor_3 + 135048020341712 [label="tensor_3 (4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 138481406185136 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 138481406185712 [label="var_2 + 135048020341472 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 135048020342096 [label="var_2 1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] - 138481406185472 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 138481406185184 [label="vector_2 + 135048020341856 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 135048020341520 [label="vector_2 (20,)" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 138481406184944 -> 138483665701232 [arrowsize=0.8 color=gray40 penwidth=1.2] - 138481407714656 -> 138481406184944 [arrowsize=0.8 color=gray40 penwidth=1.2] - 138483665701232 -> 138481406184992 [arrowsize=0.8 color=gray40 penwidth=1.2] - 138481407705776 -> 138481406184992 
[arrowsize=0.8 color=gray40 penwidth=1.2] - 138481406184992 -> 138481406185280 [arrowsize=0.8 color=gray40 penwidth=1.2] - 138481406185136 -> 138481406185328 [arrowsize=0.8 color=gray40 penwidth=1.2] - 138481406185472 -> 138481406185328 [arrowsize=0.8 color=gray40 penwidth=1.2] - 138481406185712 -> 138481406185136 [arrowsize=0.8 color=gray40 penwidth=1.2] - 138481406185328 -> 138481406185472 [arrowsize=0.8 color=gray40 penwidth=1.2] - 138481406185184 -> 138481406185472 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048020341280 -> 135050279734960 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048021923184 -> 135048020341280 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135050279734960 -> 135048020341328 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048021923520 -> 135048020341328 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048020341328 -> 135048020341664 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048020341472 -> 135048020341712 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048020341856 -> 135048020341712 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048020342096 -> 135048020341472 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048020341712 -> 135048020341856 [arrowsize=0.8 color=gray40 penwidth=1.2] + 135048020341520 -> 135048020341856 [arrowsize=0.8 color=gray40 penwidth=1.2] } diff --git a/front/py/examples/2_ir/4_changeshape_reshape.dot.svg b/front/py/examples/2_ir/4_changeshape_reshape.dot.svg index 4592b180..e5caae8f 100644 --- a/front/py/examples/2_ir/4_changeshape_reshape.dot.svg +++ b/front/py/examples/2_ir/4_changeshape_reshape.dot.svg @@ -9,136 +9,136 @@ %3 - + -138483665701232 +135050279734960 t1 (3, 4) - + -138481406184992 +135048020341328 reshape - + -138483665701232->138481406184992 +135050279734960->135048020341328 - + -138481406184944 +135048020341280 constant - + -138481406184944->138483665701232 +135048020341280->135050279734960 - + -138481407714656 +135048021923184 var_1 1 - + -138481407714656->138481406184944 +135048021923184->135048020341280 - + -138481406185280 +135048020341664 tensor_2 (3, 2, 2) - + -138481406184992->138481406185280 +135048020341328->135048020341664 - + -138481407705776 +135048021923520 vector_1 (3, 2, 2) - + -138481407705776->138481406184992 +135048021923520->135048020341328 - + -138481406185328 +135048020341712 tensor_3 (4, 5) - + -138481406185472 +135048020341856 reshape - + -138481406185328->138481406185472 +135048020341712->135048020341856 - + -138481406185136 +135048020341472 constant - + -138481406185136->138481406185328 +135048020341472->135048020341712 - + -138481406185712 +135048020342096 var_2 1 - + -138481406185712->138481406185136 +135048020342096->135048020341472 - + -138481406185472->138481406185328 +135048020341856->135048020341712 - + -138481406185184 +135048020341520 vector_2 (20,) - + -138481406185184->138481406185472 +135048020341520->135048020341856 diff --git a/front/py/examples/2_ir/4_changeshape_transpose.dot b/front/py/examples/2_ir/4_changeshape_transpose.dot new file mode 100644 index 00000000..676d92a1 --- /dev/null +++ b/front/py/examples/2_ir/4_changeshape_transpose.dot @@ -0,0 +1,35 @@ +// Computational Graph +digraph { + rankdir=TB + node [shape=record] + 129501590933168 [label="t1 +(3, 4)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 129499615596016 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 129499617100368 [label="var_1 +1" color=orange fillcolor=moccasin fontname="Sans-Serif" 
labeljust=l shape=box style=filled] + 129499615595968 [label="t2 +(4, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 129499615596304 [label=transpose color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 129499615596256 [label="vector_1 +[1, 0]" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] + 129499617100464 [label="t3 +(2, 3, 4)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 129499615596496 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 129499615596160 [label="var_2 +1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 129499615596832 [label="t4 +(2, 4, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 129499615597072 [label=transpose color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 129499615597024 [label="vector_2 +[0, 2, 1]" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] + 129499615596016 -> 129501590933168 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129499617100368 -> 129499615596016 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129499615596304 -> 129499615595968 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129501590933168 -> 129499615596304 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129499615596256 -> 129499615596304 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129499615596496 -> 129499617100464 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129499615596160 -> 129499615596496 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129499615597072 -> 129499615596832 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129499617100464 -> 129499615597072 [arrowsize=0.8 color=gray40 penwidth=1.2] + 129499615597024 -> 129499615597072 [arrowsize=0.8 color=gray40 penwidth=1.2] +} diff --git a/front/py/examples/2_ir/4_changeshape_transpose.dot.svg b/front/py/examples/2_ir/4_changeshape_transpose.dot.svg new file mode 100644 index 00000000..4e152a70 --- /dev/null +++ b/front/py/examples/2_ir/4_changeshape_transpose.dot.svg @@ -0,0 +1,153 @@ + + + + + + +%3 + + + +129501590933168 + +t1 +(3, 4) + + + +129499615596304 + +transpose + + + +129501590933168->129499615596304 + + + + + +129499615596016 + +constant + + + +129499615596016->129501590933168 + + + + + +129499617100368 + +var_1 +1 + + + +129499617100368->129499615596016 + + + + + +129499615595968 + +t2 +(4, 3) + + + +129499615596304->129499615595968 + + + + + +129499615596256 + +vector_1 +[1, 0] + + + +129499615596256->129499615596304 + + + + + +129499617100464 + +t3 +(2, 3, 4) + + + +129499615597072 + +transpose + + + +129499617100464->129499615597072 + + + + + +129499615596496 + +constant + + + +129499615596496->129499617100464 + + + + + +129499615596160 + +var_2 +1 + + + +129499615596160->129499615596496 + + + + + +129499615596832 + +t4 +(2, 4, 3) + + + +129499615597072->129499615596832 + + + + + +129499615597024 + +vector_2 +[0, 2, 1] + + + +129499615597024->129499615597072 + + + + + diff --git a/front/py/examples/2_ir/4_changeshape_transpose.py b/front/py/examples/2_ir/4_changeshape_transpose.py index 2b85e971..e2ee5f96 100644 --- a/front/py/examples/2_ir/4_changeshape_transpose.py +++ b/front/py/examples/2_ir/4_changeshape_transpose.py @@ -18,11 +18,11 @@ t1 = ones([3,4],dtype='float32',name='t1') print(t1) 
-t2=t1.transpose(0,1) +t2=t1.transpose(out='t2') print(t2) t3=ones([2,3,4],dtype='float32',name='t3') -t4=t3.transpose(1,2) +t4=t3.transpose(out='t4') print(t4) import os
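
For quick cross-checking of the example scripts touched above, here is a minimal PyTorch-only sketch of the shape semantics they exercise (assuming a recent torch release; the deepx calls themselves are not reproduced here, this is only an editor's illustration):

import torch

# broadcast: (2, 1) stretched to (4, 2, 3) before the element-wise add
b = torch.arange(2 * 1).reshape(2, 1)
bb = torch.broadcast_to(b, (4, 2, 3))
assert bb.shape == (4, 2, 3)

# concat: three (3, 4, 5) tensors joined along dim=1 -> (3, 12, 5)
t = torch.concat([torch.ones(3, 4, 5)] * 3, dim=1)
assert t.shape == (3, 12, 5)

# transpose with an explicit dim order: (2, 3, 4) permuted by [0, 2, 1] -> (2, 4, 3)
t4 = torch.ones(2, 3, 4).permute(0, 2, 1)
assert t4.shape == (2, 4, 3)

The resulting shapes match the tensor nodes recorded in the regenerated .dot graphs: b.broadcasted (4, 2, 3), t (3, 12, 5), and t4 (2, 4, 3).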