From b7f038aff5fa84233dfcb8d9c3d1081c89548d25 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Tue, 22 Apr 2025 17:45:58 +0800 Subject: [PATCH 1/5] =?UTF-8?q?benchmark.IR:=E6=94=AF=E6=8C=81=E5=AF=B9?= =?UTF-8?q?=E7=AE=97=E5=AD=90=E8=BF=9B=E8=A1=8Cbenchmark?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/excuter/op-mem-ompsimd/list.md | 2 +- excuter/cpp-common/src/client/udpserver.cpp | 2 +- excuter/cpp-common/src/deepx/tf/tf.cpp | 144 +++++++++++------- excuter/cpp-common/src/deepx/tf/tf.hpp | 35 +++-- excuter/cpp-common/src/deepx/tf/tffactory.cpp | 19 ++- excuter/cpp-common/src/deepx/tf/tffactory.hpp | 6 +- excuter/op-mem-cuda/src/client/main.cpp | 4 +- excuter/op-mem-cuda/src/deepx/tf/arg.hpp | 2 - .../op-mem-cuda/src/deepx/tf/changeshape.hpp | 10 +- .../src/deepx/tf/elementwise_basic.hpp | 20 +-- .../src/deepx/tf/elementwise_compare.hpp | 22 +-- .../src/deepx/tf/elementwise_sin.hpp | 6 +- .../src/deepx/tf/elementwise_sqrt.hpp | 12 +- excuter/op-mem-cuda/src/deepx/tf/init.hpp | 8 +- excuter/op-mem-cuda/src/deepx/tf/io.hpp | 2 +- excuter/op-mem-cuda/src/deepx/tf/matmul.hpp | 2 +- excuter/op-mem-cuda/src/deepx/tf/reduce.hpp | 8 +- excuter/op-mem-ompsimd/src/client/main.cpp | 4 +- .../src/deepx/tf/changeshape.hpp | 10 +- .../src/deepx/tf/elementwise.hpp | 60 ++++---- excuter/op-mem-ompsimd/src/deepx/tf/init.hpp | 8 +- excuter/op-mem-ompsimd/src/deepx/tf/io.hpp | 2 +- .../op-mem-ompsimd/src/deepx/tf/matmul.hpp | 37 +++-- .../op-mem-ompsimd/src/deepx/tf/reduce.hpp | 8 +- front/py/deepx/nn/deepxir.py | 56 +++++-- .../py/deepx/nn/functional/leaffunc_matmul.py | 4 +- front/py/deepx/nn/functional/rtf_matmul.py | 4 +- front/py/deepx/scheduler/client/udpconn.py | 2 +- front/py/examples/2_ir/3_matmul.py | 45 ++++-- 29 files changed, 333 insertions(+), 211 deletions(-) diff --git a/doc/excuter/op-mem-ompsimd/list.md b/doc/excuter/op-mem-ompsimd/list.md index 66a03786..8ca0e1d6 100644 --- 
a/doc/excuter/op-mem-ompsimd/list.md +++ b/doc/excuter/op-mem-ompsimd/list.md @@ -25,7 +25,7 @@ | save | none | save(tensor t, var path)->() | save(T1,path) | save(tensor t, var path)->() | | print | miaobyte | print(tensor t)->() | print(T1) | print(tensor t)->() | | print | miaobyte | print(tensor t, var format)->() | print(T1) | print(tensor t, var format)->() | -| load | none | load(var path)->() | load(path) | load(var path)->() | +| load | none | load(var path)->() | mem.load(path) | load(var path)->() | ### init diff --git a/excuter/cpp-common/src/client/udpserver.cpp b/excuter/cpp-common/src/client/udpserver.cpp index 664083be..44759d42 100644 --- a/excuter/cpp-common/src/client/udpserver.cpp +++ b/excuter/cpp-common/src/client/udpserver.cpp @@ -51,7 +51,7 @@ namespace client while (getline(ss, line)) { if (!line.empty()) { deepx::tf::TF tf; - tf.recv_at = chrono::system_clock::now(); + tf.metadata.recv_at = chrono::system_clock::now(); tf.parse(line); queue.push(tf); } diff --git a/excuter/cpp-common/src/deepx/tf/tf.cpp b/excuter/cpp-common/src/deepx/tf/tf.cpp index d8bce199..b6ffa66f 100644 --- a/excuter/cpp-common/src/deepx/tf/tf.cpp +++ b/excuter/cpp-common/src/deepx/tf/tf.cpp @@ -1,17 +1,17 @@ #include #include #include +#include #include "deepx/tf/tf.hpp" #include "stdutil/time.hpp" #include "stdutil/string.hpp" namespace deepx::tf -{ - +{ void Param::parse(const string ¶m) { - //1. 按:分割类型和值 + // 1. 
按:分割类型和值 size_t colon_pos = param.find(':'); string type, textvalue; if (colon_pos != string::npos) @@ -36,8 +36,88 @@ namespace deepx::tf this->dtype = deepx::dtype(textvalue); this->textvalue = textvalue; } - - } + } + + string TFMetadata::to_string() const + { + stringstream ss; + if (!author.empty()) + { + ss << "author=" << author << " "; + } + if (id > 0) + { + ss << "id=" << id << " "; + } + if (created_at != system_clock::time_point::min()) + { + ss << "created_at=" << duration_cast(created_at.time_since_epoch()).count() << " "; + } + if (sent_at != system_clock::time_point::min()) + { + ss << "sent_at=" << duration_cast(sent_at.time_since_epoch()).count() << " "; + } + if (recv_at != system_clock::time_point::min()) + { + ss << "recv_at=" << duration_cast(recv_at.time_since_epoch()).count() << " "; + } + if (benchmark.repeat > 0) + { + ss << "benchmark.repeat=" << benchmark.repeat << " "; + } + return ss.str(); + } + + std::unordered_map parse_metadata_map(const string &meta) + { + std::unordered_map metadata; + stringstream meta_ss(meta); + string key_value; + while (meta_ss >> key_value) + { + size_t eq_pos = key_value.find('='); + if (eq_pos == string::npos) + continue; + string key = key_value.substr(0, eq_pos); + string value = key_value.substr(eq_pos + 1); + metadata[key] = value; + } + return metadata; + } + + // 解析元数据 + void TFMetadata::parse(const string &meta) + { + if (meta.empty()) + return; + + auto metadata_map = parse_metadata_map(meta); + if (metadata_map.find("id") != metadata_map.end()) + { + id = stoi(metadata_map["id"]); + } + if (metadata_map.find("author") != metadata_map.end()) + { + author = metadata_map["author"]; + } + if (metadata_map.find("created_at") != metadata_map.end()) + { + created_at = system_clock::from_time_t(stod(metadata_map["created_at"])); + } + if (metadata_map.find("sent_at") != metadata_map.end()) + { + sent_at = system_clock::from_time_t(stod(metadata_map["sent_at"])); + } + if (metadata_map.find("recv_at") != 
metadata_map.end()) + { + recv_at = system_clock::from_time_t(stod(metadata_map["recv_at"])); + } + if (metadata_map.find("benchmark.repeat") != metadata_map.end()) + { + benchmark.repeat = stoi(metadata_map["benchmark.repeat"]); + } + } + // 分割主体和元数据 std::pair split_body_metadata(const string &input) { @@ -186,7 +266,6 @@ namespace deepx::tf return value_str; // 默认作为字符串处理 } - // 解析参数列表 vector parse_params(const string ¶ms_str) { @@ -211,51 +290,7 @@ namespace deepx::tf return params; } - // 解析元数据键值对 - void parse_metadata_pair(const string &key_value, int &id, string &author, - system_clock::time_point &created_at, - system_clock::time_point &sent_at) - { - size_t eq_pos = key_value.find('='); - if (eq_pos == string::npos) - return; - - string key = key_value.substr(0, eq_pos); - string value = key_value.substr(eq_pos + 1); - - if (key == "id") - { - id = stoi(value); - } - else if (key == "author") - { - author = value; - } - else if (key == "created_at") - { - created_at = system_clock::from_time_t(stod(value)); - } - else if (key == "sent_at") - { - sent_at = system_clock::from_time_t(stod(value)); - } - } - - // 解析元数据 - void parse_metadata(const string &meta, int &id, string &author, - system_clock::time_point &created_at, - system_clock::time_point &sent_at) - { - if (meta.empty()) - return; - - stringstream meta_ss(meta); - string key_value; - while (meta_ss >> key_value) - { - parse_metadata_pair(key_value, id, author, created_at, sent_at); - } - } + // 主解析函数 void TF::parse(const string &input) @@ -274,7 +309,7 @@ namespace deepx::tf returns = parse_params(output_part); // 5. 
解析元数据 - parse_metadata(meta, id, author, created_at, sent_at); + metadata.parse(meta); } void TF::init(const string &opname, @@ -282,7 +317,6 @@ namespace deepx::tf const vector &returns) { this->name = opname; - this->author = ""; this->args = args; this->returns = returns; } @@ -330,9 +364,7 @@ namespace deepx::tf if (show_extra) { - ss << " //id=" << id - << " created_at=" << stdutil::format_time(created_at) - << " sent_at=" << stdutil::format_time(sent_at); + ss << " //" << metadata.to_string(); } return ss.str(); diff --git a/excuter/cpp-common/src/deepx/tf/tf.hpp b/excuter/cpp-common/src/deepx/tf/tf.hpp index e123f10c..d08b85b5 100644 --- a/excuter/cpp-common/src/deepx/tf/tf.hpp +++ b/excuter/cpp-common/src/deepx/tf/tf.hpp @@ -26,27 +26,40 @@ namespace deepx::tf { TypeDef dtype; string textvalue; - + Param(const string &textvalue = "", const DataCategory &dt = DataCategory::Unknown, const Precision &prec = Precision::Any) : textvalue(textvalue), dtype(make_dtype(dt, prec)) {} - + void parse(const string ¶m); }; + // 元数据 + struct Benchmark + { + int repeat = 0; + }; + struct TFMetadata + { + string author; + int id; + system_clock::time_point created_at; + system_clock::time_point sent_at; + system_clock::time_point recv_at; + Benchmark benchmark; + string to_string() const; + void parse(const string &str); + }; // TF:Tensor Function的缩写 class TF { public: string name; - string author; - string tftype; + + string tftype; vector args; vector returns; - // - int id; - system_clock::time_point created_at; - system_clock::time_point sent_at; - system_clock::time_point recv_at; + // metadata + TFMetadata metadata; public: TF() = default; @@ -128,8 +141,8 @@ namespace deepx::tf } vector result; - string textvalue =vars[idx].textvalue; - stdutil::trim(textvalue,"[]"); + string textvalue = vars[idx].textvalue; + stdutil::trim(textvalue, "[]"); if (textvalue.empty()) { throw std::invalid_argument("Invalid argument index"); diff --git 
a/excuter/cpp-common/src/deepx/tf/tffactory.cpp b/excuter/cpp-common/src/deepx/tf/tffactory.cpp index 1936acfc..48dd52bb 100644 --- a/excuter/cpp-common/src/deepx/tf/tffactory.cpp +++ b/excuter/cpp-common/src/deepx/tf/tffactory.cpp @@ -22,11 +22,18 @@ namespace deepx::tf } // 检查作者是否存在 - auto author_it = family_it->second->tf_authors.find(other.author); + auto author_it = family_it->second->tf_authors.find(other.metadata.author); if (author_it == family_it->second->tf_authors.end()) { - cerr << " " << other.name << " author:" << other.author << " not found" << endl; - return nullptr; + cerr << " " << other.name << " author:" << other.metadata.author << " not found" << endl; + //使用第一个作者 + author_it = family_it->second->tf_authors.begin(); + cerr << " " << other.name << " use first author:" << author_it->first << endl; + if (author_it == family_it->second->tf_authors.end()) + { + cerr << " " << other.name << " default author:" << author_it->first << " not found" << endl; + return nullptr; + } } // 提取参数和返回值类型 @@ -71,7 +78,9 @@ namespace deepx::tf } // 使用clone()方法创建新实例,而不是直接复制构造 - return tf->clone(); + auto cloned = tf->clone(); + cloned->metadata=other.metadata; + return cloned; } string TfFactory::print_markdown(string excuter_name) const { @@ -99,7 +108,7 @@ namespace deepx::tf for (const auto &tf : tfs) { ss << "| " << tf->name << " | "; - ss << (tf->author.empty() ? " none " : tf->author) << " | "; + ss << (tf->metadata.author.empty() ? 
" none " : tf->metadata.author) << " | "; ss << tf->to_string(false, true) << " | "; ss << tf->math_formula() << " | "; ss << tf->to_string(false, true) << " |\n"; diff --git a/excuter/cpp-common/src/deepx/tf/tffactory.hpp b/excuter/cpp-common/src/deepx/tf/tffactory.hpp index 3262ffaf..ebe836c0 100644 --- a/excuter/cpp-common/src/deepx/tf/tffactory.hpp +++ b/excuter/cpp-common/src/deepx/tf/tffactory.hpp @@ -85,14 +85,14 @@ namespace deepx::tf } // 检查是否存在该作者的实现,不存在则创建 - if (tf_families[tf->name]->tf_authors.find(tf->author) == + if (tf_families[tf->name]->tf_authors.find(tf->metadata.author) == tf_families[tf->name]->tf_authors.end()) { - tf_families[tf->name]->tf_authors[tf->author] = std::make_shared(); + tf_families[tf->name]->tf_authors[tf->metadata.author] = std::make_shared(); } // 直接添加到vector中 - tf_families[tf->name]->tf_authors[tf->author]->tfs.push_back(tf); + tf_families[tf->name]->tf_authors[tf->metadata.author]->tfs.push_back(tf); } shared_ptr get_tf(const TF &other) const; // 输出为markdown表格格式 diff --git a/excuter/op-mem-cuda/src/client/main.cpp b/excuter/op-mem-cuda/src/client/main.cpp index 8c00a50a..b3cd40d6 100644 --- a/excuter/op-mem-cuda/src/client/main.cpp +++ b/excuter/op-mem-cuda/src/client/main.cpp @@ -60,8 +60,8 @@ int main() } deepx::tf::OpResp opresp; - opresp.id = op.id; - opresp.recv_at = op.recv_at; + opresp.id = op.metadata.id; + opresp.recv_at = op.metadata.recv_at; auto src = tf_factory.get_tf(op); if (src == nullptr) diff --git a/excuter/op-mem-cuda/src/deepx/tf/arg.hpp b/excuter/op-mem-cuda/src/deepx/tf/arg.hpp index 5c8fa93b..dcc845fd 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/arg.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/arg.hpp @@ -14,7 +14,6 @@ namespace deepx::tf ArgSet(vector args, vector returns) { this->name = "argset"; - this->author = ""; this->tftype = "arg"; this->args = args; this->returns = returns; @@ -76,7 +75,6 @@ namespace deepx::tf VecSet(vector args, vector returns) { this->name = "vecset"; - this->author = ""; 
this->tftype = "arg"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp index 6320b7d4..d869a435 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp @@ -20,7 +20,7 @@ namespace deepx::tf Reshape(const vector &args, const vector &returns) { this->name = "reshape"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; @@ -82,7 +82,7 @@ namespace deepx::tf Transpose(const vector &args, const vector &returns) { this->name = "transpose"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; @@ -151,7 +151,7 @@ namespace deepx::tf Concat(const vector &args, const vector &returns) { this->name = "concat"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; @@ -286,7 +286,7 @@ namespace deepx::tf BroadcastTo(const vector &args, const vector &returns) { this->name = "broadcastTo"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; @@ -352,7 +352,7 @@ namespace deepx::tf Gather(const vector &args, const vector &returns) { this->name = "gather"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp index d18eeb88..97e4b17e 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp @@ -16,7 +16,7 @@ namespace deepx::tf Add(const vector &args, 
const vector &returns) { this->name = "add"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -85,7 +85,7 @@ namespace deepx::tf AddScalar(const vector &args, const vector &returns) { this->name = "addscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -153,7 +153,7 @@ namespace deepx::tf Sub(const vector &args, const vector &returns) { this->name = "sub"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -222,7 +222,7 @@ namespace deepx::tf SubScalar(const vector &args, const vector &returns) { this->name = "subscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -290,7 +290,7 @@ namespace deepx::tf Mul(const vector &args, const vector &returns) { this->name = "mul"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -359,7 +359,7 @@ namespace deepx::tf MulScalar(const vector &args, const vector &returns) { this->name = "mulscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -427,7 +427,7 @@ namespace deepx::tf Div(const vector &args, const vector &returns) { this->name = "div"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -496,7 +496,7 @@ namespace deepx::tf DivScalar(const vector &args, const vector &returns) { this->name = "divscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; 
this->args = args; this->returns = returns; @@ -564,7 +564,7 @@ namespace deepx::tf RDivScalar(const vector &args, const vector &returns) { this->name = "rdivscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -633,7 +633,7 @@ namespace deepx::tf Invert(const vector &args, const vector &returns) { this->name = "invert"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp index 0ee58de8..fe3734a1 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp @@ -15,7 +15,7 @@ namespace deepx::tf Max(const vector &args, const vector &returns) { this->name = "max"; - this->author = Author::name(); + this->metadata.author =Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -80,7 +80,7 @@ namespace deepx::tf MaxScalar(const vector &args, const vector &returns) { this->name = "maxscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -145,7 +145,7 @@ namespace deepx::tf Min(const vector &args, const vector &returns) { this->name = "min"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -211,7 +211,7 @@ namespace deepx::tf MinScalar(const vector &args, const vector &returns) { this->name = "minscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -276,7 +276,7 @@ namespace deepx::tf Equal(const vector &args, const vector &returns) { 
this->name = "equal"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -343,7 +343,7 @@ namespace deepx::tf EqualScalar(const vector &args, const vector &returns) { this->name = "equalscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -410,7 +410,7 @@ namespace deepx::tf Less(const vector &args, const vector &returns) { this->name = "less"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -477,7 +477,7 @@ namespace deepx::tf LessScalar(const vector &args, const vector &returns) { this->name = "lessscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -543,7 +543,7 @@ namespace deepx::tf Greater(const vector &args, const vector &returns) { this->name = "greater"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -610,7 +610,7 @@ namespace deepx::tf GreaterScalar(const vector &args, const vector &returns) { this->name = "greaterscalar"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -676,7 +676,7 @@ namespace deepx::tf Switch(const vector &args, const vector &returns) { this->name = "switch"; - this->author = Author::name(); + this->metadata.author=Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_sin.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_sin.hpp index d5eae4e2..ec35e1f4 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_sin.hpp +++ 
b/excuter/op-mem-cuda/src/deepx/tf/elementwise_sin.hpp @@ -15,7 +15,7 @@ namespace deepx::tf Sin(const vector &args, const vector &returns) { this->name = "sin"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -68,7 +68,7 @@ namespace deepx::tf Cos(const vector &args, const vector &returns) { this->name = "cos"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -123,7 +123,7 @@ namespace deepx::tf Tan(const vector &args, const vector &returns) { this->name = "tan"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_sqrt.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_sqrt.hpp index d13b22f6..9f514264 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_sqrt.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_sqrt.hpp @@ -15,7 +15,7 @@ namespace deepx::tf Pow(const vector &args, const vector &returns) { this->name = "pow"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -70,7 +70,7 @@ namespace deepx::tf PowScalar(const vector &args, const vector &returns) { this->name = "powscalar"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -123,7 +123,7 @@ namespace deepx::tf RpowScalar(const vector &args, const vector &returns) { this->name = "rpowscalar"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -174,7 +174,7 @@ namespace deepx::tf Sqrt(const vector &args, const vector &returns) { this->name = 
"sqrt"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -232,7 +232,7 @@ namespace deepx::tf Log(const vector &args, const vector &returns) { this->name = "log"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -289,7 +289,7 @@ namespace deepx::tf Exp(const vector &args, const vector &returns) { this->name = "exp"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/init.hpp b/excuter/op-mem-cuda/src/deepx/tf/init.hpp index 480bf18c..839c83bf 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/init.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/init.hpp @@ -18,7 +18,7 @@ namespace deepx::tf Constant(const vector &args, const vector &returns) { this->name = "constant"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; @@ -98,7 +98,7 @@ namespace deepx::tf Arange(const vector &args, const vector &returns) { this->name = "arange"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; @@ -183,7 +183,7 @@ namespace deepx::tf Uniform(const vector &args, const vector &returns) { this->name = "uniform"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; @@ -269,7 +269,7 @@ namespace deepx::tf Normal(const vector &args, const vector &returns) { this->name = "normal"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/io.hpp 
b/excuter/op-mem-cuda/src/deepx/tf/io.hpp index b5a5f4d1..8049fc81 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/io.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/io.hpp @@ -15,7 +15,7 @@ namespace deepx::tf Print(vector args, vector returns) { this->name = "print"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "io"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/matmul.hpp b/excuter/op-mem-cuda/src/deepx/tf/matmul.hpp index a61b5c04..3b18f939 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/matmul.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/matmul.hpp @@ -18,7 +18,7 @@ namespace deepx::tf MatMul(const vector &args, const vector &returns) { this->name = "matmul"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "matmul"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-cuda/src/deepx/tf/reduce.hpp b/excuter/op-mem-cuda/src/deepx/tf/reduce.hpp index a23319d7..2bfd36a2 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/reduce.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/reduce.hpp @@ -16,7 +16,7 @@ namespace deepx::tf Sum(const vector &args, const vector &returns) { this->name = "sum"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "reduce"; this->args = args; this->returns = returns; @@ -82,7 +82,7 @@ namespace deepx::tf Prod(const vector &args, const vector &returns) { this->name = "prod"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "reduce"; this->args = args; this->returns = returns; @@ -141,7 +141,7 @@ namespace deepx::tf ReduceMax(const vector &args, const vector &returns) { this->name = "reducemax"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "reduce"; this->args = args; this->returns = returns; @@ -200,7 +200,7 @@ namespace deepx::tf ReduceMin(const vector &args, const vector &returns) { this->name 
= "reducemin"; - this->author = Author::name(); + this->metadata.author= Author::name(); this->tftype = "reduce"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-ompsimd/src/client/main.cpp b/excuter/op-mem-ompsimd/src/client/main.cpp index 7f0378d9..96ca39bb 100644 --- a/excuter/op-mem-ompsimd/src/client/main.cpp +++ b/excuter/op-mem-ompsimd/src/client/main.cpp @@ -59,8 +59,8 @@ int main() } deepx::tf::OpResp opresp; - opresp.id = op.id; - opresp.recv_at = op.recv_at; + opresp.id = op.metadata.id; + opresp.recv_at = op.metadata.recv_at; auto src = tf_factory.get_tf(op); if (src == nullptr) diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp index fd4f0e07..8afa8e14 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp @@ -19,7 +19,7 @@ namespace deepx::tf Reshape(const vector &args, const vector &returns) { this->name = "reshape"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->args = args; this->returns = returns; this->tftype = "changeshape"; @@ -86,7 +86,7 @@ namespace deepx::tf Transpose(const vector &args, const vector &returns) { this->name = "transpose"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; @@ -153,7 +153,7 @@ namespace deepx::tf Concat(const vector &args, const vector &returns) { this->name = "concat"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; @@ -266,7 +266,7 @@ namespace deepx::tf BroadcastTo(const vector &args, const vector &returns) { this->name = "broadcastTo"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; @@ -330,7 +330,7 @@ namespace deepx::tf 
Gather(const vector &args, const vector &returns) { this->name = "gather"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp index e1914688..1f754006 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp @@ -18,7 +18,7 @@ namespace deepx::tf Add(vector args, vector returns) { this->name = "add"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -80,7 +80,7 @@ namespace deepx::tf AddScalar(vector args, vector returns) { this->name = "addscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -140,7 +140,7 @@ namespace deepx::tf Sub(vector args, vector returns) { this->name = "sub"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -202,7 +202,7 @@ namespace deepx::tf SubScalar(vector args, vector returns) { this->name = "subscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -263,7 +263,7 @@ namespace deepx::tf Mul(vector args, vector returns) { this->name = "mul"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -325,7 +325,7 @@ namespace deepx::tf MulScalar(vector args, vector returns) { this->name = "mulscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -386,7 
+386,7 @@ namespace deepx::tf Div(vector args, vector returns) { this->name = "div"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -448,7 +448,7 @@ namespace deepx::tf DivScalar(vector args, vector returns) { this->name = "divscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -505,7 +505,7 @@ namespace deepx::tf RDivScalar(vector args, vector returns) { this->name = "rdivscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -563,7 +563,7 @@ namespace deepx::tf Invert(vector args, vector returns) { this->name = "invert"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -618,7 +618,7 @@ namespace deepx::tf Sqrt(vector args, vector returns) { this->name = "sqrt"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -663,7 +663,7 @@ namespace deepx::tf Pow(vector args, vector returns) { this->name = "pow"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -709,7 +709,7 @@ namespace deepx::tf PowScalar(vector args, vector returns) { this->name = "powscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -755,7 +755,7 @@ namespace deepx::tf RpowScalar(vector args, vector returns) { this->name = "rpowscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = 
returns; @@ -800,7 +800,7 @@ namespace deepx::tf Log(vector args, vector returns) { this->name = "log"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -845,7 +845,7 @@ namespace deepx::tf Exp(vector args, vector returns) { this->name = "exp"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -890,7 +890,7 @@ namespace deepx::tf Sin(vector args, vector returns) { this->name = "sin"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -935,7 +935,7 @@ namespace deepx::tf Cos(vector args, vector returns) { this->name = "cos"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -980,7 +980,7 @@ namespace deepx::tf Tan(vector args, vector returns) { this->name = "tan"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1025,7 +1025,7 @@ namespace deepx::tf Max(vector args, vector returns) { this->name = "max"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1083,7 +1083,7 @@ namespace deepx::tf MaxScalar(vector args, vector returns) { this->name = "maxscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1140,7 +1140,7 @@ namespace deepx::tf Min(vector args, vector returns) { this->name = "min"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1198,7 +1198,7 
@@ namespace deepx::tf MinScalar(vector args, vector returns) { this->name = "minscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1257,7 +1257,7 @@ namespace deepx::tf Equal(vector args, vector returns) { this->name = "equal"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1317,7 +1317,7 @@ namespace deepx::tf EqualScalar(vector args, vector returns) { this->name = "equalscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1376,7 +1376,7 @@ namespace deepx::tf Less(vector args, vector returns) { this->name = "less"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1435,7 +1435,7 @@ namespace deepx::tf LessScalar(vector args, vector returns) { this->name = "lessscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1493,7 +1493,7 @@ namespace deepx::tf Greater(vector args, vector returns) { this->name = "greater"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1552,7 +1552,7 @@ namespace deepx::tf GreaterScalar(vector args, vector returns) { this->name = "greaterscalar"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = args; this->returns = returns; @@ -1610,7 +1610,7 @@ namespace deepx::tf Switch(vector args, vector returns) { this->name = "switch"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "elementwise"; this->args = 
args; this->returns = returns; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp index c28b569e..54410653 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp @@ -15,7 +15,7 @@ namespace deepx::tf Constant(const vector &args, const vector &returns) { this->name = "constant"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; @@ -96,7 +96,7 @@ namespace deepx::tf Arange(const vector &args, const vector &returns) { this->name = "arange"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; @@ -171,7 +171,7 @@ namespace deepx::tf Uniform(const vector &args, const vector &returns) { this->name = "uniform"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; @@ -246,7 +246,7 @@ namespace deepx::tf Normal(const vector &args, const vector &returns) { this->name = "normal"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp index a6fb83dc..8c6fdc56 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp @@ -16,7 +16,7 @@ namespace deepx::tf { this->name = "print"; this->tftype = "io"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp index 89804a18..f5cafd18 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp @@ -16,7 +16,7 @@ 
namespace deepx::tf MatMul(const vector &args, const vector &returns) { this->name = "matmul"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "matmul"; this->args = args; this->returns = returns; @@ -30,16 +30,7 @@ namespace deepx::tf { return make_shared>(*this); } - int run(shared_ptr mem, string &error) override - { - Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; - Precision b_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; - Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; - if (a_type != b_type || a_type != c_type) - { - error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type); - return 1; - } + int compute(shared_ptr mem, Precision a_type,string &error){ switch (a_type) { case Precision::Float64: @@ -66,6 +57,30 @@ namespace deepx::tf } return 0; } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision b_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != b_type || a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type); + return 1; + } + if (metadata.benchmark.repeat > 0) + { + for (int i = 0; i < metadata.benchmark.repeat; i++) + { + if (compute(mem, a_type, error)) + { + return 1; + } + } + }else{ + return compute(mem, a_type, error); + } + return 0; + } }; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp index f8b43e53..ff483da6 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp @@ -16,7 +16,7 @@ namespace deepx::tf Sum(const vector &args, const vector &returns) { 
this->name = "sum"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "reduce"; this->args = args; this->returns = returns; @@ -76,7 +76,7 @@ namespace deepx::tf Prod(const vector &args, const vector &returns) { this->name = "prod"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "reduce"; this->args = args; this->returns = returns; @@ -135,7 +135,7 @@ namespace deepx::tf ReduceMax(const vector &args, const vector &returns) { this->name = "reducemax"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "reduce"; this->args = args; this->returns = returns; @@ -194,7 +194,7 @@ namespace deepx::tf ReduceMin(const vector &args, const vector &returns) { this->name = "reducemin"; - this->author = Author::name(); + this->metadata.author = Author::name(); this->tftype = "reduce"; this->args = args; this->returns = returns; diff --git a/front/py/deepx/nn/deepxir.py b/front/py/deepx/nn/deepxir.py index 33b7e037..b1f6880e 100644 --- a/front/py/deepx/nn/deepxir.py +++ b/front/py/deepx/nn/deepxir.py @@ -19,7 +19,8 @@ def __str__(self): return f"{self._category}:{self._textvalue}" else: return self._textvalue - + + @classmethod def tensorName(cls,name:str,dtype:str): return Param(name,category="tensor",precision=dtype) @@ -72,6 +73,43 @@ def listtensor(cls,value:tuple[Tensor]): # newtensor ( [3 4 5]) -> ( tensor_136144420556608) # // id=1 created_at=1744724799.0650852 sent_at=1744724799.0650952 +class Benchmark: + def __init__(self,repeat:int): + self._repeat=repeat + + def __str__(self): + return f"benchmark.repeat={self._repeat}" + +class Metadata: + def __init__(self,author:str=None,id:str=None,created_at:datetime=None,sent_at:datetime=None): + self._author=None + if author is not None and author != "": + self._author=author + + self._id=None + if id is not None and id != "": + self._id=id + self._created_at=created_at + self._sent_at=sent_at + 
self._benchmark=None + + def __str__(self): + parts =[] + if self._author is not None : + parts.append(f"author={self._author}") + if self._id is not None and self._id != "": + parts.append(f" id={self._id}") + if self._created_at is not None: + parts.append(f" created_at={self._created_at}") + if self._sent_at is not None: + parts.append(f" sent_at={self._sent_at}") + if self._benchmark is not None: + parts.append(f" {self._benchmark}") + return ' '.join(parts) + + def openbench(self,repeat:int): + self._benchmark=Benchmark(repeat) + class DeepxIR: def __init__(self, @@ -90,11 +128,8 @@ def __init__(self, self._name = name self._args = [arg if isinstance(arg, Param) else Param(arg) for arg in args] self._returns = [ret if isinstance(ret, Param) else Param(ret) for ret in returns] - self._author = author - self._id=None - self._created_at=time.time() - self._sent_at=None - + self._metadata=Metadata(author=author,id=None,created_at=time.time()) + def __str__(self): # 函数名部分 parts = [self._name] @@ -120,14 +155,7 @@ def __str__(self): # 添加元数据 parts.append("//") - if self._id is not None: - parts.append(f"id={self._id}") - if self._author: - parts.append(f"author={self._author}") - parts.append(f"created_at={self._created_at}") - if self._sent_at is not None: - parts.append(f"sent_at={self._sent_at}") - + parts.append(str(self._metadata)) return ' '.join(parts) class DeepxIRResp: diff --git a/front/py/deepx/nn/functional/leaffunc_matmul.py b/front/py/deepx/nn/functional/leaffunc_matmul.py index bb69b838..8cad3127 100644 --- a/front/py/deepx/nn/functional/leaffunc_matmul.py +++ b/front/py/deepx/nn/functional/leaffunc_matmul.py @@ -4,11 +4,11 @@ from .leaffunc_life import newtensor from .authormap import defaultauthor -def matmul(a:Tensor,b:Tensor,out:Union[Tensor,str]='')->Tensor: +def matmul(a:Tensor,b:Tensor,out:Union[Tensor,str]='',bench:tuple[int,int]=None)->Tensor: outtensor=out if isinstance(out,str): outshape=Shape.matmul(a.shape,b.shape) 
outtensor=newtensor(outshape,dtype=a.dtype,name=out) from .rtf_matmul import rtf_matmul - rtf_matmul(a,b,outtensor,defaultauthor['matmul']) + rtf_matmul(a,b,outtensor,defaultauthor['matmul'],bench) return outtensor diff --git a/front/py/deepx/nn/functional/rtf_matmul.py b/front/py/deepx/nn/functional/rtf_matmul.py index a6a3af5d..bfe23cee 100644 --- a/front/py/deepx/nn/functional/rtf_matmul.py +++ b/front/py/deepx/nn/functional/rtf_matmul.py @@ -3,9 +3,11 @@ from deepx.scheduler import send from .rtf import A_B_op_C -def rtf_matmul(a:Tensor,b:Tensor,out: Tensor ,author='cublas'): +def rtf_matmul(a:Tensor,b:Tensor,out: Tensor ,author='cublas',bench:int=None): args=[Param.tensor(a),Param.tensor(b)] returns=[Param.tensor(out)] ir=DeepxIR("matmul", args, returns, author) + if bench is not None: + ir._metadata.openbench(bench) send(ir) return out \ No newline at end of file diff --git a/front/py/deepx/scheduler/client/udpconn.py b/front/py/deepx/scheduler/client/udpconn.py index 6a12c26a..a25b0963 100644 --- a/front/py/deepx/scheduler/client/udpconn.py +++ b/front/py/deepx/scheduler/client/udpconn.py @@ -3,7 +3,7 @@ import select class UDPConn: - def __init__(self, endpoint: str = "localhost:9090"): + def __init__(self, endpoint: str = "localhost:8080"): # 解析endpoint self._host, port_str = endpoint.split(':') self._port = int(port_str) diff --git a/front/py/examples/2_ir/3_matmul.py b/front/py/examples/2_ir/3_matmul.py index 144cbdf7..bf682a88 100644 --- a/front/py/examples/2_ir/3_matmul.py +++ b/front/py/examples/2_ir/3_matmul.py @@ -1,22 +1,47 @@ +benchcnt=1000 + +from deepx.nn.functional import save_npy +import numpy as np +np_T1 = np.random.randn(1024, 1024).astype(np.float32) +np_T2 = np.random.randn(1024, 1024).astype(np.float32) + +npy_path = '/home/lipeng/model/deepxmodel/tester/' +save_npy(np_T1,npy_path+'t1') +save_npy(np_T2,npy_path+'t2') + ############-------PyTorch-------################ import torch -torch_t1 = torch.ones(3, 4, dtype=torch.float32) 
-torch_t2 = torch.ones(4, 5, dtype=torch.float32) -torch_t3 = torch_t1 @ torch_t2 -print() +import time +torch_t1 = torch.from_numpy(np_T1) +torch_t2 = torch.from_numpy(np_T2) +# warmup +_=torch_t1 @ torch_t2 + +torch_start = time.time() +for i in range(benchcnt): + torch_t3 = torch_t1 @ torch_t2 + print(torch_t3) - +torch_end = time.time() +print(f"PyTorch time: {torch_end - torch_start} seconds") ############-------DEEPX-------################ -from deepx import zeros, ones, full, arange - +from deepx import uniform, matmul, zeros,load +from deepx.nn.functional import save,load print() -t1 = ones([3,4],dtype='float32',name="t1") -t2 = ones([4,5],dtype='float32',name="t2") -t3 = t1 @ t2 +t1 = load(npy_path+'t1') +t2 = load(npy_path+'t2') +t3= zeros(1024,1024,dtype='float32',name="t3") +# warmup +matmul(t1,t2,out=t3) + +deepx_start = time.time() +matmul(t1,t2,out=t3,bench=(benchcnt)) t3.print() +deepx_end = time.time() +print(f"DeepX time: {deepx_end - deepx_start} seconds") From 116f9f5a51518f8e1a798d66619b9df98aeed15e Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Tue, 22 Apr 2025 19:46:41 +0800 Subject: [PATCH 2/5] =?UTF-8?q?gather,save,load:cuda=20=E9=AA=8C=E8=AF=81?= =?UTF-8?q?=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp | 6 ++++++ .../op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp | 2 ++ front/py/deepx/scheduler/client/udpconn.py | 2 +- front/py/examples/2_ir/3_matmul.py | 4 +++- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp index 02fee22f..3a2a4032 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp @@ -30,6 +30,12 @@ namespace deepx::tensorfunc { throw std::runtime_error("Failed to allocate host 
memory"); } + cudaError_t err = cudaMemcpy(host_data, t.data, total_bytes, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) + { + delete[] host_data; + throw std::runtime_error("Failed to copy data from device to host"); + } stdutil::print(t.shape.shape, host_data, t.shape.dtype, f); delete[] host_data; diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp index 92b16f13..e5dadce8 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp @@ -14,6 +14,8 @@ namespace deepx::tensorfunc { throw std::invalid_argument("A.shape could matmul with B.shape"); } + //TODO + //这里如果对二维矩阵运算,则omp并行不起来,因为C.shape.dim - 2刚好=0 C.shape.rangeParallel(C.shape.dim - 2, [&](const std::vector &indices) { int aIdx=A.shape.linearat(indices); diff --git a/front/py/deepx/scheduler/client/udpconn.py b/front/py/deepx/scheduler/client/udpconn.py index a25b0963..6a12c26a 100644 --- a/front/py/deepx/scheduler/client/udpconn.py +++ b/front/py/deepx/scheduler/client/udpconn.py @@ -3,7 +3,7 @@ import select class UDPConn: - def __init__(self, endpoint: str = "localhost:8080"): + def __init__(self, endpoint: str = "localhost:9090"): # 解析endpoint self._host, port_str = endpoint.split(':') self._port = int(port_str) diff --git a/front/py/examples/2_ir/3_matmul.py b/front/py/examples/2_ir/3_matmul.py index bf682a88..3c22593f 100644 --- a/front/py/examples/2_ir/3_matmul.py +++ b/front/py/examples/2_ir/3_matmul.py @@ -1,4 +1,4 @@ -benchcnt=1000 +benchcnt=100 from deepx.nn.functional import save_npy import numpy as np @@ -34,6 +34,8 @@ t1 = load(npy_path+'t1') t2 = load(npy_path+'t2') t3= zeros(1024,1024,dtype='float32',name="t3") +from deepx.nn.functional import defaultauthor +defaultauthor['matmul']='miaobyte' # warmup matmul(t1,t2,out=t3) From 33ae53b0d6640e725b2bcd4cf5307c373f4710a3 Mon Sep 17 00:00:00 2001 From: lipeng 
<734991033@qq.com> Date: Wed, 23 Apr 2025 02:52:10 +0800 Subject: [PATCH 3/5] =?UTF-8?q?IndexSelect:=E6=B5=8B=E8=AF=95=E9=AA=8C?= =?UTF-8?q?=E8=AF=81=20RenameTensor=EF=BC=9A=E6=B5=8B=E8=AF=95=E9=AA=8C?= =?UTF-8?q?=E8=AF=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit front:embedding ok --- doc/excuter/op-mem-ompsimd/list.md | 33 +++++----- excuter/cpp-common/src/deepx/mem/mem.hpp | 22 +++---- .../src/deepx/shape_changeshape.cpp | 23 ++++++- .../src/deepx/shape_changeshape.hpp | 6 +- .../src/deepx/tensorfunc/changeshape.hpp | 10 ++-- .../src/deepx/tensorfunc/tensorlife.hpp | 2 + excuter/op-mem-ompsimd/src/client/tfs.cpp | 13 +++- .../deepx/tensorfunc/changeshape_miaobyte.hpp | 31 +++++----- .../src/deepx/tensorfunc/io_miaobyte.hpp | 2 +- .../src/deepx/tf/changeshape.hpp | 54 ++++++++--------- .../src/deepx/tf/tensorlife.hpp | 32 ++++++++++ front/py/deepx/nn/functional/__init__.py | 4 +- front/py/deepx/nn/functional/authormap.py | 2 +- front/py/deepx/nn/functional/leaffunc.py | 2 +- .../nn/functional/leaffunc_changeshape.py | 35 ++++++++--- front/py/deepx/nn/functional/leaffunc_io.py | 18 +++++- front/py/deepx/nn/functional/leaffunc_life.py | 7 +++ front/py/deepx/nn/functional/reduce.py | 9 +-- .../py/deepx/nn/functional/rtf_changeshape.py | 5 +- front/py/deepx/nn/functional/rtf_io.py | 2 +- front/py/deepx/nn/functional/rtf_life.py | 6 ++ front/py/deepx/nn/modules/__init__.py | 5 +- front/py/deepx/nn/modules/module.py | 39 ++++-------- front/py/deepx/nn/modules/sparse.py | 60 ++++++++++--------- front/py/deepx/scheduler/client/udpconn.py | 2 +- front/py/deepx/tensor/changeshape.py | 30 +++++----- front/py/deepx/tensor/io.py | 2 +- front/py/deepx/tensor/shape.py | 37 +++++++----- front/py/deepx/tensor/tensor.py | 17 ++++-- front/py/examples/3_module/1_embedding.py | 54 +++++++++++++++++ 30 files changed, 364 insertions(+), 200 deletions(-) diff --git a/doc/excuter/op-mem-ompsimd/list.md 
b/doc/excuter/op-mem-ompsimd/list.md index 8ca0e1d6..9f43ccf4 100644 --- a/doc/excuter/op-mem-ompsimd/list.md +++ b/doc/excuter/op-mem-ompsimd/list.md @@ -13,10 +13,11 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | +| renametensor | none | renametensor(tensor t, var new_name)->() | rename T1 to T2 | renametensor(tensor t, var new_name)->() | | newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor tensor1) | | newtensor | none | newtensor(var shape)->(tensor t) | T1 =Tensor(shape=[...]) | newtensor(var shape)->(tensor t) | | deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | +| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | ### io @@ -31,9 +32,9 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | | normal | miaobyte | normal(tensor t, var mean, var std, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var std, var seed)->() | | uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | +| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | | constant | miaobyte | constant(tensor t, var value)->() | constant(T1,value) | constant(tensor t, var value)->() | ### elementwise @@ -46,8 +47,8 @@ | min | miaobyte | min(tensor A, tensor B)->(tensor C) | T3=min(T1,T2) | min(tensor A, 
tensor B)->(tensor C) | | maxscalar | miaobyte | maxscalar(tensor A, var scalar)->(tensor C) | T3=max(T1,scalar) | maxscalar(tensor A, var scalar)->(tensor C) | | divscalar | miaobyte | divscalar(tensor A, var scalar)->(tensor C) | T3=T1/scalar | divscalar(tensor A, var scalar)->(tensor C) | -| add | cblas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | -| add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | +| log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | +| addscalar | miaobyte | addscalar(tensor a, var scalar)->(tensor c) | T3=T1+scalar | addscalar(tensor a, var scalar)->(tensor c) | | greater | miaobyte | greater(tensor A, tensor B)->(tensor mask) | mask=greater(T1,T2) | greater(tensor A, tensor B)->(tensor mask) | | lessscalar | miaobyte | lessscalar(tensor A, var scalar)->(tensor mask) | mask=less(T1,scalar) | lessscalar(tensor A, var scalar)->(tensor mask) | | less | miaobyte | less(tensor A, tensor B)->(tensor mask) | mask=less(T1,T2) | less(tensor A, tensor B)->(tensor mask) | @@ -55,8 +56,8 @@ | minscalar | miaobyte | minscalar(tensor A, var scalar)->(tensor C) | T3=min(T1,scalar) | minscalar(tensor A, var scalar)->(tensor C) | | rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | | rpowscalar | miaobyte | rpowscalar(var scalar, tensor A)->(tensor C) | T3=scalar^T1 | rpowscalar(var scalar, tensor A)->(tensor C) | -| log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | -| addscalar | miaobyte | addscalar(tensor a, var scalar)->(tensor c) | T3=T1+scalar | addscalar(tensor a, var scalar)->(tensor c) | +| add | cblas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | +| add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | sub | 
miaobyte | sub(tensor a, tensor b)->(tensor c) | T3=T1-T2 | sub(tensor a, tensor b)->(tensor c) | | sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | | subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | @@ -76,22 +77,22 @@ | matmul | cblas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | | matmul | miaobyte | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | +### reduce + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| prod | miaobyte | prod(tensor A, vector axis, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector axis, var keepdims)->(tensor B) | +| reducemax | miaobyte | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | +| sum | miaobyte | sum(tensor A, vector axis, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector axis, var keepdims)->(tensor B) | +| reducemin | miaobyte | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | + ### changeshape | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| gather | miaobyte | gather(tensor A, tensor indices, var axis)->(tensor B) | T2 = T1.gather(indices=T3, axis=3) | gather(tensor A, tensor indices, var axis)->(tensor B) | +| indexselect | miaobyte | indexselect(tensor A, tensor index, var axis)->(tensor B) | T2 = T1.indexselect(index=T3, axis=3) | indexselect(tensor A, tensor index, var axis)->(tensor B) | | broadcastTo | miaobyte | 
broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector new_shape)->(tensor B) | | concat | miaobyte | concat(listtensor tensors, var dim)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var dim)->(tensor result) | | transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T1.transpose(dimorder=[1,0])->T2 | transpose(tensor A, vector dim_order)->(tensor C) | | reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | -### reduce - -| Operation | Author | Func Def | Math Formula | IR Instruction | -|-----------|--------|------------|--------------|----------------| -| reducemax | miaobyte | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | -| prod | miaobyte | prod(tensor A, vector axis, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector axis, var keepdims)->(tensor B) | -| sum | miaobyte | sum(tensor A, vector axis, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector axis, var keepdims)->(tensor B) | -| reducemin | miaobyte | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | - diff --git a/excuter/cpp-common/src/deepx/mem/mem.hpp b/excuter/cpp-common/src/deepx/mem/mem.hpp index 9ea7ab70..710c3773 100644 --- a/excuter/cpp-common/src/deepx/mem/mem.hpp +++ b/excuter/cpp-common/src/deepx/mem/mem.hpp @@ -108,17 +108,7 @@ namespace deepx::mem } mem[name] = tensor; } - - // template - // shared_ptr> temptensor(vector shape) - // { - // // 直接构造到shared_ptr避免移动 - // auto temp = tensorfunc::New(shape); // 临时对象 - // auto cloned = make_shared>(std::move(temp)); - // 
mem["temp" + to_string(tempidx)] = cloned; - // tempidx++; - // return cloned; - // } + bool existstensor(const string &name) const { @@ -172,6 +162,16 @@ namespace deepx::mem { args.erase(name); } + + void rename_tensor(const string &old_name, const string &new_name) + { + if (mem.find(old_name) == mem.end()) + { + throw std::runtime_error("tensor not found: " + old_name); + } + mem[new_name] = mem[old_name]; + mem.erase(old_name); + } }; } #endif // DEEPX_MEM_MEMBASE_HPP \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_changeshape.cpp b/excuter/cpp-common/src/deepx/shape_changeshape.cpp index c0002617..f3a60bd1 100644 --- a/excuter/cpp-common/src/deepx/shape_changeshape.cpp +++ b/excuter/cpp-common/src/deepx/shape_changeshape.cpp @@ -120,5 +120,26 @@ namespace deepx } } } - + + vector indexselectShape(const vector &input_shape, const vector &index_shape, const int axis){ + + vector output_shape(input_shape.size()-1+index_shape.size()); + for (int output_idx=0,input_idx=0,index_idx=0;output_idx=axis+index_shape.size()){ + if (input_idx==axis){ + input_idx++; + } + output_shape[output_idx]=input_shape[input_idx++]; + }else{ + //index + output_shape[output_idx]=index_shape[index_idx++]; + } + } + return output_shape; + } } \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_changeshape.hpp b/excuter/cpp-common/src/deepx/shape_changeshape.hpp index ac2a588a..3f299885 100644 --- a/excuter/cpp-common/src/deepx/shape_changeshape.hpp +++ b/excuter/cpp-common/src/deepx/shape_changeshape.hpp @@ -70,8 +70,8 @@ namespace deepx }; std::vector broadcastMap(const std::vector &a, const std::vector &b); - - //gather - //gather的out.shape=indices.shape,所以无需计算 + + //indexselect + vector indexselectShape(const vector &input_shape, const vector &index_shape, const int axis); } #endif // DEEPX_SHAPE_CHANGESHAPE_HPP \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp 
b/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp index 5e359dbc..100f408b 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp @@ -61,17 +61,17 @@ namespace deepx::tensorfunc broadcastToDispatcher::broadcastTo(A, new_shape, B); } - // gather + // indexselect template - struct gatherDispatcher + struct indexselectDispatcher { - static void gather(const Tensor &input, const Tensor &indices, const int axis, Tensor &output) = delete; + static void indexselect(const Tensor &input, const Tensor &indices, const int axis, Tensor &output) = delete; }; template - void gather(const Tensor &input, const Tensor &indices, const int axis, Tensor &output) + void indexselect(const Tensor &input, const Tensor &indices, const int axis, Tensor &output) { - gatherDispatcher::gather(input, indices, axis, output); + indexselectDispatcher::indexselect(input, indices, axis, output); } // // split diff --git a/excuter/cpp-common/src/deepx/tensorfunc/tensorlife.hpp b/excuter/cpp-common/src/deepx/tensorfunc/tensorlife.hpp index 0dc884ac..cc06c69d 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/tensorlife.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/tensorlife.hpp @@ -19,5 +19,7 @@ namespace deepx::tensorfunc template void copy(const Tensor &src,Tensor &dst); + //rename + //通过tf直接实现 } #endif \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/client/tfs.cpp b/excuter/op-mem-ompsimd/src/client/tfs.cpp index 6eed5e04..07df04bd 100644 --- a/excuter/op-mem-ompsimd/src/client/tfs.cpp +++ b/excuter/op-mem-ompsimd/src/client/tfs.cpp @@ -68,6 +68,13 @@ namespace deepx::tf Param("t", DataCategory::Tensor, Precision::Any), }), vector())); + //renametensor + tffactory.add_tf(std::make_shared(vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + Param("new_name", DataCategory::Var, Precision::String), + }), + vector())); } // init @@ -488,11 +495,11 @@ namespace deepx::tf { 
Param("B", DataCategory::Tensor, Precision::Any), }))); - // gather author=miaobyte - tffactory.add_tf(std::make_shared>(vector( + // indexselect author=miaobyte + tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), - Param("indices", DataCategory::Tensor, Precision::Int32 | Precision::Int64), + Param("index", DataCategory::Tensor, Precision::Int32 | Precision::Int64), Param("axis", DataCategory::Var, Precision::Int32), }), vector( diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp index 7e2985fb..f9466111 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -138,21 +138,24 @@ namespace deepx::tensorfunc } }; - // gather - // 支持高维indices - // 结果写入input_indices + // indexselect + // output_indices,index,index_indices,gatheraxis->input_indices template - void fromGatherIndices(const vector &output_indices, const Tensor &indices, const int gatherAxis, vector &input_indices) + void fromIndexselectIndices(const vector &output_indices, const Tensor &index,vector &index_indices, const int gatherAxis, vector &input_indices) { - std::copy(output_indices.begin(), output_indices.begin()+input_indices.size(), input_indices.begin()); - int indices_idx = indices.shape.linearat(output_indices); - input_indices[gatherAxis] = indices.data[indices_idx]; + + std::copy(output_indices.begin(), output_indices.begin()+gatherAxis, input_indices.begin()); + std::copy(output_indices.begin()+gatherAxis,output_indices.begin()+gatherAxis+index_indices.size(), index_indices.begin()); + int index_idx=index.shape.linearat(index_indices); + input_indices[gatherAxis] = index.data[index_idx]; + std::copy(output_indices.begin()+gatherAxis+index_indices.size(),output_indices.begin()+output_indices.size(), input_indices.begin()+gatherAxis+1); + } template 
- struct gatherDispatcher + struct indexselectDispatcher { - static void gather(const Tensor &input, const Tensor &indices, const int axis, Tensor &output) + static void indexselect(const Tensor &input, const Tensor &index, const int axis, Tensor &output) { int gatherAxis = axis < 0 ? input.shape.dim + axis : axis; if (gatherAxis < 0 || gatherAxis >= input.shape.dim) @@ -160,17 +163,17 @@ namespace deepx::tensorfunc throw std::invalid_argument("Axis is out of bounds"); } - vector input_gatherShape = indices.shape.shape; - if (input_gatherShape.empty() || input_gatherShape != output.shape.shape) + vector gatherShape = indexselectShape(input.shape.shape,index.shape.shape,gatherAxis); + if (gatherShape.empty() || gatherShape != output.shape.shape) { - throw TensorShapeError("Gather shape mismatch"); + throw TensorShapeError("Indexselect shape mismatch"); } output.shape.rangeParallel(output.shape.dim, [&](const int idx, const std::vector &output_indices, ThreadLocalVectors &tlv) { - fromGatherIndices(output_indices, indices, gatherAxis, tlv.get(0)); + fromIndexselectIndices(output_indices, index,tlv.get(1), gatherAxis, tlv.get(0)); output.data[idx] = input.data[input.shape.linearat(tlv.get(0))]; }, - {input.shape.dim}); + {input.shape.dim,index.shape.dim}); } }; diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp index f219ca59..38c12f32 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp @@ -85,7 +85,7 @@ namespace deepx::tensorfunc std::ifstream data_fs(datapath, std::ios::binary); data_fs.seekg(0, std::ios::end); std::streamsize fileSize = data_fs.tellg(); - std::streamsize expectedSize = shape.size * precision_bits(shape.dtype) / 8; + std::streamsize expectedSize = shape.size * (precision_bits(shape.dtype) / 8); if (fileSize != expectedSize) { diff --git 
a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp index 8afa8e14..e0d96019 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp @@ -322,14 +322,14 @@ namespace deepx::tf } }; - // gather + // indexselect template - class Gather : public TF + class IndexSelect : public TF { public: - Gather(const vector &args, const vector &returns) + IndexSelect(const vector &args, const vector &returns) { - this->name = "gather"; + this->name = "indexselect"; this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; @@ -338,11 +338,11 @@ namespace deepx::tf string math_formula() const override { - return "T2 = T1.gather(indices=T3, axis=3)"; + return "T2 = T1.indexselect(index=T3, axis=3)"; } shared_ptr clone() const override { - return make_shared>(*this); + return make_shared>(*this); } int run(shared_ptr mem, string &error) override { @@ -358,10 +358,10 @@ namespace deepx::tf error = "Type mismatch: " + precision_str(input_type) + " != " + precision_str(output_type); return 1; } - Precision indices_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; - if (indices_type != Precision::Int32 && indices_type != Precision::Int64) + Precision index_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + if (index_type != Precision::Int32 && index_type != Precision::Int64) { - error = "indices only support int32 or int64"; + error = "index only support int32 or int64"; return 1; } int axis = this->getvar(2, mem, true); @@ -369,73 +369,73 @@ namespace deepx::tf { case Precision::Float64: { - if (indices_type == Precision::Int32) + if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), 
*mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } else { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Float32: { - if (indices_type == Precision::Int32) + if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } else { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Int64: { - if (indices_type == Precision::Int32) + if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } else { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Int16: { - if (indices_type == Precision::Int32) + if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), 
*mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } else { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Int8: { - if (indices_type == Precision::Int32) + if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } else { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Bool: { - if (indices_type == Precision::Int32) + if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } else { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } diff --git 
a/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp index d703355c..8d4d4f23 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp @@ -230,5 +230,37 @@ namespace deepx::tf return make_shared(*this); } }; + + //rename + class RenameTensor : public TF + { + public: + RenameTensor(vector args, vector returns) + { + this->name = "renametensor"; + this->args = args; + this->returns = returns; + this->tftype = "tensorlife"; + } + int run(shared_ptr mem, string &error) override + { + string old_name = this->args[0].textvalue; + if (!checktensors({this->args[0].textvalue}, mem, error) != 0) + { + return 1; + } + string new_name = this->args[1].textvalue; + mem->rename_tensor(old_name, new_name); + return 0; + } + string math_formula() const override + { + return "rename T1 to T2"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + }; } #endif diff --git a/front/py/deepx/nn/functional/__init__.py b/front/py/deepx/nn/functional/__init__.py index 0f729cfa..ff85363e 100644 --- a/front/py/deepx/nn/functional/__init__.py +++ b/front/py/deepx/nn/functional/__init__.py @@ -19,12 +19,12 @@ #leaffunc "newtensor","printtensor","load", #life - "printtensor","save","save_npy",#io + "printtensor","save","save_npy","save_torch",#io "constant","constant_","full","zeros","ones","uniform","uniform_","arange","arange_","kaiming_uniform","kaiming_uniform_","calculate_fan_in_and_fan_out", "add","sub","mul","div","sqrt","pow","exp","log", "matmul", "reducemax","reducemin","sum","prod", - "reshape","permute","transpose","concat","broadcastTo", + "reshape","permute","transpose","concat","broadcastTo","indexselect", #functional "relu","sigmoid","swish", diff --git a/front/py/deepx/nn/functional/authormap.py b/front/py/deepx/nn/functional/authormap.py index 4a241865..120db25b 100644 --- a/front/py/deepx/nn/functional/authormap.py +++ 
b/front/py/deepx/nn/functional/authormap.py @@ -34,7 +34,7 @@ 'transpose':'miaobyte', 'broadcastTo':'miaobyte', 'concat':'miaobyte', - 'gather':'miaobyte', + 'indexselect':'miaobyte', #matmul # 'matmul':'miaobyte', 'matmul':'cublas', diff --git a/front/py/deepx/nn/functional/leaffunc.py b/front/py/deepx/nn/functional/leaffunc.py index 58d21105..566e2d48 100644 --- a/front/py/deepx/nn/functional/leaffunc.py +++ b/front/py/deepx/nn/functional/leaffunc.py @@ -61,7 +61,7 @@ def op_func( def create_A_dim_keepdim_tf_C(op_name): def op_func( a:Tensor, - dim:list[int], + dim:tuple[int,...], keepdim:bool=False, out:Union[Tensor,str]='', author:str='miaobyte', diff --git a/front/py/deepx/nn/functional/leaffunc_changeshape.py b/front/py/deepx/nn/functional/leaffunc_changeshape.py index bd86edf6..cee587bf 100644 --- a/front/py/deepx/nn/functional/leaffunc_changeshape.py +++ b/front/py/deepx/nn/functional/leaffunc_changeshape.py @@ -4,7 +4,11 @@ from .leaffunc_life import newtensor from .authormap import defaultauthor -def reshape(t:Tensor,shape:list[int],out:Union[Tensor,str]='')->Tensor: +def reshape(t:Tensor,shape:tuple[int,...],out:Union[Tensor,str]='')->Tensor: + assert isinstance(shape,tuple) + for i in shape: + assert isinstance(i,int) and i>0 + outtensor=out if isinstance(out,str): outshape=shape @@ -18,8 +22,12 @@ def reshape(t:Tensor,shape:list[int],out:Union[Tensor,str]='')->Tensor: def permute(t:Tensor, - dimorder:list[int], + dimorder:tuple[int,...], out:Union[Tensor,str]='')->Tensor: + assert isinstance(dimorder,tuple) + for i in dimorder: + assert isinstance(i,int) + if t.ndim!=len(dimorder): raise ValueError(f"shape参数不合法,当前输入维度数:{len(dimorder)},张量维度数:{t.ndim}") dimorder = [d % t.ndim for d in dimorder] @@ -49,11 +57,15 @@ def concat(tensors:Union[list[Tensor],tuple[Tensor]],dim:int,out:Union[Tensor,st rtf_concat(tensors,dim,outtensor,defaultauthor['concat']) return outtensor -def 
broadcastTo(t:Tensor,new_shape:tuple[int],out:Union[Tensor,str]='',requires_grad:bool=False,author='miaobyte')->Tensor: +def broadcastTo(t:Tensor,new_shape:tuple[int,...],out:Union[Tensor,str]='',requires_grad:bool=False,author='miaobyte')->Tensor: + assert isinstance(new_shape,tuple) + for i in new_shape: + assert isinstance(i,int) and i>0 + if t.shape==new_shape: return t bshape=Shape.broadcast_shape(t.shape,new_shape) - if bshape!=new_shape: + if bshape!=tuple(new_shape): raise ValueError(f"广播失败:{t.shape} 无法广播为 {new_shape} ") outtensor=out if isinstance(out,str): @@ -64,12 +76,17 @@ def broadcastTo(t:Tensor,new_shape:tuple[int],out:Union[Tensor,str]='',requires_ return outtensor broadcast_to = broadcastTo -def gather(input:Tensor,indices:Tensor,gatheraxis:int,out:Union[Tensor,str]='')->Tensor: +def indexselect(input:Tensor,indices:Tensor,gatheraxis:int,out:Union[Tensor,str]='')->Tensor: + assert gatheraxis>=0 and gatheraxisTensor: @@ -87,7 +104,7 @@ def gather(input:Tensor,indices:Tensor,gatheraxis:int,out:Union[Tensor,str]='')- # return reshape(t, new_shape) # OpNode.register("expand") -# def expand(t:Tensor,shape:list[int],out:Union[Tensor,str]='')->Tensor: +# def expand(t:Tensor,shape:tuple[int,...],out:Union[Tensor,str]='')->Tensor: # outtensor=None # if isinstance(out,str): # outtensor=Tensor(shape=shape, dtype=t.dtype, device=t.device) diff --git a/front/py/deepx/nn/functional/leaffunc_io.py b/front/py/deepx/nn/functional/leaffunc_io.py index dd09abc0..d9551c6c 100644 --- a/front/py/deepx/nn/functional/leaffunc_io.py +++ b/front/py/deepx/nn/functional/leaffunc_io.py @@ -13,13 +13,25 @@ def save(t:Tensor,path:str): def save_npy(t,path:str): r''' - 保存numpy.tensor为deepxtensor格式 + 保存numpy.ndarray为deepx.tensor格式 ''' - from numpy import save,ndarray,ascontiguousarray + from numpy import ascontiguousarray shape=Shape(t.shape) shape._dtype=str(t.dtype) saveShape(shape,path+".shape") array = ascontiguousarray(t) array.tofile(path+'.data') - return t \ No newline at 
end of file + return t + +def save_torch(t,path:str): + r''' + 保存torch.Tensor为deepx.tensor格式 + ''' + from torch import Tensor as torch_Tensor + if isinstance(t,torch_Tensor): + t=t.detach().cpu().numpy() + else: + raise ValueError("t must be a torch.Tensor") + save_npy(t,path) + \ No newline at end of file diff --git a/front/py/deepx/nn/functional/leaffunc_life.py b/front/py/deepx/nn/functional/leaffunc_life.py index 8952b9d6..8921f8e3 100644 --- a/front/py/deepx/nn/functional/leaffunc_life.py +++ b/front/py/deepx/nn/functional/leaffunc_life.py @@ -22,6 +22,13 @@ def copytensor(t:Tensor,out:Tensor): def deltensor(t:Tensor): from .rtf_life import rtf_deltensor rtf_deltensor(t) +def renametensor(t:Tensor,new_name:str): + assert isinstance(t,Tensor) + assert isinstance(new_name,str) and new_name != '' + assert t.name is not None and t.name != '' + + from .rtf_life import rtf_renametensor + rtf_renametensor(t,new_name) def load(path:str)->Tensor: from .rtf_io import rtf_load diff --git a/front/py/deepx/nn/functional/reduce.py b/front/py/deepx/nn/functional/reduce.py index c5998e6a..f2731fd4 100644 --- a/front/py/deepx/nn/functional/reduce.py +++ b/front/py/deepx/nn/functional/reduce.py @@ -4,14 +4,11 @@ from .leaffunc_life import newtensor #mean -def mean(a:Tensor,dim:tuple[int]=None,keepdim:bool=False)->Tensor: +def mean(a:Tensor,dim:tuple[int,...]=None,keepdim:bool=False)->Tensor: # 如果dim为None,则对所有维度求平均 if dim is None: dim = list(range(a.ndim)) - elif isinstance(dim, int): - dim = [dim] - else: - dim = list(dim) + dim=list(dim) total = 1 for i in dim: if i < 0: @@ -19,6 +16,6 @@ def mean(a:Tensor,dim:tuple[int]=None,keepdim:bool=False)->Tensor: total *= a.shape[i] reduceshape=Shape.reduceshape(a.shape,dim,keepdim) out=newtensor(reduceshape,dtype=a.dtype) - sum(a, dim, keepdim, out) + sum(a, tuple(dim), keepdim, out) out.div_(total) return out diff --git a/front/py/deepx/nn/functional/rtf_changeshape.py b/front/py/deepx/nn/functional/rtf_changeshape.py index 
c7233ab0..37b38bad 100644 --- a/front/py/deepx/nn/functional/rtf_changeshape.py +++ b/front/py/deepx/nn/functional/rtf_changeshape.py @@ -28,9 +28,10 @@ def rtf_broadcastTo(t:Tensor,new_shape:tuple[int],out:Tensor,author='miaobyte'): ir=DeepxIR("broadcastTo", args, returns,author) send(ir) -def rtf_gather(input:Tensor,indices:Tensor,axis:int,out:Tensor,author='miaobyte'): +def rtf_indexselect(input:Tensor,indices:Tensor,axis:int,out:Tensor,author='miaobyte'): + assert axis>=0 and axisTensor: send(ir) shapefile=path+'.shape' tensor_name,shape,dtype=loadShape(shapefile) - return Tensor(shape,dtype,tensor_name) + return Tensor(shape.shape,dtype,tensor_name) diff --git a/front/py/deepx/nn/functional/rtf_life.py b/front/py/deepx/nn/functional/rtf_life.py index b233df70..014cd505 100644 --- a/front/py/deepx/nn/functional/rtf_life.py +++ b/front/py/deepx/nn/functional/rtf_life.py @@ -20,3 +20,9 @@ def rtf_deltensor(t:Tensor): returns=[] ir=DeepxIR("deltensor", args, returns,'') send(ir) + +def rtf_renametensor(t:Tensor,new_name:str): + args=[Param.tensor(t),Param.varstr(new_name)] + returns=[] + ir=DeepxIR("renametensor", args, returns,'') + send(ir) diff --git a/front/py/deepx/nn/modules/__init__.py b/front/py/deepx/nn/modules/__init__.py index 83ce046c..bf433622 100644 --- a/front/py/deepx/nn/modules/__init__.py +++ b/front/py/deepx/nn/modules/__init__.py @@ -1,8 +1,9 @@ from .module import Module, Sequential from .linear import Linear - +from .sparse import Embedding __all__ = [ "Module", "Linear", - "Sequential" + "Sequential", + "Embedding", ] diff --git a/front/py/deepx/nn/modules/module.py b/front/py/deepx/nn/modules/module.py index 5c7be9b2..6966bebd 100644 --- a/front/py/deepx/nn/modules/module.py +++ b/front/py/deepx/nn/modules/module.py @@ -12,8 +12,7 @@ def __init__(self, name: Optional[str] = None): def _generate_default_name(self) -> str: class_name = self.__class__.__name__ - # 修改正则表达式,保留连续大写字母为一个单词 - base_name = re.sub(r'(? 
None: - if not name.startswith('_'): - if isinstance(value, Module): - self.register_module(name, value) - elif isinstance(value, Tensor): - self.register_parameter(name, value) - # 使用父类方法设置属性,避免递归 - super().__setattr__(name, value) + # def __setattr__(self, name: str, value: Any) -> None: + # if not name.startswith('_'): + # if isinstance(value, Module): + # self.register_module(name, value) + # elif isinstance(value, Tensor): + # self.register_parameter(name, value) + # # 使用父类方法设置属性,避免递归 + # super().__setattr__(name, value) def register_module(self, name: str, module: Optional['Module']) -> None: if module is None: @@ -89,25 +88,7 @@ def named_modules(self, memo: Optional[set] = None, prefix: str = '' for name, module in self._modules.items(): submodule_prefix = f"{prefix}.{name}" if prefix else name yield from module.named_modules(memo, submodule_prefix) - - # def to(self, device: Union[Device, str]) -> 'Module': - # """移动模块到指定设备""" - # for param in self.parameters(): - # param.to(device) - # for buf in self.buffers(): - # buf.to(device) - # return self - - # def train(self, mode: bool = True) -> 'Module': - # self.training = mode - # for module in self.children(): - # module.train(mode) - # return self - - # def eval(self) -> 'Module': - # """设置评估模式""" - # return self.train(False) - + def state_dict(self) -> Dict[str, Tensor]: """返回模型状态字典""" state = {} diff --git a/front/py/deepx/nn/modules/sparse.py b/front/py/deepx/nn/modules/sparse.py index a7727a67..ca00f28c 100644 --- a/front/py/deepx/nn/modules/sparse.py +++ b/front/py/deepx/nn/modules/sparse.py @@ -93,51 +93,55 @@ class Embedding(Module): def __init__(self, num_embeddings:int, #嵌入字典的大小(词汇表大小)vocab_size,llama=128256 embedding_dim:int, #每个嵌入向量的维度,隐藏层大小hidden_size,llama=4096 - padding_idx:int=None, - max_norm:float=None, - norm_type:float=2.0, - scale_grad_by_freq:bool=False, - _weight:Tensor=None,dtype=None, - sparse:bool=False): + # padding_idx:int=None, + # max_norm:float=None, + # norm_type:float=2.0, + 
# scale_grad_by_freq:bool=False, + weight:Tensor=None,dtype='float32', + # sparse:bool=False + ): super(Embedding, self).__init__() self.num_embeddings = num_embeddings self.embedding_dim = embedding_dim - if padding_idx is not None: - if padding_idx > 0: - assert ( - padding_idx < self.num_embeddings - ), "Padding_idx必须在num_embeddings范围内" - elif padding_idx < 0: - assert ( - padding_idx >= -self.num_embeddings - ), "Padding_idx必须在num_embeddings范围内" - padding_idx = self.num_embeddings + padding_idx - self.padding_idx = padding_idx - self.max_norm = max_norm - self.norm_type = norm_type - self.scale_grad_by_freq = scale_grad_by_freq - if _weight is None: + # if padding_idx is not None: + # if padding_idx > 0: + # assert ( + # padding_idx < self.num_embeddings + # ), "Padding_idx必须在num_embeddings范围内" + # elif padding_idx < 0: + # assert ( + # padding_idx >= -self.num_embeddings + # ), "Padding_idx必须在num_embeddings范围内" + # padding_idx = self.num_embeddings + padding_idx + # self.padding_idx = padding_idx + # self.max_norm = max_norm + # self.norm_type = norm_type + # self.scale_grad_by_freq = scale_grad_by_freq + if weight is None: self.weight = Tensor(shape=(num_embeddings, embedding_dim),dtype=dtype) + self.register_parameter('weight', self.weight) self.reset_parameters() else: - assert list(_weight.shape) == [ + assert list(weight.shape) == [ num_embeddings, embedding_dim, ], "权重形状与num_embeddings和embedding_dim不匹配" - self.weight = _weight + self.weight = weight - self.sparse = sparse + # self.sparse = sparse - if padding_idx is not None: - self.weight[padding_idx] = 0 + # if padding_idx is not None: + # self.weight[padding_idx] = 0 def reset_parameters(self) -> None: self.weight.normal_() # 正态分布初始化权重 self._fill_padding_idx_with_zero() # 填充索引位置归零 def _fill_padding_idx_with_zero(self) -> None: - if self.padding_idx is not None: - self.weight[self.padding_idx].fill_(0) + #TODO + pass + # if self.padding_idx is not None: + # self.weight[self.padding_idx].fill_(0) def 
forward(self, input:Tensor)->Tensor: return self.weight[input] diff --git a/front/py/deepx/scheduler/client/udpconn.py b/front/py/deepx/scheduler/client/udpconn.py index 6a12c26a..a25b0963 100644 --- a/front/py/deepx/scheduler/client/udpconn.py +++ b/front/py/deepx/scheduler/client/udpconn.py @@ -3,7 +3,7 @@ import select class UDPConn: - def __init__(self, endpoint: str = "localhost:9090"): + def __init__(self, endpoint: str = "localhost:8080"): # 解析endpoint self._host, port_str = endpoint.split(':') self._port = int(port_str) diff --git a/front/py/deepx/tensor/changeshape.py b/front/py/deepx/tensor/changeshape.py index f5534541..462fc9d3 100644 --- a/front/py/deepx/tensor/changeshape.py +++ b/front/py/deepx/tensor/changeshape.py @@ -2,31 +2,36 @@ from .tensor import Tensor,tensor_method @tensor_method -def reshape(self,*shape,out:Union[Tensor,str]='')->Tensor: +def reshape(self,shape:tuple[int,...],out:Union[Tensor,str]='')->Tensor: + assert isinstance(shape,tuple) from deepx.nn.functional import reshape as reshape_func result=reshape_func(self,shape,out) return result @tensor_method -def reshape_(self,*shape)->Tensor: +def reshape_(self,shape:tuple[int,...])->Tensor: + assert isinstance(shape,tuple) from deepx.nn.functional import reshape as reshape_func result=reshape_func(self,shape,self) return result @tensor_method -def permute(self,*dimorder,out:Union[Tensor,str]=''): +def permute(self,dimorder:tuple[int,...],out:Union[Tensor,str]=''): + assert isinstance(dimorder,tuple) from deepx.nn.functional import permute as permute_func result=permute_func(self,dimorder,out) return result @tensor_method -def permute_(self,*dimorder): +def permute_(self,dimorder:tuple[int,...])->Tensor: + assert isinstance(dimorder,tuple) from deepx.nn.functional import permute as permute_func permute_func(self,dimorder,self) return self @tensor_method def transpose(self,out:Union[Tensor,str]=''): + assert isinstance(out,str) or isinstance(out,Tensor) from deepx.nn.functional import 
transpose as transpose_func result=transpose_func(self,out) return result @@ -38,26 +43,23 @@ def transpose_(self): return self @tensor_method -def broadcastshape(self,other:Tensor)->tuple[int]: +def broadcastshape(self,other:Tensor)->tuple[int,...]: from deepx.nn.functional import broadcastshape as broadcastshape_func result=broadcastshape_func(self.shape,other.shape) return result @tensor_method -def broadcastTo(self,shape:tuple[int],out:Union[Tensor,str]='')->Tensor: +def broadcastTo(self,shape:tuple[int,...],out:Union[Tensor,str]='')->Tensor: from deepx.nn.functional import broadcastTo as broadcastTo_func result=broadcastTo_func(self,shape,out) return result @tensor_method -def gather(self,indices:Tensor,dim:int,out:Union[Tensor,str]='')->Tensor: - final_indices=indices - #TODO 当indices不是tensor时,需要转换为tensor - if not isinstance(indices,Tensor): - raise ValueError("indices must be a Tensor") - - from deepx.nn.functional import gather as gather_func - result=gather_func(self,final_indices,dim,out) +def indexselect(self,index:Tensor,axis:int=0,out:Union[Tensor,str]='')->Tensor: + assert isinstance(index,Tensor) + gatheraxis=axis%self.ndim + from deepx.nn.functional import indexselect as indexselect_func + result=indexselect_func(self,index,gatheraxis,out) return result diff --git a/front/py/deepx/tensor/io.py b/front/py/deepx/tensor/io.py index 23027ee4..35e3b0f7 100644 --- a/front/py/deepx/tensor/io.py +++ b/front/py/deepx/tensor/io.py @@ -11,7 +11,7 @@ def loadShape(path:str)->tuple[str,Shape,str]: raise ValueError("文件名必须以.shape结尾") tensor_name = filename[:-6] # 移除'.shape'后缀 - return (tensor_name,Shape(shape['shape']),shape['dtype']) + return (tensor_name,Shape(tuple(shape['shape'])),shape['dtype']) def saveShape(t:Shape,path:str): if path.endswith('.shape'): diff --git a/front/py/deepx/tensor/shape.py b/front/py/deepx/tensor/shape.py index dfc4f5a0..4ce87937 100644 --- a/front/py/deepx/tensor/shape.py +++ b/front/py/deepx/tensor/shape.py @@ -1,10 +1,12 @@ import 
numpy as np from typing import Optional,Union class Shape: - def __init__(self, - shape:Optional[Union[tuple[int],list[int],int]]=None): + def __init__(self, shape:tuple[int,...]=None): # 确保 shape 是元组类型 - self._shape = tuple(shape) + assert isinstance(shape,tuple) + self._shape = shape + for i in self._shape: + assert isinstance(i,int) and i>0 self._size = int(np.prod(self.shape)) if self.shape else 0 # 计算 stride(步长) self._strides = self._compute_strides() @@ -17,7 +19,7 @@ def shape(self,dim=None): else: return self._shape[dim] - def numel(self): + def numel(self)->int: """计算张量中所有元素的数量(与torch.Tensor.numel()行为一致) 实现说明: @@ -27,7 +29,7 @@ def numel(self): """ return self._size # 在__init__中已预先计算好 - def dim(self): + def dim(self)->int: """返回张量的维度数(与torch.Tensor.dim()行为一致) 实现说明: @@ -38,7 +40,7 @@ def dim(self): return len(self._shape) @property - def ndim(self): + def ndim(self)->int: """返回张量的维度数(dim的别名,与PyTorch命名习惯保持一致) 设计考虑: @@ -48,7 +50,7 @@ def ndim(self): """ return self.dim() - def ndimension(self): + def ndimension(self)->int: """返回张量的维度数(dim的别名,与PyTorch命名习惯保持一致) 设计考虑: @@ -59,7 +61,7 @@ def ndimension(self): return self.dim() @property - def stride(self): + def stride(self)->tuple[int,...]: """返回所有维度的步长元组""" return self._strides @@ -81,13 +83,13 @@ def __repr__(self): def __getitem__(self, idx): return self.shape[idx] - def __len__(self): + def __len__(self)->int: return len(self.shape) def __iter__(self): return iter(self.shape) - def __eq__(self, other): + def __eq__(self, other)->bool: """比较两个形状是否相等""" if isinstance(other, Shape): return self.shape == other.shape @@ -100,7 +102,7 @@ def __hash__(self): return hash(self.shape) @classmethod - def total_size(cls,other:tuple[int])->int: + def total_size(cls,other:tuple[int,...])->int: total_size=1 for i in other: total_size*=i @@ -108,9 +110,9 @@ def total_size(cls,other:tuple[int])->int: @classmethod - def transpose(cls,shape:tuple[int],dimorder:list[int]=None): + def 
transpose(cls,shape:tuple[int,...],dimorder:tuple[int,...]=None)->tuple[int,...]: if dimorder is None: - dimorder=list(range(len(shape))) + dimorder=tuple(range(len(shape))) return Shape(tuple(shape[i] for i in dimorder)) @classmethod @@ -126,7 +128,7 @@ def matmul(cls,shape:tuple[int],other:tuple[int])->tuple[int]: return tuple(resultshape) @classmethod - def broadcast_shape(cls,shape_a: tuple[int], shape_b: tuple[int]) -> tuple[int]: + def broadcast_shape(cls,shape_a: tuple[int,...], shape_b: tuple[int,...]) -> tuple[int,...]: """计算两个形状的广播后形状""" # 获取形状的长度 len_a, len_b = len(shape_a), len(shape_b) @@ -159,7 +161,7 @@ def broadcast_shape(cls,shape_a: tuple[int], shape_b: tuple[int]) -> tuple[int]: @classmethod - def reduceshape(cls,shape:tuple[int],dim:list[int],keepdim:bool)->tuple[int]: + def reduceshape(cls,shape:tuple[int,...],dim:tuple[int,...],keepdim:bool)->tuple[int,...]: ndim = len(shape) # 处理负数维度 normalized_dim = [d % ndim for d in dim] @@ -172,4 +174,9 @@ def reduceshape(cls,shape:tuple[int],dim:list[int],keepdim:bool)->tuple[int]: else: return tuple(s for i, s in enumerate(shape) if i not in unique_dim) + + # 参考自 https://www.tensorflow.org/api_docs/python/tf/gather + @classmethod + def indexselectshape(cls,input_shape:tuple[int,...],index_shape:tuple[int,...],gatheraxis:int)->tuple[int,...]: + return input_shape[:gatheraxis]+index_shape+input_shape[gatheraxis+1:] \ No newline at end of file diff --git a/front/py/deepx/tensor/tensor.py b/front/py/deepx/tensor/tensor.py index 69a328e9..7929ba7f 100644 --- a/front/py/deepx/tensor/tensor.py +++ b/front/py/deepx/tensor/tensor.py @@ -9,8 +9,13 @@ class Tensor: #life - def __init__(self,shape:Union[tuple[int],list[int],Shape],dtype:str='float32',name:str=None): + def __init__(self,shape:tuple[int,...],dtype:str='float32',name:str=None): # name + assert isinstance(name,str) or name is None + assert isinstance(shape,tuple) + for i in shape: + assert isinstance(i,int) and i>0 + assert isinstance(dtype,str) 
self._name = name if name is None or name =='': @@ -45,8 +50,12 @@ def name(self): return self._name @name.setter def name(self,name:str): - self._name=name + assert isinstance(name,str) and name != '' + assert self.name is not None and self.name != '' + from deepx.nn.functional import renametensor + renametensor(self,name) + self._name = name # shape @property def shape(self,dim:int=None): @@ -120,8 +129,8 @@ def __matmul__(self, other:Union[Number,'Tensor']): return self.matmul(other) #gather - def __getitem__(self, indices:'Tensor'): - return self.gather(indices) + def __getitem__(self, index:'Tensor'): + return self.indexselect(index) #shape操作 @property diff --git a/front/py/examples/3_module/1_embedding.py b/front/py/examples/3_module/1_embedding.py index e69de29b..ddf52f44 100644 --- a/front/py/examples/3_module/1_embedding.py +++ b/front/py/examples/3_module/1_embedding.py @@ -0,0 +1,54 @@ +from transformers import AutoTokenizer +print() +def init_tokenizer(model_path): + tokenizer = AutoTokenizer.from_pretrained(model_path) + tokenizer.pad_token = tokenizer.eos_token + return tokenizer + +tokenizer = init_tokenizer("/home/lipeng/model/deepseek-ai/DeepSeek-R1-Distill-Llama-8B") + +def tokenize_text(text, tokenizer): + tokens = tokenizer(text, return_tensors="pt").input_ids + import torch + # 处理超出词汇表范围的token + if torch.any(tokens >= tokenizer.vocab_size): + # 获取UNK token ID,如果没有则使用0 + unk_token_id = tokenizer.unk_token_id if hasattr(tokenizer, 'unk_token_id') and tokenizer.unk_token_id is not None else 0 + # 替换所有超出范围的token为UNK + tokens = torch.where(tokens < tokenizer.vocab_size, tokens, torch.tensor(unk_token_id, device=tokens.device)) + return tokens + +dir="/home/lipeng/model/deepxmodel/embeddingtest/" + +############-------PyTorch-------################ +import torch.nn as nn + +# 创建输入 +text = "这是一个测试文本,用于演示嵌入层的使用。" +torch_input = tokenize_text(text, tokenizer) +from deepx.nn.functional import save_torch +save_torch(torch_input,dir+'input') 
+print(torch_input) +# 创建网络 +torch_net = nn.Embedding(tokenizer.vocab_size, 4096) +save_torch(torch_net.weight,dir+'weight') +# 前向传播 +torch_output = torch_net(torch_input) +print() +print(torch_output.shape) +print(torch_output) + + +############-------DEEPX-------################ +from deepx.nn.modules import Embedding +from deepx.nn.functional import load + +input=load(dir+'input') +input.print() + +weight=load(dir+'weight') +weight.name='embedding_0.weight' +net = Embedding(tokenizer.vocab_size, 4096,weight=weight) +out=net.forward(input) +out.print() + From 33ea00f861abbf5c891223df84ea482b974d829c Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Wed, 23 Apr 2025 16:39:23 +0800 Subject: [PATCH 4/5] =?UTF-8?q?cuda:load,save,indexselect=EF=BC=8C?= =?UTF-8?q?=E5=92=8Cpytorch=E5=AF=B9=E9=BD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/excuter/op-mem-cuda/list.md | 35 ++--- excuter/cpp-common/src/deepx/shape.cpp | 3 + excuter/cpp-common/src/deepx/shape.hpp | 3 +- excuter/op-mem-cuda/src/client/tfs.cpp | 11 +- .../deepx/tensorfunc/changeshape_miaobyte.cu | 140 ++++++++++-------- .../deepx/tensorfunc/changeshape_miaobyte.cuh | 17 ++- .../deepx/tensorfunc/changeshape_miaobyte.hpp | 19 +-- .../src/deepx/tensorfunc/io_miaobyte.hpp | 14 +- .../deepx/tensorfunc/tensorlife_miaobyte.hpp | 3 + .../op-mem-cuda/src/deepx/tf/changeshape.hpp | 82 +++++----- .../op-mem-cuda/src/deepx/tf/tensorlife.hpp | 34 +++++ front/py/deepx/scheduler/client/udpconn.py | 2 +- 12 files changed, 212 insertions(+), 151 deletions(-) diff --git a/doc/excuter/op-mem-cuda/list.md b/doc/excuter/op-mem-cuda/list.md index 2a73fd50..aef5e0a8 100644 --- a/doc/excuter/op-mem-cuda/list.md +++ b/doc/excuter/op-mem-cuda/list.md @@ -13,10 +13,11 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| copytensor | none | copytensor(tensor src, tensor dst)->() | 
T2.data = T1.data | copytensor(tensor src, tensor dst)->() | +| renametensor | none | renametensor(tensor t, var new_name)->() | rename T1 to T2 | renametensor(tensor t, var new_name)->() | | newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(vector shape)->(tensor tensor1) | | newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(var shape)->(tensor tensor1) | | deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | +| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | ### io @@ -31,9 +32,9 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | | normal | miaobyte | normal(tensor t, var mean, var stddev, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var stddev, var seed)->() | | uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | +| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | | constant | miaobyte | constant(tensor t, var value)->() | constant(T1) | constant(tensor t, var value)->() | ### elementwise @@ -46,10 +47,10 @@ | min | miaobyte | min(tensor A, tensor B)->(tensor C) | T3=min(T1, T2) | min(tensor A, tensor B)->(tensor C) | | maxscalar | miaobyte | maxscalar(tensor A, var scalar)->(tensor C) | T3=max(T1, scalar) | maxscalar(tensor A, var scalar)->(tensor C) | | tan | miaobyte | tan(tensor A)->(tensor C) | T3=tan(T1) | tan(tensor A)->(tensor C) | -| divscalar | miaobyte | divscalar(tensor A, var scalar)->(tensor C) | T3=scalar/T1 | divscalar(tensor A, var 
scalar)->(tensor C) | | sin | miaobyte | sin(tensor A)->(tensor C) | T3=sin(T1) | sin(tensor A)->(tensor C) | -| add | cublas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | -| add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | +| divscalar | miaobyte | divscalar(tensor A, var scalar)->(tensor C) | T3=scalar/T1 | divscalar(tensor A, var scalar)->(tensor C) | +| log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | +| addscalar | miaobyte | addscalar(tensor A, var b)->(tensor C) | T3=T1+scalar | addscalar(tensor A, var b)->(tensor C) | | greater | miaobyte | greater(tensor A, tensor B)->(tensor mask) | mask=compare(T1, T2) | greater(tensor A, tensor B)->(tensor mask) | | lessscalar | miaobyte | lessscalar(tensor A, var scalar)->(tensor mask) | mask=compare(T1, scalar) | lessscalar(tensor A, var scalar)->(tensor mask) | | cos | miaobyte | cos(tensor A)->(tensor C) | T3=cos(T1) | cos(tensor A)->(tensor C) | @@ -58,8 +59,8 @@ | minscalar | miaobyte | minscalar(tensor A, var scalar)->(tensor C) | T3=min(T1, scalar) | minscalar(tensor A, var scalar)->(tensor C) | | rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | | rpowscalar | miaobyte | rpowscalar(var scalar, tensor A)->(tensor C) | T3=pow(scalar, T1) | rpowscalar(var scalar, tensor A)->(tensor C) | -| log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | -| addscalar | miaobyte | addscalar(tensor A, var b)->(tensor C) | T3=T1+scalar | addscalar(tensor A, var b)->(tensor C) | +| add | cublas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | +| add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | sub | miaobyte | sub(tensor A, tensor B)->(tensor C) | T3=T1-T2 | sub(tensor A, tensor 
B)->(tensor C) | | sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | | subscalar | miaobyte | subscalar(tensor A, var b)->(tensor C) | T3=T1-scalar | subscalar(tensor A, var b)->(tensor C) | @@ -78,22 +79,22 @@ |-----------|--------|------------|--------------|----------------| | matmul | cublas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | +### reduce + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| prod | miaobyte | prod(tensor A, vector dims, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector dims, var keepdims)->(tensor B) | +| reducemax | miaobyte | reducemax(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector dims, var keepdims)->(tensor B) | +| sum | miaobyte | sum(tensor A, vector dims, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector dims, var keepdims)->(tensor B) | +| reducemin | miaobyte | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | + ### changeshape | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| gather | miaobyte | gather(tensor A, tensor indices, var axis)->(tensor B) | T2 = T1.gather(indices=[1,2], axis=1) | gather(tensor A, tensor indices, var axis)->(tensor B) | +| indexselect | miaobyte | indexselect(tensor A, tensor indices, var axis)->(tensor B) | T2 = T1.indexselect(index=[1,2], axis=1) | indexselect(tensor A, tensor indices, var axis)->(tensor B) | | broadcastTo | miaobyte | broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector 
new_shape)->(tensor B) | | concat | miaobyte | concat(listtensor tensors, var dim)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var dim)->(tensor result) | | transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor A, vector dim_order)->(tensor C) | | reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | -### reduce - -| Operation | Author | Func Def | Math Formula | IR Instruction | -|-----------|--------|------------|--------------|----------------| -| reducemax | miaobyte | reducemax(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector dims, var keepdims)->(tensor B) | -| prod | miaobyte | prod(tensor A, vector dims, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector dims, var keepdims)->(tensor B) | -| sum | miaobyte | sum(tensor A, vector dims, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector dims, var keepdims)->(tensor B) | -| reducemin | miaobyte | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | - diff --git a/excuter/cpp-common/src/deepx/shape.cpp b/excuter/cpp-common/src/deepx/shape.cpp index cedca724..fa207e98 100644 --- a/excuter/cpp-common/src/deepx/shape.cpp +++ b/excuter/cpp-common/src/deepx/shape.cpp @@ -13,6 +13,9 @@ namespace deepx { setshape(shape, dim); } + int64_t Shape::bytes() const{ + return size * (precision_bits(dtype) / 8); + } void Shape::setshape(const int *shape, int dim) { this->shape.resize(dim); diff --git a/excuter/cpp-common/src/deepx/shape.hpp b/excuter/cpp-common/src/deepx/shape.hpp index ff4fea69..655dce38 100644 --- a/excuter/cpp-common/src/deepx/shape.hpp +++ 
b/excuter/cpp-common/src/deepx/shape.hpp @@ -44,7 +44,8 @@ namespace deepx std::vector shape; std::vector strides; int dim; - int size; + int64_t size; + int64_t bytes() const; Shape() = default; Shape(const std::vector &shape); diff --git a/excuter/op-mem-cuda/src/client/tfs.cpp b/excuter/op-mem-cuda/src/client/tfs.cpp index 44fbcfc6..4e9095ca 100644 --- a/excuter/op-mem-cuda/src/client/tfs.cpp +++ b/excuter/op-mem-cuda/src/client/tfs.cpp @@ -65,6 +65,13 @@ namespace deepx::tf Param("t", DataCategory::Tensor, Precision::Any), }), vector())); + //renametensor + tffactory.add_tf(std::make_shared(vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + Param("new_name", DataCategory::Var, Precision::String), + }), + vector())); } // init @@ -479,8 +486,8 @@ namespace deepx::tf { Param("B", DataCategory::Tensor, Precision::Any), }))); - // gather - tffactory.add_tf(std::make_shared>(vector( + // indexselect + tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), Param("indices", DataCategory::Tensor, Precision::Int64|Precision::Int32), diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu index 130fc80d..bc97ba5f 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu @@ -366,32 +366,38 @@ namespace deepx::tensorfunc const BroadcastMap *broadcastMap, int8_t *output, const int *outputStrides, const int outputDim, const int outputlen); - // gather - + // indexselect template - __host__ __device__ void fromGatherIndices( - const int *output_indices, // 输出张量的索引 - const GatherAxisT *indices, const int *indicesStrides, const int indicesDim, // indices是tensor - const int gatherAxis, // gather操作的轴 - int *input_indices, const int inputDim) - { - - for (int i = 0; i < inputDim; ++i) + __host__ __device__ void fromIndexselectIndices( + const int 
*output_indices,const int outputDim, // indices into the output tensor + const GatherAxisT *indices,const int *indicesStrides,const int indicesDim, // indices is a tensor + int *index_indices, + const int gatherAxis, // axis of the gather operation + int *input_indices,const int inputDim){ + + for (int i = 0; i < gatherAxis; ++i) { input_indices[i] = output_indices[i]; } - + for (int i = gatherAxis; i < gatherAxis + indicesDim; ++i) + { + index_indices[i - gatherAxis] = output_indices[i]; + } // 使用indices张量中对应位置的值来替换gatherAxis维度的索引 - int indices_idx = linearAt(indicesStrides, indicesDim, output_indices); + int indices_idx = linearAt(indicesStrides, indicesDim, index_indices); input_indices[gatherAxis] = indices[indices_idx]; + for (int i = gatherAxis +indicesDim; i < outputDim; ++i) + { + input_indices[i - indicesDim + 1] = output_indices[i]; // output dim i (past the indices block) maps to input dim i - indicesDim + 1, i.e. the dims after gatherAxis + } } template - __global__ void gather_kernel( + __global__ void indexselect_kernel( const T *input, const int *inputStrides, const int inputDim, const GatherAxisT *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - T *output, const int outputlen) + T *output, const int *outputStrides, const int outputDim, const int outputlen) { const int grid_stride = gridDim.x * blockDim.x; int thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -399,26 +405,28 @@ namespace deepx::tensorfunc { // 输出索引 int output_indices[DIM]; - linearTo(indicesStrides, indicesDim, output_indices, thread_id); + linearTo(outputStrides, outputDim, output_indices, thread_id); // 输入索引 + int index_indices[DIM]; int input_indices[DIM]; - fromIndexselectIndices(output_indices, + fromIndexselectIndices(output_indices,outputDim, indices, indicesStrides, indicesDim, + index_indices, gatherAxis, input_indices, inputDim); int inputIdx = linearAt(inputStrides, inputDim, input_indices); - int outputIdx = linearAt(indicesStrides, indicesDim, output_indices); + int outputIdx = linearAt(outputStrides, outputDim, output_indices); output[outputIdx] = input[inputIdx]; } } template - void 
launch_gather( + void launch_indexselect( const T *input, const int *inputStrides, const int inputDim, const GatherAxisT *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - T *output, const int outputlen) + T *output, const int *outputStrides, const int outputDim, const int outputlen) { auto [numBlocks, blockSize] = BestDims(outputlen); @@ -428,44 +436,50 @@ namespace deepx::tensorfunc // input cudaVector inputStrides_d(inputStrides, inputDim, cudaMemcpyHostToDevice); + + // output + cudaVector outputStrides_d(outputStrides, outputDim, cudaMemcpyHostToDevice); + + //TODO 这里可能会导致寄存器浪费,但是,搞太多模板T,模板实例化不好搞 int dim=std::max(inputDim,indicesDim); + dim=std::max(dim,outputDim); switch (dim) { case 1: - gather_kernel<1, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<1, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 2: - gather_kernel<2, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<2, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 3: - gather_kernel<3, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<3, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 4: - gather_kernel<4, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<4, T, 
GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 5: - gather_kernel<5, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<5, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 6: - gather_kernel<6, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<6, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 7: - gather_kernel<7, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<7, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 8: - gather_kernel<8, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<8, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 9: - gather_kernel<9, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<9, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 10: - gather_kernel<10, T, 
GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<10, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 11: - gather_kernel<11, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<11, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; case 12: - gather_kernel<12, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputlen); + indexselect_kernel<12, T, GatherAxisT><<>>(input, inputStrides_d.data, inputDim, indices, indicesStrides_d.data, indicesDim, gatherAxis, output, outputStrides_d.data, outputDim, outputlen); break; default: throw std::runtime_error("dimension large than " + std::to_string(MAX_DIM)); @@ -476,71 +490,71 @@ namespace deepx::tensorfunc throw std::runtime_error("cuda error"); } } - template void launch_gather(const double *input, const int *inputStrides, const int inputDim, + template void launch_indexselect(const double *input, const int *inputStrides, const int inputDim, const int64_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - double *output, const int outputlen); - template void launch_gather(const float *input, const int *inputStrides, const int inputDim, + double *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const float *input, const int *inputStrides, const int inputDim, const int64_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - float *output, const int outputlen); - template void 
launch_gather(const nv_bfloat16 *input, const int *inputStrides, const int inputDim, + float *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const nv_bfloat16 *input, const int *inputStrides, const int inputDim, const int64_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - nv_bfloat16 *output, const int outputlen); - template void launch_gather<__half, int64_t>(const __half *input, const int *inputStrides, const int inputDim, + nv_bfloat16 *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect<__half, int64_t>(const __half *input, const int *inputStrides, const int inputDim, const int64_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - __half *output, const int outputlen); - template void launch_gather(const int64_t *input, const int *inputStrides, const int inputDim, + __half *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const int64_t *input, const int *inputStrides, const int inputDim, const int64_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - int64_t *output, const int outputlen); - template void launch_gather(const int32_t *input, const int *inputStrides, const int inputDim, + int64_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const int32_t *input, const int *inputStrides, const int inputDim, const int64_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - int32_t *output, const int outputlen); - template void launch_gather(const int16_t *input, const int *inputStrides, const int inputDim, + int32_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const int16_t *input, const int *inputStrides, const int 
inputDim, const int64_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - int16_t *output, const int outputlen); - template void launch_gather(const int8_t *input, const int *inputStrides, const int inputDim, + int16_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const int8_t *input, const int *inputStrides, const int inputDim, const int64_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - int8_t *output, const int outputlen); + int8_t *output, const int *outputStrides, const int outputDim, const int outputlen); - template void launch_gather(const double *input, const int *inputStrides, const int inputDim, + template void launch_indexselect(const double *input, const int *inputStrides, const int inputDim, const int32_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - double *output, const int outputlen); - template void launch_gather(const float *input, const int *inputStrides, const int inputDim, + double *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const float *input, const int *inputStrides, const int inputDim, const int32_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - float *output, const int outputlen); - template void launch_gather(const nv_bfloat16 *input, const int *inputStrides, const int inputDim, + float *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const nv_bfloat16 *input, const int *inputStrides, const int inputDim, const int32_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - nv_bfloat16 *output, const int outputlen); - template void launch_gather<__half, int32_t>(const __half *input, const int *inputStrides, const int inputDim, + nv_bfloat16 *output, const int *outputStrides, 
const int outputDim, const int outputlen); + template void launch_indexselect<__half, int32_t>(const __half *input, const int *inputStrides, const int inputDim, const int32_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - __half *output, const int outputlen); - template void launch_gather(const int64_t *input, const int *inputStrides, const int inputDim, + __half *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const int64_t *input, const int *inputStrides, const int inputDim, const int32_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - int64_t *output, const int outputlen); - template void launch_gather(const int32_t *input, const int *inputStrides, const int inputDim, + int64_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const int32_t *input, const int *inputStrides, const int inputDim, const int32_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - int32_t *output, const int outputlen); - template void launch_gather(const int16_t *input, const int *inputStrides, const int inputDim, + int32_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const int16_t *input, const int *inputStrides, const int inputDim, const int32_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - int16_t *output, const int outputlen); - template void launch_gather(const int8_t *input, const int *inputStrides, const int inputDim, + int16_t *output, const int *outputStrides, const int outputDim, const int outputlen); + template void launch_indexselect(const int8_t *input, const int *inputStrides, const int inputDim, const int32_t *indices, const int *indicesStrides, const int indicesDim, const int gatherAxis, - int8_t *output, const int outputlen); + int8_t 
*output, const int *outputStrides, const int outputDim, const int outputlen); } diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh index 2047a636..26c40851 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh @@ -43,26 +43,27 @@ namespace deepx::tensorfunc const BroadcastMap *broadcastMap, T *output, const int *outputStrides, const int outputDim, const int outputlen); - // gather + // indexselect template - __host__ __device__ void fromGatherIndices( - const int *output_indices, // 输出张量的索引 + __host__ __device__ void fromIndexselectIndices( + const int *output_indices,const int outputDim, // 输出张量的索引 const GatherAxisT *indices,const int *indicesStrides,const int indicesDim, //indices是tensor + int *index_indices, const int gatherAxis, // gather操作的轴 int *input_indices,const int inputDim); // 计算出的输入张量索引 template - __global__ void gather_kernel( + __global__ void indexselect_kernel( const T *input, const int *inputStrides, const int inputDim, - const GatherAxisT *indices,const int *indicesStrides,const int indicesDim, + const GatherAxisT *index,const int *indexStrides,const int indexDim, const int gatherAxis, - T *output,const int outputlen);//output 和input的shape相同,所以共享strides,dim,len + T *output,const int *outputStrides,const int outputDim,const int outputlen); template - void launch_gather( + void launch_indexselect( const T *input, const int *inputStrides, const int inputDim, const GatherAxisT *indices,const int *indicesStrides,const int indicesDim, const int gatherAxis, - T *output,const int outputlen);//output 和input的shape相同,所以共享strides,dim,len + T *output,const int *outputStrides,const int outputDim,const int outputlen); }; #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_CUH \ No newline at end of file diff --git 
a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp index 8fb43a76..a49439b9 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -117,21 +117,22 @@ namespace deepx::tensorfunc } }; - //gather + //indexselect template - struct gatherDispatcher + struct indexselectDispatcher { - static void gather(const Tensor &input, const Tensor &indices, const int axis, Tensor &output){ - vector input_gatherShape = indices.shape.shape; - if (input_gatherShape.empty()||input_gatherShape!=output.shape.shape) + static void indexselect(const Tensor &input, const Tensor &indices, const int axis, Tensor &output){ + int gatherAxis = axis < 0 ? input.shape.dim + axis : axis; + vector gatherShape = indexselectShape(input.shape.shape, indices.shape.shape, gatherAxis); + if (gatherShape.empty()||gatherShape!=output.shape.shape) { - throw TensorShapeError("Gather shape mismatch"); + throw TensorShapeError("Indexselect shape mismatch"); } - int gatherAxis = axis < 0 ? 
input.shape.dim + axis : axis; - launch_gather(input.data, input.shape.strides.data(), input.shape.dim, + + launch_indexselect(input.data, input.shape.strides.data(), input.shape.dim, indices.data, indices.shape.strides.data(), indices.shape.dim, gatherAxis, - output.data,output.shape.size);//output和indices的shape相同,共享strides等 + output.data,output.shape.strides.data(),output.shape.dim,output.shape.size); } }; } diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp index 3a2a4032..00d338fe 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp @@ -21,8 +21,7 @@ namespace deepx::tensorfunc { static void print(const Tensor &t, const std::string &f = "") { - int bytes = precision_bits(t.shape.dtype) / 8; - size_t total_bytes = t.shape.size * bytes; + int64_t total_bytes = t.shape.bytes(); // 统一分配CPU内存 unsigned char *host_data = new unsigned char[total_bytes]; @@ -48,8 +47,7 @@ namespace deepx::tensorfunc { static void print(const Tensor &t, const std::string &f = "") { - int bytes = precision_bits(t.shape.dtype) / 8; - size_t total_bytes = t.shape.size * bytes; + int64_t total_bytes = t.shape.bytes(); // 统一分配CPU内存 unsigned char *host_data = new unsigned char[total_bytes]; @@ -90,8 +88,7 @@ namespace deepx::tensorfunc { static void print(const Tensor &t, const std::string &f = "") { - int bytes = precision_bits(t.shape.dtype) / 8; - size_t total_bytes = t.shape.size * bytes; + int64_t total_bytes = t.shape.bytes(); // 统一分配CPU内存 unsigned char *host_data = new unsigned char[total_bytes]; @@ -137,8 +134,7 @@ namespace deepx::tensorfunc shape_fs.close(); // 保存data - int bytes = precision_bits(tensor.shape.dtype) / 8; - size_t total_bytes = tensor.shape.size * bytes; + int64_t total_bytes = tensor.shape.bytes(); // 统一分配CPU内存 unsigned char *host_data = new unsigned char[total_bytes]; @@ -190,7 +186,7 @@ namespace 
deepx::tensorfunc std::ifstream data_fs(datapath, std::ios::binary); data_fs.seekg(0, std::ios::end); std::streamsize fileSize = data_fs.tellg(); - std::streamsize expectedSize = shape.size * precision_bits(shape.dtype) / 8; + std::streamsize expectedSize = shape.bytes(); if (fileSize != expectedSize) { diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp index fdcb0f17..8e776a14 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp @@ -52,5 +52,8 @@ namespace deepx::tensorfunc dst.shape=src.shape; dst.copyer(src.data, dst.data, src.shape.size); } + + //rename + } #endif // DEEPX_TENSORFUNC_TENSORLIFE_MIAOBYTE_HPP diff --git a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp index d869a435..8b4604f9 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp @@ -344,14 +344,14 @@ namespace deepx::tf } }; - // gather + // indexselect template - class Gather : public TF + class IndexSelect : public TF { public: - Gather(const vector &args, const vector &returns) + IndexSelect(const vector &args, const vector &returns) { - this->name = "gather"; + this->name = "indexselect"; this->metadata.author = Author::name(); this->tftype = "changeshape"; this->args = args; @@ -360,11 +360,11 @@ namespace deepx::tf string math_formula() const override { - return "T2 = T1.gather(indices=[1,2], axis=1)"; + return "T2 = T1.indexselect(index=[1,2], axis=1)"; } shared_ptr clone() const override { - return make_shared>(*this); + return make_shared>(*this); } int run(shared_ptr mem, string &error) override { @@ -377,10 +377,10 @@ namespace deepx::tf error = "output_type " + precision_str(output_type) + " or input_type " + precision_str(input_type) + " must be the same"; return 1; } - Precision 
indices_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; - if (indices_type != Precision::Int64 && indices_type != Precision::Int32) + Precision index_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + if (index_type != Precision::Int64 && index_type != Precision::Int32) { - error = "indices_type " + precision_str(indices_type) + " only support " + precision_str(Precision::Int64) + " or " + precision_str(Precision::Int32); + error = "index_type " + precision_str(index_type) + " only support " + precision_str(Precision::Int64) + " or " + precision_str(Precision::Int32); return 1; } @@ -388,97 +388,97 @@ namespace deepx::tf { case Precision::Float64: { - if (indices_type == Precision::Int64) + if (index_type == Precision::Int64) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } - else if (indices_type == Precision::Int32) + else if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Float32: { - if (indices_type == Precision::Int64) + if (index_type == Precision::Int64) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } - else if (indices_type == Precision::Int32) + else if (index_type == Precision::Int32) { - 
gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Float16: { - if (indices_type == Precision::Int64) + if (index_type == Precision::Int64) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } - else if (indices_type == Precision::Int32) + else if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::BFloat16: { - if (indices_type == Precision::Int64) + if (index_type == Precision::Int64) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } - else if (indices_type == Precision::Int32) + else if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Int64: { - if (indices_type == Precision::Int64) + if (index_type == 
Precision::Int64) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } - else if (indices_type == Precision::Int32) + else if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Int32: { - if (indices_type == Precision::Int64) + if (index_type == Precision::Int64) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } - else if (indices_type == Precision::Int32) + else if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Int16: { - if (indices_type == Precision::Int64) + if (index_type == Precision::Int64) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } - else if (indices_type == Precision::Int32) + else if (index_type == 
Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } case Precision::Int8: { - if (indices_type == Precision::Int64) + if (index_type == Precision::Int64) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } - else if (indices_type == Precision::Int32) + else if (index_type == Precision::Int32) { - gather(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); + indexselect(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), axis, *mem->gettensor(this->returns[0].textvalue)); } break; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp b/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp index 43041188..620b81e1 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp @@ -248,5 +248,39 @@ namespace deepx::tf return make_shared(*this); } }; + + //rename + class RenameTensor : public TF + { + public: + RenameTensor(vector args, vector returns) + { + this->name = "renametensor"; + this->tftype = "tensorlife"; + this->args = args; + this->returns = returns; + } + int run(shared_ptr mem, string &error) override + { + string old_name = this->args[0].textvalue; + if (!checktensors({this->args[0].textvalue}, mem, error) != 0) + { + return 1; + } + + string new_name = this->args[1].textvalue; + + mem->rename_tensor(old_name, new_name); + return 0; + } + string math_formula() const override 
+ { + return "rename T1 to T2"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + }; } #endif // DEEPX_TF_TENSORLIFE_HPP diff --git a/front/py/deepx/scheduler/client/udpconn.py b/front/py/deepx/scheduler/client/udpconn.py index a25b0963..6a12c26a 100644 --- a/front/py/deepx/scheduler/client/udpconn.py +++ b/front/py/deepx/scheduler/client/udpconn.py @@ -3,7 +3,7 @@ import select class UDPConn: - def __init__(self, endpoint: str = "localhost:8080"): + def __init__(self, endpoint: str = "localhost:9090"): # 解析endpoint self._host, port_str = endpoint.split(':') self._port = int(port_str) From ab1269ccccc4a524873f935f9f6efcc0915e3610 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Wed, 23 Apr 2025 18:19:45 +0800 Subject: [PATCH 5/5] =?UTF-8?q?nn.module:=E7=A7=BB=E9=99=A4=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E6=B3=A8=E5=86=8C=EF=BC=8C=E6=94=B9=E4=B8=BA=E6=89=8B?= =?UTF-8?q?=E5=8A=A8=E6=B3=A8=E5=86=8Ctensor=E3=80=81module?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/language.md | 52 ++ .../nn/functional/leaffunc_changeshape.py | 2 +- front/py/deepx/nn/modules/linear.py | 6 +- front/py/deepx/nn/modules/module.py | 14 +- front/py/deepx/nn/modules/sparse.torch.py | 512 ------------------ front/py/deepx/tensor/tensor.py | 11 +- .../py/examples/4_transformer/llama/1_rope.py | 0 todo/deepxpy.md | 10 - todo/excuter/metal/metal.md | 3 - todo/excuter/ompsimd/ompsimd.md | 3 - todo/scheduler.md | 7 - 11 files changed, 66 insertions(+), 554 deletions(-) create mode 100644 doc/language.md delete mode 100644 front/py/deepx/nn/modules/sparse.torch.py create mode 100644 front/py/examples/4_transformer/llama/1_rope.py delete mode 100644 todo/deepxpy.md delete mode 100644 todo/excuter/metal/metal.md delete mode 100644 todo/excuter/ompsimd/ompsimd.md delete mode 100644 todo/scheduler.md diff --git a/doc/language.md b/doc/language.md new file mode 100644 index 00000000..eefde765 --- /dev/null +++ 
b/doc/language.md @@ -0,0 +1,52 @@ +## c++:计算执行器(excuter) + +负责实现tensor的具体计算过程,对接硬件如GPU、CPU的simd指令 + +除了c++,也就只有编译器能干这样的脏活累活了 + +deepx用到了以下库,都是c++实现 + +cblas +openmp +c++可以和汇编结合,从而最大程度发挥cpu、gpu寄存器的性能 + +cuda是c++的语言扩展,也可以看作是c++ + + +## python:模型前端构建 +python提供了类似pytorch的库,便于调试和验证模型算法 + +deepx/tensor/ +deepx/nn/deepxIR +deepx/nn.module/ +deepx/nn.functional +通过这些库,我们可以快速地搭建一个模型结构 + +## golang:运维、监控、分布式,深度学习训推自动化的维护者 + +与pytorch、tensorflow不同,deepx追求分布式过程自动化,因此python侧不参与分布式 + +deepxctl:提供对deepx体系的所有工具、库、模型、镜像的统一纳管 + + + +## deepxIR +虽然deepxIR不是独立的编程语言,但它是deepx体系的程序格式标准 + +excuter所执行的内容,就是deepxir的序列或deepxir计算图 + +https://github.com/array2d/deepx/blob/main/doc/excuter/op-mem-cuda/list.md + +deepxir分为3类 + +计算:tensor这些系列elementwise、changeshape、tensorlife、io、reduce、init +指令结构: +queue[deepxIR],串行指令,有前后执行顺序 +parallel[deepxIR],可并行的指令,无顺序依赖,可并行 +以上指令为静态图所需的指令,运行过程是确定的。 + +分支:goto、ifelse +分支指令会让计算图行为不可预测,也就是动态部分 + +控制:parse、run等特殊自定义指令 +控制指令是deepx分布式系统内置的各个组件控制指令 \ No newline at end of file diff --git a/front/py/deepx/nn/functional/leaffunc_changeshape.py b/front/py/deepx/nn/functional/leaffunc_changeshape.py index cee587bf..c9501755 100644 --- a/front/py/deepx/nn/functional/leaffunc_changeshape.py +++ b/front/py/deepx/nn/functional/leaffunc_changeshape.py @@ -43,7 +43,7 @@ def permute(t:Tensor, def transpose(t:Tensor,out:Union[Tensor,str]='')->Tensor: dimorder = list(range(t.ndim)) dimorder[-1],dimorder[-2]=dimorder[-2],dimorder[-1] - return permute(t,dimorder,out) + return permute(t,tuple(dimorder),out) diff --git a/front/py/deepx/nn/modules/linear.py b/front/py/deepx/nn/modules/linear.py index f1eb86e3..c4f05194 100644 --- a/front/py/deepx/nn/modules/linear.py +++ b/front/py/deepx/nn/modules/linear.py @@ -21,8 +21,10 @@ def __init__( self.in_features = in_features self.out_features = out_features self.weight = Tensor(shape=(out_features, in_features),dtype=dtype) + self.register_parameter("weight",self.weight) if bias: self.bias = Tensor(shape=(out_features,),dtype=dtype) +
self.register_parameter("bias",self.bias) else: self.register_parameter("bias", None) self.reset_parameters() @@ -42,9 +44,9 @@ def forward(self, input: Tensor) -> Tensor: y=input @ self.weight.T oldshape=y.shape if self.bias is not None: - y.reshape_(y.shape[1]) + y.reshape_(tuple(y.shape[1:])) y=y+self.bias - y.reshape_(*oldshape) + y.reshape_(oldshape) return y def extra_repr(self) -> str: diff --git a/front/py/deepx/nn/modules/module.py b/front/py/deepx/nn/modules/module.py index 6966bebd..003223ad 100644 --- a/front/py/deepx/nn/modules/module.py +++ b/front/py/deepx/nn/modules/module.py @@ -9,7 +9,7 @@ def __init__(self, name: Optional[str] = None): self._parent: Optional[Module] = None self._modules: OrderedDict[str, Module] = OrderedDict() self._parameters: OrderedDict[str, Tensor] = OrderedDict() - + def _generate_default_name(self) -> str: class_name = self.__class__.__name__ base_name = class_name.lower() @@ -17,6 +17,7 @@ def _generate_default_name(self) -> str: self.__class__._instance_counter = 0 count = self.__class__._instance_counter self.__class__._instance_counter += 1 + return count return f"{base_name}_{count}" @property @@ -25,16 +26,7 @@ def full_name(self): return self._name else: return f"{self._parent.full_name}.{self._name}" - - # def __setattr__(self, name: str, value: Any) -> None: - # if not name.startswith('_'): - # if isinstance(value, Module): - # self.register_module(name, value) - # elif isinstance(value, Tensor): - # self.register_parameter(name, value) - # # 使用父类方法设置属性,避免递归 - # super().__setattr__(name, value) - + def register_module(self, name: str, module: Optional['Module']) -> None: if module is None: self._modules.pop(name, None) diff --git a/front/py/deepx/nn/modules/sparse.torch.py b/front/py/deepx/nn/modules/sparse.torch.py deleted file mode 100644 index a0621543..00000000 --- a/front/py/deepx/nn/modules/sparse.torch.py +++ /dev/null @@ -1,512 +0,0 @@ -# mypy: 允许无类型定义的函数 -from typing import Optional - -import torch -from 
torch import Tensor -from torch.nn import functional as F, init -from torch.nn.parameter import Parameter - -from .module import Module - - -__all__ = ["Embedding", "EmbeddingBag"] - - -class Embedding(Module): - r"""一个存储固定字典和大小的嵌入向量的简单查找表。 - - 该模块常用于存储词嵌入并通过索引检索它们。 - 模块的输入是索引列表,输出是对应的词嵌入向量。 - - 参数: - num_embeddings (int): 嵌入字典的大小(词汇表大小) - embedding_dim (int): 每个嵌入向量的维度 - padding_idx (int, 可选): 如果指定,该索引位置的条目不参与梯度计算; - 因此,该位置的嵌入向量在训练中不会更新,保持为固定的"填充"向量。 - 对于新创建的嵌入层,该位置的嵌入向量默认全零,但可更新为其他值作为填充向量。 - max_norm (float, 可选): 如果指定,范数超过此值的嵌入向量会被重新归一化到该范数 - norm_type (float, 可选): 计算max_norm时使用的p范数(默认L2范数,p=2) - scale_grad_by_freq (bool, 可选): 如果为True,梯度会按mini-batch中词的频率倒数缩放(默认False) - sparse (bool, 可选): 如果为True,权重矩阵的梯度将是稀疏张量(详见注释) - - 属性: - weight (Tensor): 模块的可学习权重,形状为(num_embeddings, embedding_dim), - 初始化为正态分布N(0, 1) - - 形状: - - 输入: :math:`(*)`, 任意形状的IntTensor或LongTensor,包含要提取的索引 - - 输出: :math:`(*, H)`, 其中*是输入形状,H=embedding_dim - - .. 注意:: - 注意只有部分优化器支持稀疏梯度:目前支持的有SGD(CPU和CUDA)、SparseAdam(CPU和CUDA)、Adagrad(CPU) - - .. 
注意:: - 当max_norm不为None时,嵌入层的前向传播会原地修改weight张量。 - 由于梯度计算所需的张量不能被原地修改,因此在调用前向传播前对weight进行可微操作时, - 若max_norm不为None则需要克隆weight。例如:: - - n, d, m = 3, 5, 7 - embedding = nn.Embedding(n, d, max_norm=1.0) - W = torch.randn((m, d), requires_grad=True) - idx = torch.tensor([1, 2]) - a = embedding.weight.clone() @ W.t() # weight必须克隆以保证可微性 - b = embedding(idx) @ W.t() # 原地修改weight - out = (a.unsqueeze(0) + b.unsqueeze(1)) - loss = out.sigmoid().prod() - loss.backward() - - 示例:: - - >>> # 包含10个3维张量的嵌入层 - >>> embedding = nn.Embedding(10, 3) - >>> # 2个样本,每个包含4个索引的批次 - >>> input = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]]) - >>> # xdoctest: +IGNORE_WANT("non-deterministic") - >>> embedding(input) - tensor([[[-0.0251, -1.6902, 0.7172], - [-0.6431, 0.0748, 0.6969], - [ 1.4970, 1.3448, -0.9685], - [-0.3677, -2.7265, -0.1685]], - - [[ 1.4970, 1.3448, -0.9685], - [ 0.4362, -0.4004, 0.9400], - [-0.6431, 0.0748, 0.6969], - [ 0.9124, -2.3616, 1.1151]]]) - - - >>> # 带padding_idx的示例 - >>> embedding = nn.Embedding(10, 3, padding_idx=0) - >>> input = torch.LongTensor([[0, 2, 0, 5]]) - >>> embedding(input) - tensor([[[ 0.0000, 0.0000, 0.0000], - [ 0.1535, -2.0309, 0.9315], - [ 0.0000, 0.0000, 0.0000], - [-0.1655, 0.9897, 0.0635]]]) - - >>> # 修改填充向量的示例 - >>> padding_idx = 0 - >>> embedding = nn.Embedding(3, 3, padding_idx=padding_idx) - >>> embedding.weight - Parameter containing: - tensor([[ 0.0000, 0.0000, 0.0000], - [-0.7895, -0.7089, -0.0364], - [ 0.6778, 0.5803, 0.2678]], requires_grad=True) - >>> with torch.no_grad(): - ... 
embedding.weight[padding_idx] = torch.ones(3) - >>> embedding.weight - Parameter containing: - tensor([[ 1.0000, 1.0000, 1.0000], - [-0.7895, -0.7089, -0.0364], - [ 0.6778, 0.5803, 0.2678]], requires_grad=True) - """ - - __constants__ = [ - "num_embeddings", - "embedding_dim", - "padding_idx", - "max_norm", - "norm_type", - "scale_grad_by_freq", - "sparse", - ] - - num_embeddings: int - embedding_dim: int - padding_idx: Optional[int] - max_norm: Optional[float] - norm_type: float - scale_grad_by_freq: bool - weight: Tensor - freeze: bool - sparse: bool - - def __init__( - self, - num_embeddings: int, - embedding_dim: int, - padding_idx: Optional[int] = None, - max_norm: Optional[float] = None, - norm_type: float = 2.0, - scale_grad_by_freq: bool = False, - sparse: bool = False, - _weight: Optional[Tensor] = None, - _freeze: bool = False, - device=None, - dtype=None, - ) -> None: - factory_kwargs = {"device": device, "dtype": dtype} - super().__init__() - self.num_embeddings = num_embeddings - self.embedding_dim = embedding_dim - if padding_idx is not None: - if padding_idx > 0: - assert ( - padding_idx < self.num_embeddings - ), "Padding_idx必须在num_embeddings范围内" - elif padding_idx < 0: - assert ( - padding_idx >= -self.num_embeddings - ), "Padding_idx必须在num_embeddings范围内" - padding_idx = self.num_embeddings + padding_idx - self.padding_idx = padding_idx - self.max_norm = max_norm - self.norm_type = norm_type - self.scale_grad_by_freq = scale_grad_by_freq - if _weight is None: - self.weight = Parameter( - torch.empty((num_embeddings, embedding_dim), **factory_kwargs), - requires_grad=not _freeze, - ) - self.reset_parameters() - else: - assert list(_weight.shape) == [ - num_embeddings, - embedding_dim, - ], "权重形状与num_embeddings和embedding_dim不匹配" - self.weight = Parameter(_weight, requires_grad=not _freeze) - - self.sparse = sparse - - def reset_parameters(self) -> None: - init.normal_(self.weight) # 正态分布初始化权重 - self._fill_padding_idx_with_zero() # 填充索引位置归零 - - def 
_fill_padding_idx_with_zero(self) -> None: - if self.padding_idx is not None: - with torch.no_grad(): # 不计算梯度 - self.weight[self.padding_idx].fill_(0) # 填充位置设为0 - - def forward(self, input: Tensor) -> Tensor: - return F.embedding( - input, - self.weight, - self.padding_idx, - self.max_norm, - self.norm_type, - self.scale_grad_by_freq, - self.sparse, - ) - - def extra_repr(self) -> str: - s = "{num_embeddings}, {embedding_dim}" - if self.padding_idx is not None: - s += ", padding_idx={padding_idx}" - if self.max_norm is not None: - s += ", max_norm={max_norm}" - s += ", max_norm={max_norm}" - if self.norm_type != 2: - s += ", norm_type={norm_type}" - if self.scale_grad_by_freq is not False: - s += ", scale_grad_by_freq={scale_grad_by_freq}" - if self.sparse is not False: - s += ", sparse=True" - return s.format(**self.__dict__) - - @classmethod - def from_pretrained( - cls, - embeddings, - freeze=True, - padding_idx=None, - max_norm=None, - norm_type=2.0, - scale_grad_by_freq=False, - sparse=False, - ): - r"""从给定的2维FloatTensor创建Embedding实例。 - - 参数: - embeddings (Tensor): 包含嵌入权重的FloatTensor, - 第一维作为num_embeddings,第二维作为embedding_dim。 - freeze (bool, 可选): 若为True,张量在学习过程中不更新, - 相当于embedding.weight.requires_grad = False。默认True。 - padding_idx (int, 可选): 同模块初始化文档说明。 - max_norm (float, 可选): 同模块初始化文档说明。 - norm_type (float, 可选): 同模块初始化文档说明,默认2。 - scale_grad_by_freq (bool, 可选): 同模块初始化文档说明,默认False。 - sparse (bool, 可选): 同模块初始化文档说明。 - - 示例:: - - >>> # 包含预训练权重的FloatTensor - >>> weight = torch.FloatTensor([[1, 2.3, 3], [4, 5.1, 6.3]]) - >>> embedding = nn.Embedding.from_pretrained(weight) - >>> # 获取索引1的嵌入 - >>> input = torch.LongTensor([1]) - >>> # xdoctest: +IGNORE_WANT("non-deterministic") - >>> embedding(input) - tensor([[ 4.0000, 5.1000, 6.3000]]) - """ - assert ( - embeddings.dim() == 2 - ), "Embeddings参数应为2维张量" - rows, cols = embeddings.shape - embedding = cls( - num_embeddings=rows, - embedding_dim=cols, - _weight=embeddings, - _freeze=freeze, - padding_idx=padding_idx, - 
max_norm=max_norm, - norm_type=norm_type, - scale_grad_by_freq=scale_grad_by_freq, - sparse=sparse, - ) - return embedding - - -class EmbeddingBag(Module): - r"""计算嵌入"袋"的和或均值,无需实例化中间嵌入。 - - 对于固定长度的袋、无per_sample_weights、无等于padding_idx的索引,且输入为2D时, - 该类的行为如下: - * mode="sum"等价于Embedding层后接torch.sum(dim=1) - * mode="mean"等价于Embedding层后接torch.mean(dim=1) - * mode="max"等价于Embedding层后接torch.max(dim=1) - - 但EmbeddingBag比链式操作更节省时间和内存。 - - EmbeddingBag还支持在正向传播时传入样本权重, - 这会在按mode指定的方式进行加权归约前缩放嵌入输出。 - 若传入per_sample_weights,仅支持mode="sum",即按权重计算加权和。 - - 参数: - num_embeddings (int): 嵌入字典的大小(词汇表大小) - embedding_dim (int): 每个嵌入向量的维度 - max_norm (float, 可选): 若指定,范数超过此值的嵌入向量会被重新归一化到该范数 - norm_type (float, 可选): 计算max_norm时使用的p范数(默认L2范数,p=2) - scale_grad_by_freq (bool, 可选): 若为True,梯度会按mini-batch中词的频率倒数缩放(默认False)。 - 注意:mode="max"时不支持此选项。 - mode (str, 可选): "sum"、"mean"或"max",指定袋的归约方式。 - "sum"计算加权和(考虑per_sample_weights), - "mean"计算袋内平均值,"max"计算袋内最大值。默认"mean"。 - sparse (bool, 可选): 若为True,权重矩阵的梯度将是稀疏张量(详见注释)。 - 注意:mode="max"时不支持此选项。 - include_last_offset (bool, 可选): 若为True,offsets包含一个额外元素, - 其值等于indices的长度,符合CSR格式。 - padding_idx (int, 可选): 若指定,该索引位置的条目不参与梯度计算; - 因此,该位置的嵌入向量在训练中不会更新,保持为固定的"填充"向量。 - 对于新创建的EmbeddingBag,该位置的嵌入向量默认全零, - 但可更新为其他值作为填充向量。注意该位置的嵌入向量会被排除在归约之外。 - - 属性: - weight (Tensor): 模块的可学习权重,形状为(num_embeddings, embedding_dim), - 初始化为正态分布N(0, 1)。 - - 示例:: - - >>> # 包含10个3维张量的EmbeddingBag(求和模式) - >>> embedding_sum = nn.EmbeddingBag(10, 3, mode='sum') - >>> # 2个样本,每个包含4个索引的输入(展平为1D) - >>> input = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.long) - >>> offsets = torch.tensor([0, 4], dtype=torch.long) - >>> # xdoctest: +IGNORE_WANT("non-deterministic") - >>> embedding_sum(input, offsets) - tensor([[-0.8861, -5.4350, -0.0523], - [ 1.1306, -2.5798, -1.0044]]) - - >>> # 带padding_idx的示例 - >>> embedding_sum = nn.EmbeddingBag(10, 3, mode='sum', padding_idx=2) - >>> input = torch.tensor([2, 2, 2, 2, 4, 3, 2, 9], dtype=torch.long) - >>> offsets = torch.tensor([0, 4], dtype=torch.long) - 
>>> embedding_sum(input, offsets) - tensor([[ 0.0000, 0.0000, 0.0000], - [-0.7082, 3.2145, -2.6251]]) - - >>> # 从Embedding加载EmbeddingBag的示例 - >>> embedding = nn.Embedding(10, 3, padding_idx=2) - >>> embedding_sum = nn.EmbeddingBag.from_pretrained( - embedding.weight, - padding_idx=embedding.padding_idx, - mode='sum') - """ - - __constants__ = [ - "num_embeddings", - "embedding_dim", - "max_norm", - "norm_type", - "scale_grad_by_freq", - "mode", - "sparse", - "include_last_offset", - "padding_idx", - ] - - num_embeddings: int - embedding_dim: int - max_norm: Optional[float] - norm_type: float - scale_grad_by_freq: bool - weight: Tensor - mode: str - sparse: bool - include_last_offset: bool - padding_idx: Optional[int] - - def __init__( - self, - num_embeddings: int, - embedding_dim: int, - max_norm: Optional[float] = None, - norm_type: float = 2.0, - scale_grad_by_freq: bool = False, - mode: str = "mean", - sparse: bool = False, - _weight: Optional[Tensor] = None, - include_last_offset: bool = False, - padding_idx: Optional[int] = None, - device=None, - dtype=None, - ) -> None: - factory_kwargs = {"device": device, "dtype": dtype} - super().__init__() - self.num_embeddings = num_embeddings - self.embedding_dim = embedding_dim - self.max_norm = max_norm - self.norm_type = norm_type - self.scale_grad_by_freq = scale_grad_by_freq - if padding_idx is not None: - if padding_idx > 0: - assert ( - padding_idx < self.num_embeddings - ), "padding_idx必须在num_embeddings范围内" - elif padding_idx < 0: - assert ( - padding_idx >= -self.num_embeddings - ), "padding_idx必须在num_embeddings范围内" - padding_idx = self.num_embeddings + padding_idx - self.padding_idx = padding_idx - if _weight is None: - self.weight = Parameter( - torch.empty((num_embeddings, embedding_dim), **factory_kwargs) - ) - self.reset_parameters() - else: - assert list(_weight.shape) == [ - num_embeddings, - embedding_dim, - ], "权重形状与num_embeddings和embedding_dim不匹配" - self.weight = Parameter(_weight) - self.mode = mode 
- self.sparse = sparse - self.include_last_offset = include_last_offset - - def reset_parameters(self) -> None: - init.normal_(self.weight) # 正态分布初始化权重 - self._fill_padding_idx_with_zero() # 填充索引位置归零 - - def _fill_padding_idx_with_zero(self) -> None: - if self.padding_idx is not None: - with torch.no_grad(): # 不计算梯度 - self.weight[self.padding_idx].fill_(0) # 填充位置设为0 - - def forward( - self, - input: Tensor, - offsets: Optional[Tensor] = None, - per_sample_weights: Optional[Tensor] = None, - ) -> Tensor: - """EmbeddingBag的正向传播。 - - 参数: - input (Tensor): 包含嵌入矩阵索引袋的张量。 - offsets (Tensor, 可选): 仅当input为1D时使用,确定input中每个袋(序列)的起始索引位置。 - per_sample_weights (Tensor, 可选): 浮点/双精度权重张量,None表示所有权重为1。 - 若指定,形状必须与input相同,且在offsets非None时使用相同的偏移量。仅支持mode='sum'。 - - 返回: - 形状为(B, embedding_dim)的张量。 - - .. 注意:: - - 关于input和offsets的说明: - - input和offsets必须同类型(int或long) - - 若input为2D形状(B, N),视为B个固定长度N的袋,返回B个按mode聚合的值,此时offsets被忽略且必须为None。 - - 若input为1D形状(N),视为多个袋(序列)的拼接,offsets必须为1D张量,包含每个袋在input中的起始索引位置。 - 因此,对于形状(B)的offsets,input视为B个袋,空袋(长度为0)返回全零向量。 - """ - return F.embedding_bag( - input, - self.weight, - offsets, - self.max_norm, - self.norm_type, - self.scale_grad_by_freq, - self.mode, - self.sparse, - per_sample_weights, - self.include_last_offset, - self.padding_idx, - ) - - def extra_repr(self) -> str: - s = "{num_embeddings}, {embedding_dim}" - if self.max_norm is not None: - s += ", max_norm={max_norm}" - if self.norm_type != 2: - s += ", norm_type={norm_type}" - if self.scale_grad_by_freq is not False: - s += ", scale_grad_by_freq={scale_grad_by_freq}" - s += ", mode={mode}" - if self.padding_idx is not None: - s += ", padding_idx={padding_idx}" - return s.format(**{k: repr(v) for k, v in self.__dict__.items()}) - - @classmethod - def from_pretrained( - cls, - embeddings: Tensor, - freeze: bool = True, - max_norm: Optional[float] = None, - norm_type: float = 2.0, - scale_grad_by_freq: bool = False, - mode: str = "mean", - sparse: bool = False, - include_last_offset: bool = 
False, - padding_idx: Optional[int] = None, - ) -> "EmbeddingBag": - r"""从给定的2维FloatTensor创建EmbeddingBag实例。 - - 参数: - embeddings (Tensor): 包含EmbeddingBag权重的FloatTensor, - 第一维作为num_embeddings,第二维作为embedding_dim。 - freeze (bool, 可选): 若为True,张量在学习过程中不更新, - 相当于embeddingbag.weight.requires_grad = False。默认True。 - max_norm (float, 可选): 同模块初始化文档说明,默认None。 - norm_type (float, 可选): 同模块初始化文档说明,默认2。 - scale_grad_by_freq (bool, 可选): 同模块初始化文档说明,默认False。 - mode (str, 可选): 同模块初始化文档说明,默认"mean"。 - sparse (bool, 可选): 同模块初始化文档说明,默认False。 - include_last_offset (bool, 可选): 同模块初始化文档说明,默认False。 - padding_idx (int, 可选): 同模块初始化文档说明,默认None。 - - 示例:: - - >>> # 包含预训练权重的FloatTensor - >>> weight = torch.FloatTensor([[1, 2.3, 3], [4, 5.1, 6.3]]) - >>> embeddingbag = nn.EmbeddingBag.from_pretrained(weight) - >>> # 获取索引1和0的嵌入袋(2D输入) - >>> input = torch.LongTensor([[1, 0]]) - >>> # xdoctest: +IGNORE_WANT("non-deterministic") - >>> embeddingbag(input) - tensor([[ 2.5000, 3.7000, 4.6500]]) - """ - assert ( - embeddings.dim() == 2 - ), "Embeddings参数应为2维张量" - rows, cols = embeddings.shape - embeddingbag = cls( - num_embeddings=rows, - embedding_dim=cols, - _weight=embeddings, - max_norm=max_norm, - norm_type=norm_type, - scale_grad_by_freq=scale_grad_by_freq, - mode=mode, - sparse=sparse, - include_last_offset=include_last_offset, - padding_idx=padding_idx, - ) - embeddingbag.weight.requires_grad = not freeze - return embeddingbag \ No newline at end of file diff --git a/front/py/deepx/tensor/tensor.py b/front/py/deepx/tensor/tensor.py index 7929ba7f..efe2255d 100644 --- a/front/py/deepx/tensor/tensor.py +++ b/front/py/deepx/tensor/tensor.py @@ -4,8 +4,7 @@ Number: TypeAlias = Union[int, float, bool] -tensorid=1 - + class Tensor: #life @@ -19,9 +18,11 @@ def __init__(self,shape:tuple[int,...],dtype:str='float32',name:str=None): self._name = name if name is None or name =='': - global tensorid - self._name =tensorid - tensorid+=1 + if not hasattr(self.__class__, '_instance_counter'): + 
self.__class__._instance_counter = 0 + count = self.__class__._instance_counter + self.__class__._instance_counter += 1 + self._name = count # dtype self._dtype = dtype diff --git a/front/py/examples/4_transformer/llama/1_rope.py b/front/py/examples/4_transformer/llama/1_rope.py new file mode 100644 index 00000000..e69de29b diff --git a/todo/deepxpy.md b/todo/deepxpy.md deleted file mode 100644 index b784c004..00000000 --- a/todo/deepxpy.md +++ /dev/null @@ -1,10 +0,0 @@ -# DeepX py部分 - -1.scheduder和excuter的通信 - + IR增加时间辍 - + 实现 请求和返回,实现异步计算通信重叠 - -2. - -3. - diff --git a/todo/excuter/metal/metal.md b/todo/excuter/metal/metal.md deleted file mode 100644 index 2022e8f9..00000000 --- a/todo/excuter/metal/metal.md +++ /dev/null @@ -1,3 +0,0 @@ -# Metal - -考虑用macos的metal加速库,支持GPU计算 \ No newline at end of file diff --git a/todo/excuter/ompsimd/ompsimd.md b/todo/excuter/ompsimd/ompsimd.md deleted file mode 100644 index 6b97be76..00000000 --- a/todo/excuter/ompsimd/ompsimd.md +++ /dev/null @@ -1,3 +0,0 @@ -# OMP SIMD - -当前开发中 \ No newline at end of file diff --git a/todo/scheduler.md b/todo/scheduler.md deleted file mode 100644 index 4f0cf0ec..00000000 --- a/todo/scheduler.md +++ /dev/null @@ -1,7 +0,0 @@ - -## 性能监控 - -通过IR消息的send_at,recv_at,done_at,计算IR的计算耗时 - -## 算子注册和融合 -