diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index ada03a395b3..f3f13167d90 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -35,34 +35,11 @@ #include #include -GgmlOvDecoder::GgmlOvDecoder(ggml_tensor * node, - ggml_cgraph * cgraph, - bool is_static, - int context_size, - int context_size_swa, - int num_heads, - int num_heads_kv, - int head_size, - const std::vector & swa_layers) : - m_is_static(is_static), - m_cgraph(cgraph), - m_node(node), - m_op_name(std::string(node->name)), - m_ctx(context_size), - m_ctx_swa(context_size_swa), - m_n_heads(num_heads), - m_n_heads_kv(num_heads_kv), - m_head_size(head_size), - m_swa_layers(swa_layers) { - set_input_output(node); -} - GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map> & model_weights, bool is_static) : m_is_static(is_static), m_cgraph(cgraph), - m_op_name(m_node ? std::string(m_node->name) : ""), m_model_weights(model_weights) { if (auto * env = getenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS"); env && std::string(env) != "0") { #ifdef _WIN32 @@ -82,6 +59,11 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, set_input_output(cur_node); } + for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) { + m_node_info_list[node_n].node_op_case = compute_op_case(m_node_info_list[node_n].node); + m_node_info_list[node_n].node_op_type = compute_op_type(m_node_info_list[node_n].node); + } + add_extra_inputs(); } @@ -103,6 +85,7 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::mapop == GGML_OP_SET_ROWS) { // SET_ROWS updates the tensor in place. For later ov op that uses the // the view_src of SET_ROWS, we need to make sure they get the updated tensor @@ -116,6 +99,12 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { m_output_names.push_back(node_name); m_outputs[node_name] = node; + current_node_info.node = node; + current_node_info.node_name = node_name; + current_node_info.node_outputs[node_name] = node; + current_node_info.node_outputs_names.push_back(node_name); + current_node_info.node_op_case = 0; + for (int i = 0; i < GGML_MAX_SRC; i++) { auto * src = node->src[i]; if (src == nullptr) { @@ -124,7 +113,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { std::string src_name = std::string(src->name); m_input_names.push_back(src_name); m_inputs[src_name] = src; - m_op_node_name.emplace_back(src_name, ggml_op_name(node->op)); + current_node_info.node_inputs[src_name] = src; + current_node_info.node_inputs_names.push_back(src_name); // Add model inputs and weights constants, if called for the whole graph if (naive) { @@ -136,7 +126,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { m_model_inputs[src_name] = param_node; } - } else if (!m_node && !src->view_src) { + } else if (!src->view_src) { ggml_backend_buffer * buffer = src->buffer; if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY || src->flags & GGML_TENSOR_FLAG_INPUT) { @@ -159,7 +149,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { // Add model outputs, if called for the whole graph if (naive) { m_model_output_names.push_back(node_name); - } else if (!m_node) { + } else { // Model outputs are tensors with GGML_TENSOR_FLAG_OUTPUT flag and kv_caches static std::set debug_output_names = {}; // Workaround: the final tensor "result_output" does not have GGML_TENSOR_FLAG_OUTPUT flag set in cgraph @@ -178,92 +168,92 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { } } - if (m_node) { - switch (node->op) { - case GGML_OP_RESHAPE: { - auto * src = node->src[0]; - if (src->op == GGML_OP_RESHAPE && src->src[0]->ne[0] == node->ne[0] && src->src[0]->ne[1] == node->ne[1]) { - m_op_case = 4; - } else if (node->ne[0] * node->ne[1] == src->ne[0]) { - m_op_case = 1; - } else if (src->ne[0] * src->ne[1] == node->ne[0]) { - m_op_case = 2; - if (src->ne[2] * src->ne[3] == node->ne[1]) { - m_op_case = 5; - } - } else if (src->ne[0] * src->ne[1] == node->ne[1]) { - m_op_case = 3; - } else if (src->ne[1] * src->ne[2] == node->ne[1]) { - m_op_case = 6; + m_node_info_list.push_back(current_node_info); +} + +int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) { + int op_case = 0; + switch (node->op) { + case GGML_OP_RESHAPE: { + auto * src = node->src[0]; + if (src->op == GGML_OP_RESHAPE && src->src[0]->ne[0] == node->ne[0] && src->src[0]->ne[1] == node->ne[1]) { + op_case = 4; + } else if (node->ne[0] * node->ne[1] == src->ne[0]) { + op_case = 1; + } else if (src->ne[0] * src->ne[1] == node->ne[0]) { + op_case = 2; + if (src->ne[2] * src->ne[3] == node->ne[1]) { + op_case = 5; } - break; + } else if (src->ne[0] * src->ne[1] == node->ne[1]) { + op_case = 3; + } else if (src->ne[1] * src->ne[2] == node->ne[1]) { + op_case = 6; } - case GGML_OP_CONT: { - if (node->src[0]->op == GGML_OP_PERMUTE) { - m_op_case = 1; - } else if (node->src[0]->op == GGML_OP_TRANSPOSE) { - m_op_case = 2; - } else if (node->src[0]->op == GGML_OP_VIEW) { - // The input comes from a VIEW which is subtensor - m_op_case = 3; - } - break; + break; + } + case GGML_OP_CONT: { + if (node->src[0]->op == GGML_OP_PERMUTE) { + op_case = 1; + } else if (node->src[0]->op == GGML_OP_TRANSPOSE) { + op_case = 2; + } else if (node->src[0]->op == GGML_OP_VIEW) { + op_case = 3; } - case GGML_OP_PERMUTE: { - if (node->src[0]->op != GGML_OP_VIEW) { - m_op_case = 1; - } else if (ggml_is_contiguous(node->src[0])) { - std::string src_name(node->view_src->name); - if (src_name.find("cache") == std::string::npos) { - // permute Qcur - m_op_case = 4; + break; + } + case GGML_OP_PERMUTE: { + if (node->src[0]->op != GGML_OP_VIEW) { + op_case = 1; + } else if (ggml_is_contiguous(node->src[0])) { + std::string src_name(node->view_src->name); + if (src_name.find("cache") == std::string::npos) { + op_case = 4; + } else { + int layer = extract_layer_from_name(src_name); + if (!is_swa_layer(layer)) { + op_case = 2; } else { - // Permute kv cache (view) - int layer = extract_layer_from_name(src_name); - if (!is_swa_layer(layer)) { - m_op_case = 2; - } else { - m_op_case = 3; - } + op_case = 3; } } - break; } - case GGML_OP_MUL_MAT: { - if (node->src[0]->op == GGML_OP_CONT && node->src[0]->src[0]->op == GGML_OP_TRANSPOSE) { - m_op_case = 2; - } else if (node->src[0]->op == GGML_OP_VIEW && node->src[1]->op == GGML_OP_VIEW) { - // test-backend-ops case - m_op_case = 3; - } - break; + break; + } + case GGML_OP_MUL_MAT: { + if (node->src[0]->op == GGML_OP_CONT && node->src[0]->src[0]->op == GGML_OP_TRANSPOSE) { + op_case = 2; + } else if (node->src[0]->op == GGML_OP_VIEW && node->src[1]->op == GGML_OP_VIEW) { + op_case = 3; } - case GGML_OP_GET_ROWS: { - if (node->src[1]->op == GGML_OP_VIEW) { - m_op_case = 2; - } - break; + break; + } + case GGML_OP_GET_ROWS: { + if (node->src[1]->op == GGML_OP_VIEW) { + op_case = 2; } - case GGML_OP_ROPE: { - if (node->src[0]->op == GGML_OP_VIEW) { - m_op_case = 2; - } - break; + break; + } + case GGML_OP_ROPE: { + if (node->src[0]->op == GGML_OP_VIEW) { + op_case = 2; } - case GGML_OP_VIEW: { - if (node->src[0]->op == GGML_OP_VIEW) { - auto * src = node->src[0]; - if (ggml_nelements(node) != ggml_nelements(src)) { - throw std::runtime_error("Unsupported VIEW case"); - } - // This view is a reshape, slicing happens at src->op - m_op_case = 2; + break; + } + case GGML_OP_VIEW: { + if (node->src[0]->op == GGML_OP_VIEW) { + auto * src = node->src[0]; + if (ggml_nelements(node) != ggml_nelements(src)) { + throw std::runtime_error("Unsupported VIEW case"); } + op_case = 2; } - default: - break; - } + break; } + default: + break; + } + return op_case; } int extract_layer_from_name(const std::string & name) { @@ -721,10 +711,18 @@ ov::PartialShape GgmlOvDecoder::get_input_shape(const std::string & name) const return ov::PartialShape(get_shape(m_inputs.at(name))); } +ov::PartialShape GgmlOvDecoder::get_input_shape(int node_idx, const std::string & name) const { + return ov::PartialShape(get_shape(m_node_info_list[node_idx].node_inputs.at(name))); +} + std::vector GgmlOvDecoder::get_input_stride(const std::string & name) const { return get_stride(m_inputs.at(name)); } +std::vector GgmlOvDecoder::get_input_stride(int node_idx, const std::string & name) const { + return get_stride(m_node_info_list[node_idx].node_inputs.at(name)); +} + ov::element::Type GgmlOvDecoder::get_input_type(const std::string & name) const { return get_ov_type(m_inputs.at(name)); } @@ -733,15 +731,18 @@ size_t GgmlOvDecoder::get_input_size() const { return m_input_names.size(); } -std::string & GgmlOvDecoder::get_input_name(size_t index) const { - m_name = m_input_names[index]; - return m_name; +size_t GgmlOvDecoder::get_input_size(int node_idx) const { + return m_node_info_list[node_idx].node_inputs_names.size(); } std::vector GgmlOvDecoder::get_input_names() const { return m_input_names; } +std::vector GgmlOvDecoder::get_input_names(int node_idx) const { + return m_node_info_list[node_idx].node_inputs_names; +} + std::vector GgmlOvDecoder::get_output_stride(const std::string & name) const { return get_stride(m_outputs.at(name)); } @@ -754,40 +755,58 @@ ov::PartialShape GgmlOvDecoder::get_output_shape(const std::string & name) const return ov::PartialShape(get_shape(ggml_tensor)); } -ov::element::Type GgmlOvDecoder::get_output_type(const std::string & name) const { - return get_ov_type(m_outputs.at(name)); +ov::PartialShape GgmlOvDecoder::get_output_shape(int node_idx, const std::string & name) const { + auto * ggml_tensor = m_node_info_list[node_idx].node_outputs.at(name); + if (ggml_tensor->op == GGML_OP_SET_ROWS) { + ggml_tensor = ggml_tensor->view_src; + } + return ov::PartialShape(get_shape(ggml_tensor)); } -std::string & GgmlOvDecoder::get_output_name(size_t index) const { - m_name = std::string(m_output_names[index]); - return m_name; +ov::element::Type GgmlOvDecoder::get_output_type(const std::string & name) const { + return get_ov_type(m_outputs.at(name)); } std::vector GgmlOvDecoder::get_output_names() const { return m_output_names; } +std::vector GgmlOvDecoder::get_output_names(int node_idx) const { + return m_node_info_list[node_idx].node_outputs_names; +} + const std::string & GgmlOvDecoder::get_op_name() const { - return m_op_name; + static const std::string unknown_name = "UNKNOWN_OP_NAME"; + return unknown_name; +} + +const std::string & GgmlOvDecoder::get_op_name(int node_idx) const { + return m_node_info_list[node_idx].node_name; } int32_t * GgmlOvDecoder::get_input_op_params(const std::string & name) const { return m_inputs.at(name)->op_params; } +int32_t * GgmlOvDecoder::get_input_op_params(int node_idx, const std::string & name) const { + return m_node_info_list[node_idx].node_inputs.at(name)->op_params; +} + int32_t * GgmlOvDecoder::get_output_op_params(const std::string & name) const { return m_outputs.at(name)->op_params; } -void GgmlOvDecoder::visit_subgraph(std::function)> node_visitor) const { - for (const auto & node : m_nodes) { - auto decoder = std::make_shared(node, m_cgraph, m_is_static, m_ctx, m_ctx_swa, m_n_heads, - m_n_heads_kv, m_head_size, m_swa_layers); - node_visitor(decoder); +int32_t * GgmlOvDecoder::get_output_op_params(int node_idx, const std::string & name) const { + return m_node_info_list[node_idx].node_outputs.at(name)->op_params; +} + +void GgmlOvDecoder::visit_subgraph(std::function, int node_idx)> node_visitor) const { + for (int node_idx = 0; node_idx < m_cgraph->n_nodes; node_idx++) { + node_visitor(std::make_shared(*this), node_idx); } } -const std::string & GgmlOvDecoder::get_op_type() const { +std::string GgmlOvDecoder::compute_op_type(const ggml_tensor * node) { static const std::map ops = { {GGML_OP_NONE, "GGML_OP_NONE" }, {GGML_OP_ACC, "GGML_OP_ACC" }, @@ -835,14 +854,23 @@ const std::string & GgmlOvDecoder::get_op_type() const { {GGML_GLU_OP_REGLU, "GGML_GLU_OP_REGLU" } }; - switch (m_node->op) { + switch (node->op) { case GGML_OP_UNARY: - return unary_ops.at(ggml_get_unary_op(m_node)); + return unary_ops.at(ggml_get_unary_op(node)); case GGML_OP_GLU: - return glu_ops.at(ggml_get_glu_op(m_node)); + return glu_ops.at(ggml_get_glu_op(node)); default: - return ops.at(m_node->op); + return ops.at(node->op); } static const std::string unknown_op = "UNKNOWN_GGML_OP"; return unknown_op; } + +const std::string & GgmlOvDecoder::get_op_type(int node_idx) const { + return m_node_info_list[node_idx].node_op_type; +} + +const std::string & GgmlOvDecoder::get_op_type() const { + static const std::string unknown_op = "UNKNOWN_GGML_OP"; + return unknown_op; +} diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index e2efc73f17f..6e2bf0486d8 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -13,22 +13,21 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { public: + struct NodeInfo { + ggml_tensor * node; + std::map node_inputs; + std::vector node_inputs_names; + std::map node_outputs; + std::vector node_outputs_names; + int node_op_case = 0; + std::string node_op_type; + std::string node_name; + }; // Graph decoder GgmlOvDecoder(ggml_cgraph * cgraph, std::map> & model_weights, bool is_static); - // Node decoder, called in GgmlOvDecoder::visit_subgraph - GgmlOvDecoder(ggml_tensor * node, - ggml_cgraph * cgraph, - bool is_static, - int context_size, - int context_size_swa, - int num_heads, - int num_heads_kv, - int head_size, - const std::vector & swa_layers); - // Naive graph decoder GgmlOvDecoder(ggml_cgraph * cgraph, std::map> & model_weights); @@ -39,12 +38,18 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { virtual ov::PartialShape get_input_shape(const std::string & name) const override; + virtual ov::PartialShape get_input_shape(int node_idx, const std::string & name) const override; + virtual std::vector get_input_stride(const std::string & name) const override; + virtual std::vector get_input_stride(int node_idx, const std::string & name) const override; + virtual ov::element::Type get_input_type(const std::string & name) const override; virtual size_t get_input_size() const override; + virtual size_t get_input_size(int node_idx) const override; + virtual void get_input_node(size_t input_port_idx, std::string & producer_name, std::string & producer_output_port_name, @@ -55,35 +60,45 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { GGML_UNUSED(producer_output_port_index); } - virtual std::string & get_input_name(size_t index) const override; - virtual std::vector get_input_names() const override; + virtual std::vector get_input_names(int node_idx) const override; + virtual ov::PartialShape get_output_shape(const std::string & name) const override; + virtual ov::PartialShape get_output_shape(int node_idx, const std::string & name) const override; + virtual std::vector get_output_stride(const std::string & name) const override; virtual ov::element::Type get_output_type(const std::string & name) const override; virtual int32_t * get_input_op_params(const std::string & name) const override; + virtual int32_t * get_input_op_params(int node_idx, const std::string & name) const override; + virtual int32_t * get_output_op_params(const std::string & name) const override; - virtual std::string & get_output_name(size_t index) const override; + virtual int32_t * get_output_op_params(int node_idx, const std::string & name) const override; virtual std::vector get_output_names() const override; + virtual std::vector get_output_names(int node_idx) const override; + virtual const std::string & get_op_type() const override; + virtual const std::string & get_op_type(int node_idx) const override; + virtual const std::string & get_op_name() const override; - virtual void visit_subgraph(std::function)> node_visitor) const override; + virtual const std::string & get_op_name(int node_idx) const override; + + virtual void visit_subgraph(std::function, int node_idx)> node_visitor) const override; ggml_tensor * get_input_ggml_tensor(const std::string & name) const { return m_inputs.at(name); } ggml_tensor * get_output_ggml_tensor(const std::string & name) const { return m_outputs.at(name); } - virtual int get_op_case() const override { return m_op_case; } + virtual int get_op_case(int node_idx) const override { return m_node_info_list[node_idx].node_op_case; } virtual const std::map> & get_model_inputs() const override { return m_model_inputs; @@ -150,6 +165,8 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { static std::vector get_shape(const ggml_tensor * tensor); static std::vector get_stride(const ggml_tensor * tensor); static ov::element::Type get_ov_type(const ggml_tensor * tensor); + int compute_op_case(const ggml_tensor * node); + std::string compute_op_type(const ggml_tensor * node); void set_llm_params(); void validate_cgraph() const; @@ -157,21 +174,18 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { bool m_is_static = false; ggml_cgraph * m_cgraph = nullptr; - ggml_tensor * m_node = nullptr; std::vector m_nodes; std::map m_inputs; std::vector m_input_names; std::map m_outputs; std::vector m_output_names; - std::string m_op_name; - mutable std::string m_name; - int m_op_case = 0; - std::vector> m_op_node_name; + std::map> m_model_inputs; std::map> m_model_extra_inputs; std::map> m_model_extra_input_values; std::map> m_model_weights; std::vector m_model_output_names; + std::vector m_node_info_list; // Fixed for a model int m_ctx = -1; diff --git a/ggml/src/ggml-openvino/openvino/decoder.hpp b/ggml/src/ggml-openvino/openvino/decoder.hpp index 8f86a4de064..1d5b7a850f8 100644 --- a/ggml/src/ggml-openvino/openvino/decoder.hpp +++ b/ggml/src/ggml-openvino/openvino/decoder.hpp @@ -16,42 +16,58 @@ class GgmlDecoder : public DecoderBase { virtual PartialShape get_input_shape(const std::string& name) const = 0; + virtual PartialShape get_input_shape(int node_idx, const std::string& name) const = 0; + virtual std::vector get_input_stride(const std::string& name) const = 0; + virtual std::vector get_input_stride(int node_idx, const std::string& name) const = 0; + virtual element::Type get_input_type(const std::string& name) const = 0; virtual size_t get_input_size() const = 0; + virtual size_t get_input_size(int node_idx) const = 0; + virtual void get_input_node(size_t input_port_idx, std::string& producer_name, std::string& producer_output_port_name, size_t& producer_output_port_index) const = 0; - virtual std::string& get_input_name(size_t index) const = 0; - virtual std::vector get_input_names() const = 0; + virtual std::vector get_input_names(int node_idx) const = 0; + virtual PartialShape get_output_shape(const std::string& name) const = 0; + virtual PartialShape get_output_shape(int node_idx, const std::string& name) const = 0; + virtual std::vector get_output_stride(const std::string& name) const = 0; virtual element::Type get_output_type(const std::string& name) const = 0; virtual int32_t* get_input_op_params(const std::string& name) const = 0; + virtual int32_t* get_input_op_params(int node_idx, const std::string& name) const = 0; + virtual int32_t* get_output_op_params(const std::string& name) const = 0; - virtual std::string& get_output_name(size_t index) const = 0; + virtual int32_t* get_output_op_params(int node_idx, const std::string& name) const = 0; virtual std::vector get_output_names() const = 0; + virtual std::vector get_output_names(int node_idx) const = 0; + virtual const std::string& get_op_type() const = 0; + virtual const std::string& get_op_type(int node_idx) const = 0; + virtual const std::string& get_op_name() const = 0; - virtual void visit_subgraph(std::function)> node_visitor) const = 0; + virtual const std::string& get_op_name(int node_idx) const = 0; + + virtual void visit_subgraph(std::function, int node_idx)> node_visitor) const = 0; - virtual int get_op_case() const = 0; + virtual int get_op_case(int node_idx) const = 0; virtual const std::map>& get_model_inputs() const = 0; virtual const std::map>& get_model_extra_inputs() const = 0; diff --git a/ggml/src/ggml-openvino/openvino/node_context.hpp b/ggml/src/ggml-openvino/openvino/node_context.hpp index 0d76dc83e05..64e3d550c58 100644 --- a/ggml/src/ggml-openvino/openvino/node_context.hpp +++ b/ggml/src/ggml-openvino/openvino/node_context.hpp @@ -18,13 +18,15 @@ class NodeContext : public frontend::NodeContext { public: NodeContext(const std::shared_ptr& decoder, std::shared_ptr& tensor_map, + int node_idx, TranslateSession* translate_session = nullptr) - : ov::frontend::NodeContext(decoder->get_op_type()), + : ov::frontend::NodeContext(decoder->get_op_type(node_idx)), m_decoder(decoder), m_tensor_map(tensor_map), + m_node_idx(node_idx), m_translate_session(translate_session) { - m_input_names = decoder->get_input_names(); - m_output_names = decoder->get_output_names(); + m_input_names = decoder->get_input_names(m_node_idx); + m_output_names = decoder->get_output_names(m_node_idx); } TranslateSession* get_translate_session() const { @@ -34,7 +36,7 @@ class NodeContext : public frontend::NodeContext { const std::vector& get_input_names() const { return m_input_names; } size_t get_input_size() const override { - return m_decoder->get_input_size(); + return m_decoder->get_input_size(m_node_idx); } ov::element::Type get_input_type(size_t index) const { @@ -42,29 +44,25 @@ class NodeContext : public frontend::NodeContext { } PartialShape get_input_shape(size_t index) const { - return m_decoder->get_input_shape(m_input_names[index]); + return m_decoder->get_input_shape(m_node_idx, m_input_names[index]); } std::vector get_input_stride(size_t index) const { - return m_decoder->get_input_stride(m_input_names[index]); + return m_decoder->get_input_stride(m_node_idx, m_input_names[index]); } std::string get_output_name() const { return m_output_names[0]; } PartialShape get_output_shape(size_t index) const { - return m_decoder->get_output_shape(m_output_names[index]); - } - - std::vector get_output_stride(size_t index) const { - return m_decoder->get_output_stride(m_output_names[index]); + return m_decoder->get_output_shape(m_node_idx, m_output_names[index]); } int32_t* get_input_op_params(size_t index) const { - return m_decoder->get_input_op_params(m_input_names[index]); + return m_decoder->get_input_op_params(m_node_idx, m_input_names[index]); } int32_t* get_output_op_params(size_t index) const { - return m_decoder->get_output_op_params(m_output_names[index]); + return m_decoder->get_output_op_params(m_node_idx, m_output_names[index]); } ov::element::Type get_output_type(size_t index) const { @@ -72,7 +70,7 @@ class NodeContext : public frontend::NodeContext { } Output get_input(int idx) const override { - return m_tensor_map->at(m_decoder->get_input_name(idx)); + return m_tensor_map->at(m_input_names[idx]); } Output get_input(const std::string& name) const override { @@ -87,7 +85,7 @@ class NodeContext : public frontend::NodeContext { } const std::string& get_name() const override { - return m_decoder->get_op_name(); + return m_decoder->get_op_name(m_node_idx); } ov::Any get_attribute_as_any(const std::string& name) const override { @@ -95,13 +93,14 @@ class NodeContext : public frontend::NodeContext { } int get_op_case() const { - return m_decoder->get_op_case(); + return m_decoder->get_op_case(m_node_idx); } bool is_static() const { return m_decoder->is_static(); } private: std::shared_ptr m_decoder; std::shared_ptr& m_tensor_map; + int m_node_idx; TranslateSession* m_translate_session; std::vector m_input_names; std::vector m_output_names; diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp index d12701acdc7..d03c9358b03 100644 --- a/ggml/src/ggml-openvino/openvino/translate_session.cpp +++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp @@ -164,8 +164,8 @@ std::shared_ptr TranslateSession::translate_graph(const frontend::InputMo (*tensor_map)[it.first] = it.second; } - auto node_visitor = [&](std::shared_ptr node) { - auto operation_type = node->get_op_type(); + auto node_visitor = [&](std::shared_ptr decoder, int node_idx) { + auto operation_type = decoder->get_op_type(node_idx); if (operation_type == "GGML_OP_NONE") { return; } @@ -174,10 +174,10 @@ std::shared_ptr TranslateSession::translate_graph(const frontend::InputMo auto it = m_translator_map.find(operation_type); FRONT_END_OP_CONVERSION_CHECK(it != m_translator_map.end(), "Translation for operation type ", operation_type, " is not implemented."); - NodeContext node_context(node, tensor_map, this); + NodeContext node_context(decoder, tensor_map, node_idx, this); converted_outputs = it->second(node_context); - const auto & node_output_names = node->get_output_names(); + const auto & node_output_names = decoder->get_output_names(node_idx); FRONT_END_OP_CONVERSION_CHECK(node_output_names.size() == converted_outputs.size(), "Number of ", operation_type, " outputs greater than number of converted outputs, which are ", node_output_names.size(), " and ", converted_outputs.size(), " respectively.");