Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 10 additions & 96 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,93 +173,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
}
}
}

if (m_node) {
switch (node->op) {
case GGML_OP_RESHAPE: {
auto * src = node->src[0];
if (src->op == GGML_OP_RESHAPE && src->src[0]->ne[0] == node->ne[0] && src->src[0]->ne[1] == node->ne[1]) {
m_op_case = 4;
} else if (node->ne[0] * node->ne[1] == src->ne[0]) {
m_op_case = 1;
} else if (src->ne[0] * src->ne[1] == node->ne[0]) {
m_op_case = 2;
if (src->ne[2] * src->ne[3] == node->ne[1]) {
m_op_case = 5;
}
} else if (src->ne[0] * src->ne[1] == node->ne[1]) {
m_op_case = 3;
} else if (src->ne[1] * src->ne[2] == node->ne[1]) {
m_op_case = 6;
}
break;
}
case GGML_OP_CONT: {
if (node->src[0]->op == GGML_OP_PERMUTE) {
m_op_case = 1;
} else if (node->src[0]->op == GGML_OP_TRANSPOSE) {
m_op_case = 2;
} else if (node->src[0]->op == GGML_OP_VIEW) {
// The input comes from a VIEW which is subtensor
m_op_case = 3;
}
break;
}
case GGML_OP_PERMUTE: {
if (node->src[0]->op != GGML_OP_VIEW) {
m_op_case = 1;
} else if (ggml_is_contiguous(node->src[0])) {
std::string src_name(node->view_src->name);
if (src_name.find("cache") == std::string::npos) {
// permute Qcur
m_op_case = 4;
} else {
// Permute kv cache (view)
int layer = extract_layer_from_name(src_name);
if (!is_swa_layer(layer)) {
m_op_case = 2;
} else {
m_op_case = 3;
}
}
}
break;
}
case GGML_OP_MUL_MAT: {
if (node->src[0]->op == GGML_OP_CONT && node->src[0]->src[0]->op == GGML_OP_TRANSPOSE) {
m_op_case = 2;
} else if (node->src[0]->op == GGML_OP_VIEW && node->src[1]->op == GGML_OP_VIEW) {
// test-backend-ops case
m_op_case = 3;
}
break;
}
case GGML_OP_GET_ROWS: {
if (node->src[1]->op == GGML_OP_VIEW) {
m_op_case = 2;
}
break;
}
case GGML_OP_ROPE: {
if (node->src[0]->op == GGML_OP_VIEW) {
m_op_case = 2;
}
break;
}
case GGML_OP_VIEW: {
if (node->src[0]->op == GGML_OP_VIEW) {
auto * src = node->src[0];
if (ggml_nelements(node) != ggml_nelements(src)) {
throw std::runtime_error("Unsupported VIEW case");
}
// This view is a reshape, slicing happens at src->op
m_op_case = 2;
}
}
default:
break;
}
}
}

int extract_layer_from_name(const std::string & name) {
Expand Down Expand Up @@ -320,7 +233,6 @@ void GgmlOvDecoder::set_llm_params() {
} else if (node->op == GGML_OP_ROPE) {
if (name.find("Qcur-0") == 0 || std::string(node->src[0]->name).find("Qcur-0") == 0) {
m_head_size = node->ne[0];
m_n_heads = node->ne[1];
m_rope_params = node->op_params;
auto * inp_pos = node->src[1];
m_input_len = inp_pos->ne[0];
Expand Down Expand Up @@ -775,15 +687,17 @@ int32_t * GgmlOvDecoder::get_output_op_params(const std::string & name) const {
return m_outputs.at(name)->op_params;
}

void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>)> node_visitor) const {
void GgmlOvDecoder::visit_subgraph(std::function<void(ggml_tensor * tensor, bool is_static)> node_visitor) const {
for (const auto & node : m_nodes) {
auto decoder = std::make_shared<GgmlOvDecoder>(node, m_cgraph, m_is_static, m_ctx, m_ctx_swa, m_n_heads,
m_n_heads_kv, m_head_size, m_swa_layers);
node_visitor(decoder);
node_visitor(node, m_is_static);
}
}

// Returns the GGML op-type string for this decoder's node (m_node).
//
// get_ggml_op_type() returns its std::string by value. Returning that call
// expression directly from a function whose return type is a reference would
// bind the reference to a temporary that is destroyed before the caller can
// read it. The value is therefore copied into thread-local storage first and a
// reference to that storage is returned.
//
// The returned reference stays valid only until the next get_op_type() call on
// the same thread; callers that need the value for longer must copy it.
const std::string & GgmlOvDecoder::get_op_type() const {
    static thread_local std::string op_type;
    op_type = get_ggml_op_type(m_node);
    return op_type;
}

std::string GgmlOvDecoder::get_ggml_op_type(ggml_tensor * tensor) {
static const std::map<ggml_op, std::string> ops = {
{GGML_OP_NONE, "GGML_OP_NONE" },
{GGML_OP_ACC, "GGML_OP_ACC" },
Expand Down Expand Up @@ -831,13 +745,13 @@ const std::string & GgmlOvDecoder::get_op_type() const {
{GGML_GLU_OP_REGLU, "GGML_GLU_OP_REGLU" }
};

switch (m_node->op) {
switch (tensor->op) {
case GGML_OP_UNARY:
return unary_ops.at(ggml_get_unary_op(m_node));
return unary_ops.at(ggml_get_unary_op(tensor));
case GGML_OP_GLU:
return glu_ops.at(ggml_get_glu_op(m_node));
return glu_ops.at(ggml_get_glu_op(tensor));
default:
return ops.at(m_node->op);
return ops.at(tensor->op);
}
static const std::string unknown_op = "UNKNOWN_GGML_OP";
return unknown_op;
Expand Down
15 changes: 8 additions & 7 deletions ggml/src/ggml-openvino/ggml-decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,16 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {

virtual const std::string & get_op_type() const override;

static std::string get_ggml_op_type(ggml_tensor * tensor);

virtual const std::string & get_op_name() const override;

virtual void visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>)> node_visitor) const override;
virtual void visit_subgraph(std::function<void(ggml_tensor * tensor, bool is_static)> node_visitor) const override;

// Returns the tensor stored under `name` in m_inputs, looked up with at().
// NOTE(review): m_inputs' declaration is not visible here; presumably at()
// throws when `name` is absent (std::map-like semantics) - confirm.
ggml_tensor * get_input_ggml_tensor(const std::string & name) const {
    ggml_tensor * const input_tensor = m_inputs.at(name);
    return input_tensor;
}

// Returns the tensor stored under `name` in m_outputs, looked up with at().
// NOTE(review): m_outputs' declaration is not visible here; presumably at()
// throws when `name` is absent (std::map-like semantics) - confirm.
ggml_tensor * get_output_ggml_tensor(const std::string & name) const {
    ggml_tensor * const output_tensor = m_outputs.at(name);
    return output_tensor;
}

virtual int get_op_case() const override { return m_op_case; }

// Read-only accessor for m_model_inputs (name -> ov::Node map).
// Returns a reference to the member itself; no copy is made.
virtual const std::map<std::string, std::shared_ptr<ov::Node>> & get_model_inputs() const override {
    const auto & model_inputs = m_model_inputs;
    return model_inputs;
}
Expand Down Expand Up @@ -144,12 +144,14 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {

// Empties m_model_weights by calling its clear() member.
void clear_model_weights() {
    m_model_weights.clear();
}

static ov::element::Type get_ov_type(const ggml_tensor * tensor);

static std::vector<size_t> get_shape(const ggml_tensor * tensor);

static std::vector<size_t> get_stride(const ggml_tensor * tensor);
private:
void set_input_output(ggml_tensor * node, bool naive = false);
void add_extra_inputs();
static std::vector<size_t> get_shape(const ggml_tensor * tensor);
static std::vector<size_t> get_stride(const ggml_tensor * tensor);
static ov::element::Type get_ov_type(const ggml_tensor * tensor);

void set_llm_params();
void validate_cgraph() const;
Expand All @@ -165,7 +167,6 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
std::vector<std::string> m_output_names;
std::string m_op_name;
mutable std::string m_name;
int m_op_case = 0;
std::vector<std::pair<std::string, std::string>> m_op_node_name;
std::map<std::string, std::shared_ptr<ov::Node>> m_model_inputs;
std::map<std::string, std::shared_ptr<ov::Node>> m_model_extra_inputs;
Expand Down
7 changes: 2 additions & 5 deletions ggml/src/ggml-openvino/openvino/decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <openvino/core/node.hpp>
#include <openvino/frontend/decoder.hpp>
#include <string>
#include "ggml.h"

namespace ov {
namespace frontend {
Expand Down Expand Up @@ -45,13 +46,9 @@ class GgmlDecoder : public DecoderBase {

virtual std::vector<std::string> get_output_names() const = 0;

virtual const std::string& get_op_type() const = 0;

virtual const std::string& get_op_name() const = 0;

virtual void visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>)> node_visitor) const = 0;

virtual int get_op_case() const = 0;
virtual void visit_subgraph(std::function<void(ggml_tensor * tensor, bool is_static)> node_visitor) const = 0;

virtual const std::map<std::string, std::shared_ptr<ov::Node>>& get_model_inputs() const = 0;
virtual const std::map<std::string, std::shared_ptr<ov::Node>>& get_model_extra_inputs() const = 0;
Expand Down
Loading