Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 6 additions & 39 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
node_output = node->view_src;
}

m_output_names.push_back(node_output_name);
m_outputs[node_output_name] = node_output;

current_node_info.node = node;
current_node_info.node_name = node_name;
current_node_info.node_output = node_output;
Expand All @@ -150,8 +147,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
continue;
}
std::string src_name = std::string(src->name);
m_input_names.push_back(src_name);
m_inputs[src_name] = src;
current_node_info.node_inputs[src_name] = src;
current_node_info.node_inputs_names.push_back(src_name);

Expand All @@ -167,6 +162,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
if (m_model_inputs.find(src_name) != m_model_inputs.end()) {
continue;
}
m_inputs[src_name] = src;
auto param_node =
std::make_shared<ov::op::v0::Parameter>(get_ov_type(src), get_graph_input_shape(node, src));
param_node->set_friendly_name(src_name);
Expand Down Expand Up @@ -739,58 +735,37 @@ ov::element::Type GgmlOvDecoder::get_ov_type(const ggml_tensor * tensor) {
}
}

ov::PartialShape GgmlOvDecoder::get_input_shape(const std::string & name) const {
return ov::PartialShape(get_shape(m_inputs.at(name)));
}

ov::PartialShape GgmlOvDecoder::get_input_shape(int node_idx, const std::string & name) const {
return ov::PartialShape(get_shape(m_node_info_list[node_idx].node_inputs.at(name)));
}

std::vector<size_t> GgmlOvDecoder::get_input_stride(const std::string & name) const {
return get_stride(m_inputs.at(name));
}

std::vector<size_t> GgmlOvDecoder::get_input_stride(int node_idx, const std::string & name) const {
return get_stride(m_node_info_list[node_idx].node_inputs.at(name));
}

ov::element::Type GgmlOvDecoder::get_input_type(const std::string & name) const {
return get_ov_type(m_inputs.at(name));
ov::element::Type GgmlOvDecoder::get_input_type(int node_idx, const std::string & name) const {
return get_ov_type(m_node_info_list[node_idx].node_inputs.at(name));
}

size_t GgmlOvDecoder::get_input_size() const {
return m_input_names.size();
return m_model_inputs.size();
}

size_t GgmlOvDecoder::get_input_size(int node_idx) const {
return m_node_info_list[node_idx].node_inputs_names.size();
}

std::vector<std::string> GgmlOvDecoder::get_input_names() const {
return m_input_names;
}

std::vector<std::string> GgmlOvDecoder::get_input_names(int node_idx) const {
return m_node_info_list[node_idx].node_inputs_names;
}

ov::PartialShape GgmlOvDecoder::get_output_shape(const std::string & name) const {
auto * ggml_tensor = m_outputs.at(name);
return ov::PartialShape(get_shape(ggml_tensor));
}

ov::PartialShape GgmlOvDecoder::get_output_shape(int node_idx) const {
auto * ggml_tensor = m_node_info_list[node_idx].node_output;
return ov::PartialShape(get_shape(ggml_tensor));
}

ov::element::Type GgmlOvDecoder::get_output_type(const std::string & name) const {
return get_ov_type(m_outputs.at(name));
}

std::vector<std::string> GgmlOvDecoder::get_output_names() const {
return m_output_names;
ov::element::Type GgmlOvDecoder::get_output_type(const int node_idx) const {
return get_ov_type(m_node_info_list[node_idx].node);
}

std::vector<std::string> GgmlOvDecoder::get_output_names(int node_idx) const {
Expand All @@ -806,18 +781,10 @@ const std::string & GgmlOvDecoder::get_op_name(int node_idx) const {
return m_node_info_list[node_idx].node_name;
}

int32_t * GgmlOvDecoder::get_input_op_params(const std::string & name) const {
return m_inputs.at(name)->op_params;
}

int32_t * GgmlOvDecoder::get_input_op_params(int node_idx, const std::string & name) const {
return m_node_info_list[node_idx].node_inputs.at(name)->op_params;
}

int32_t * GgmlOvDecoder::get_output_op_params(const std::string & name) const {
return m_outputs.at(name)->op_params;
}

int32_t * GgmlOvDecoder::get_output_op_params(int node_idx) const {
return m_node_info_list[node_idx].node->op_params;
}
Expand Down
23 changes: 2 additions & 21 deletions ggml/src/ggml-openvino/ggml-decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,11 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
GGML_UNUSED(name);
}

virtual ov::PartialShape get_input_shape(const std::string & name) const override;

virtual ov::PartialShape get_input_shape(int node_idx, const std::string & name) const override;

virtual std::vector<size_t> get_input_stride(const std::string & name) const override;

virtual std::vector<size_t> get_input_stride(int node_idx, const std::string & name) const override;

virtual ov::element::Type get_input_type(const std::string & name) const override;
virtual ov::element::Type get_input_type(int node_idx, const std::string & name) const override;

virtual size_t get_input_size() const override;

Expand All @@ -101,26 +97,16 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
GGML_UNUSED(producer_output_port_index);
}

virtual std::vector<std::string> get_input_names() const override;

virtual std::vector<std::string> get_input_names(int node_idx) const override;

virtual ov::PartialShape get_output_shape(const std::string & name) const override;

virtual ov::PartialShape get_output_shape(int node_idx) const override;

virtual ov::element::Type get_output_type(const std::string & name) const override;

virtual int32_t * get_input_op_params(const std::string & name) const override;
virtual ov::element::Type get_output_type(const int node_idx) const override;

virtual int32_t * get_input_op_params(int node_idx, const std::string & name) const override;

virtual int32_t * get_output_op_params(const std::string & name) const override;

virtual int32_t * get_output_op_params(int node_idx) const override;

virtual std::vector<std::string> get_output_names() const override;

virtual std::vector<std::string> get_output_names(int node_idx) const override;

virtual const std::string & get_op_type() const override;
Expand All @@ -135,8 +121,6 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {

ggml_tensor * get_input_ggml_tensor(const std::string & name) const { return m_inputs.at(name); }

ggml_tensor * get_output_ggml_tensor(const std::string & name) const { return m_outputs.at(name); }

virtual int get_op_case(int node_idx) const override { return m_node_info_list[node_idx].node_op_case; }

virtual const std::map<std::string, std::shared_ptr<ov::Node>> & get_model_inputs() const override {
Expand Down Expand Up @@ -237,9 +221,6 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
ggml_cgraph * m_cgraph = nullptr;
std::vector<ggml_tensor *> m_nodes;
std::map<std::string, ggml_tensor *> m_inputs;
std::vector<std::string> m_input_names;
std::map<std::string, ggml_tensor *> m_outputs;
std::vector<std::string> m_output_names;

std::map<std::string, std::shared_ptr<ov::Node>> m_model_inputs;
std::map<std::string, std::shared_ptr<ov::Node>> m_model_extra_inputs;
Expand Down
18 changes: 2 additions & 16 deletions ggml/src/ggml-openvino/openvino/decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,11 @@ class GgmlDecoder : public DecoderBase {
public:
virtual ov::Any get_attribute(const std::string& name) const = 0;

virtual PartialShape get_input_shape(const std::string& name) const = 0;

virtual PartialShape get_input_shape(int node_idx, const std::string& name) const = 0;

virtual std::vector<size_t> get_input_stride(const std::string& name) const = 0;

virtual std::vector<size_t> get_input_stride(int node_idx, const std::string& name) const = 0;

virtual element::Type get_input_type(const std::string& name) const = 0;
virtual element::Type get_input_type(int node_idx, const std::string& name) const = 0;

virtual size_t get_input_size() const = 0;

Expand All @@ -33,26 +29,16 @@ class GgmlDecoder : public DecoderBase {
std::string& producer_output_port_name,
size_t& producer_output_port_index) const = 0;

virtual std::vector<std::string> get_input_names() const = 0;

virtual std::vector<std::string> get_input_names(int node_idx) const = 0;

virtual PartialShape get_output_shape(const std::string& name) const = 0;

virtual PartialShape get_output_shape(int node_idx) const = 0;

virtual element::Type get_output_type(const std::string& name) const = 0;

virtual int32_t* get_input_op_params(const std::string& name) const = 0;
virtual element::Type get_output_type(const int node_idx) const = 0;

virtual int32_t* get_input_op_params(int node_idx, const std::string& name) const = 0;

virtual int32_t* get_output_op_params(const std::string& name) const = 0;

virtual int32_t * get_output_op_params(int node_idx) const = 0;

virtual std::vector<std::string> get_output_names() const = 0;

virtual std::vector<std::string> get_output_names(int node_idx) const = 0;

virtual const std::string& get_op_type() const = 0;
Expand Down
10 changes: 5 additions & 5 deletions ggml/src/ggml-openvino/openvino/node_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ class NodeContext : public frontend::NodeContext {
}

ov::element::Type get_input_type(size_t index) const {
return m_decoder->get_input_type(m_input_names[index]);
return m_decoder->get_input_type(m_node_idx, m_input_names[index]);
}

PartialShape get_input_shape(size_t index) const {
return m_decoder->get_input_shape(m_node_idx, m_input_names[index]);
PartialShape get_input_shape(size_t input_index) const {
return m_decoder->get_input_shape(m_node_idx, m_input_names[input_index]);
}

std::vector<size_t> get_input_stride(size_t index) const {
Expand All @@ -61,8 +61,8 @@ class NodeContext : public frontend::NodeContext {

int32_t * get_output_op_params() const { return m_decoder->get_output_op_params(m_node_idx); }

ov::element::Type get_output_type(size_t index) const {
return m_decoder->get_output_type(m_output_names[index]);
ov::element::Type get_output_type() const {
return m_decoder->get_output_type(m_node_idx);
}

Output<Node> get_input(int idx) const override {
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-openvino/openvino/op/cpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace ggml {
namespace op {

OutputVector translate_cpy(const NodeContext & context) {
auto res = std::make_shared<ov::op::v0::Convert>(context.get_input(0), context.get_output_type(0));
auto res = std::make_shared<ov::op::v0::Convert>(context.get_input(0), context.get_output_type());
return rename_outputs_with_suffix({res}, context.get_name());
}

Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-openvino/openvino/op/get_rows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ OutputVector translate_get_rows(const NodeContext & context) {
res = std::make_shared<ov::op::v8::Gather>(data, indices, axis);
}

if (res.get_element_type() != context.get_output_type(0)) {
res = std::make_shared<ov::op::v0::Convert>(res, context.get_output_type(0));
if (res.get_element_type() != context.get_output_type()) {
res = std::make_shared<ov::op::v0::Convert>(res, context.get_output_type());
}
res = std::make_shared<ov::op::v0::Unsqueeze>(res, ov::op::v0::Constant::create(ov::element::i64, {1}, {0}));
return rename_outputs_with_suffix({res}, context.get_name());
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-openvino/openvino/op/set_rows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ OutputVector translate_set_rows(const NodeContext & context) {
auto indices = context.get_input(1);
auto dst = context.get_input(2);

data = std::make_shared<ov::op::v0::Convert>(data, context.get_output_type(0));
data = std::make_shared<ov::op::v0::Convert>(data, context.get_output_type());

auto dst_shape = context.get_output_shape().to_shape();

Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-openvino/openvino/op/softmax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ OutputVector translate_soft_max(const NodeContext & context) {
mask_node_sliced = std::make_shared<ov::op::v8::Slice>(mask_node, zero, token_len, one, one);
}

if (mask_node_sliced.get_element_type() != context.get_output_type(0)) {
mask_node_sliced = std::make_shared<ov::op::v0::Convert>(mask_node_sliced, context.get_output_type(0));
if (mask_node_sliced.get_element_type() != context.get_output_type()) {
mask_node_sliced = std::make_shared<ov::op::v0::Convert>(mask_node_sliced, context.get_output_type());
}

Output<Node> slope_mask;
Expand Down
12 changes: 6 additions & 6 deletions ggml/src/ggml-openvino/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,9 +480,9 @@ ov::Tensor convert_ggml_input_to_ov(std::shared_ptr<GgmlOvDecoder> ggml_decoder,
// This case is added to make test-backend-ops work
input_shape = ggml_decoder->get_shape(ggml_tensor->view_src);
} else {
input_shape = ggml_decoder->get_input_shape(name).to_shape();
input_shape = ggml_decoder->get_shape(ggml_tensor);
}
auto input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data);
auto input_tensor = ov::Tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape, input_data);
return input_tensor;
}
} // namespace
Expand All @@ -506,7 +506,7 @@ ov::Tensor get_ov_input_tensor_static_decode(std::shared_ptr<GgmlOvDecoder> ggml
(op->op == GGML_OP_SET_ROWS && op->src[1] == ggml_tensor)) {
assert(ggml_tensor->ne[0] == 1);
ov::Shape input_shape = {1, 1, 1, 1};
ov::Tensor input_tensor(ggml_decoder->get_input_type(param_name), input_shape);
ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
if (ggml_tensor->type == GGML_TYPE_I32) {
*input_tensor.data<int32_t>() = *((int32_t *) ggml_tensor->data);
} else if (ggml_tensor->type == GGML_TYPE_I64) {
Expand All @@ -519,7 +519,7 @@ ov::Tensor get_ov_input_tensor_static_decode(std::shared_ptr<GgmlOvDecoder> ggml

if (param_name == "inp_out_ids") {
ov::Shape input_shape = {1, 1, 1, 1};
ov::Tensor input_tensor(ggml_decoder->get_input_type(param_name), input_shape);
ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
int32_t inp_out_id = *((int32_t *) ggml_tensor->data);
assert(ggml_tensor->ne[0] == 1);
assert(inp_out_id == 0);
Expand Down Expand Up @@ -553,7 +553,7 @@ ov::Tensor get_ov_input_tensor_static_prefill(std::shared_ptr<GgmlOvDecoder> ggm
if (param_name == "inp_pos" || param_name == "inp_tokens" ||
(op->op == GGML_OP_SET_ROWS && op->src[1] == ggml_tensor)) {
ov::Shape input_shape = {1, 1, 1, chunk_size};
ov::Tensor input_tensor(ggml_decoder->get_input_type(param_name), input_shape);
ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
// copy the chunk_index-th chunk from ggml_tensor
size_t element_size = ggml_type_size(ggml_tensor->type);
void * input_data = (char *) ggml_tensor->data + chunk_index * chunk_size * element_size;
Expand Down Expand Up @@ -581,7 +581,7 @@ ov::Tensor get_ov_input_tensor_static_prefill(std::shared_ptr<GgmlOvDecoder> ggm
if (param_name == "inp_out_ids") {
size_t output_len = ggml_decoder->get_compute_params().output_len;
ov::Shape input_shape = {1, 1, 1, output_len};
ov::Tensor input_tensor(ggml_decoder->get_input_type(param_name), input_shape);
ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
if (ggml_tensor->ne[0] == 0) {
*input_tensor.data<int32_t>() = 0;
} else {
Expand Down