diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index c00efaf6aeb..2e45e2081f0 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -173,93 +173,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
             }
         }
     }
-
-    if (m_node) {
-        switch (node->op) {
-        case GGML_OP_RESHAPE: {
-            auto * src = node->src[0];
-            if (src->op == GGML_OP_RESHAPE && src->src[0]->ne[0] == node->ne[0] && src->src[0]->ne[1] == node->ne[1]) {
-                m_op_case = 4;
-            } else if (node->ne[0] * node->ne[1] == src->ne[0]) {
-                m_op_case = 1;
-            } else if (src->ne[0] * src->ne[1] == node->ne[0]) {
-                m_op_case = 2;
-                if (src->ne[2] * src->ne[3] == node->ne[1]) {
-                    m_op_case = 5;
-                }
-            } else if (src->ne[0] * src->ne[1] == node->ne[1]) {
-                m_op_case = 3;
-            } else if (src->ne[1] * src->ne[2] == node->ne[1]) {
-                m_op_case = 6;
-            }
-            break;
-        }
-        case GGML_OP_CONT: {
-            if (node->src[0]->op == GGML_OP_PERMUTE) {
-                m_op_case = 1;
-            } else if (node->src[0]->op == GGML_OP_TRANSPOSE) {
-                m_op_case = 2;
-            } else if (node->src[0]->op == GGML_OP_VIEW) {
-                // The input comes from a VIEW which is subtensor
-                m_op_case = 3;
-            }
-            break;
-        }
-        case GGML_OP_PERMUTE: {
-            if (node->src[0]->op != GGML_OP_VIEW) {
-                m_op_case = 1;
-            } else if (ggml_is_contiguous(node->src[0])) {
-                std::string src_name(node->view_src->name);
-                if (src_name.find("cache") == std::string::npos) {
-                    // permute Qcur
-                    m_op_case = 4;
-                } else {
-                    // Permute kv cache (view)
-                    int layer = extract_layer_from_name(src_name);
-                    if (!is_swa_layer(layer)) {
-                        m_op_case = 2;
-                    } else {
-                        m_op_case = 3;
-                    }
-                }
-            }
-            break;
-        }
-        case GGML_OP_MUL_MAT: {
-            if (node->src[0]->op == GGML_OP_CONT && node->src[0]->src[0]->op == GGML_OP_TRANSPOSE) {
-                m_op_case = 2;
-            } else if (node->src[0]->op == GGML_OP_VIEW && node->src[1]->op == GGML_OP_VIEW) {
-                // test-backend-ops case
-                m_op_case = 3;
-            }
-            break;
-        }
-        case GGML_OP_GET_ROWS: {
-            if (node->src[1]->op == GGML_OP_VIEW) {
-                m_op_case = 2;
-            }
-            break;
-        }
-        case GGML_OP_ROPE: {
-            if (node->src[0]->op == GGML_OP_VIEW) {
-                m_op_case = 2;
-            }
-            break;
-        }
-        case GGML_OP_VIEW: {
-            if (node->src[0]->op == GGML_OP_VIEW) {
-                auto * src = node->src[0];
-                if (ggml_nelements(node) != ggml_nelements(src)) {
-                    throw std::runtime_error("Unsupported VIEW case");
-                }
-                // This view is a reshape, slicing happens at src->op
-                m_op_case = 2;
-            }
-        }
-        default:
-            break;
-        }
-    }
 }
 
 int extract_layer_from_name(const std::string & name) {
@@ -320,7 +233,6 @@ void GgmlOvDecoder::set_llm_params() {
         } else if (node->op == GGML_OP_ROPE) {
             if (name.find("Qcur-0") == 0 || std::string(node->src[0]->name).find("Qcur-0") == 0) {
                 m_head_size = node->ne[0];
-                m_n_heads = node->ne[1];
                 m_rope_params = node->op_params;
                 auto * inp_pos = node->src[1];
                 m_input_len = inp_pos->ne[0];
@@ -775,15 +687,21 @@ int32_t * GgmlOvDecoder::get_output_op_params(const std::string & name) const {
     return m_outputs.at(name)->op_params;
 }
 
-void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>)> node_visitor) const {
+void GgmlOvDecoder::visit_subgraph(std::function<void(ggml_tensor * tensor, bool is_static)> node_visitor) const {
     for (const auto & node : m_nodes) {
-        auto decoder = std::make_shared<GgmlOvDecoder>(node, m_cgraph, m_is_static, m_ctx, m_ctx_swa, m_n_heads,
-                                                       m_n_heads_kv, m_head_size, m_swa_layers);
-        node_visitor(decoder);
+        node_visitor(node, m_is_static);
     }
 }
 
 const std::string & GgmlOvDecoder::get_op_type() const {
+    return get_ggml_op_type(m_node);
+}
+
+// Returns the string identifier for the operation of `tensor`; GGML_OP_UNARY and GGML_OP_GLU are
+// resolved to the name of their specific unary / GLU sub-op. Lookups go through .at(), which throws
+// if the op is missing from the tables. Returns a reference rather than a copy: get_op_type()
+// forwards it through its own `const std::string &` return, which must not bind to a temporary.
+const std::string & GgmlOvDecoder::get_ggml_op_type(ggml_tensor * tensor) {
     static const std::map<ggml_op, std::string> ops = {
         {GGML_OP_NONE,           "GGML_OP_NONE"          },
         {GGML_OP_ACC,            "GGML_OP_ACC"           },
@@ -831,13 +749,13 @@ const std::string & GgmlOvDecoder::get_op_type() const {
         {GGML_GLU_OP_REGLU,  "GGML_GLU_OP_REGLU" }
     };
 
-    switch (m_node->op) {
+    switch (tensor->op) {
     case GGML_OP_UNARY:
-        return unary_ops.at(ggml_get_unary_op(m_node));
+        return unary_ops.at(ggml_get_unary_op(tensor));
     case GGML_OP_GLU:
-        return glu_ops.at(ggml_get_glu_op(m_node));
+        return glu_ops.at(ggml_get_glu_op(tensor));
     default:
-        return ops.at(m_node->op);
+        return ops.at(tensor->op);
     }
     static const std::string unknown_op = "UNKNOWN_GGML_OP";
     return unknown_op;
diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h
index e2efc73f17f..ea15698bae8 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -75,16 +75,16 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
 
     virtual const std::string & get_op_type() const override;
 
+    static const std::string & get_ggml_op_type(ggml_tensor * tensor);
+
     virtual const std::string & get_op_name() const override;
 
-    virtual void visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>)> node_visitor) const override;
+    virtual void visit_subgraph(std::function<void(ggml_tensor * tensor, bool is_static)> node_visitor) const override;
 
     ggml_tensor * get_input_ggml_tensor(const std::string & name) const { return m_inputs.at(name); }
 
     ggml_tensor * get_output_ggml_tensor(const std::string & name) const { return m_outputs.at(name); }
 
-    virtual int get_op_case() const override { return m_op_case; }
-
     virtual const std::map<std::string, std::shared_ptr<ov::Node>> & get_model_inputs() const override {
         return m_model_inputs;
     }
@@ -144,12 +144,14 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
 
     void clear_model_weights() { m_model_weights.clear(); }
 
+    static ov::element::Type get_ov_type(const ggml_tensor * tensor);
+
+    static std::vector<size_t> get_shape(const ggml_tensor * tensor);
+
+    static std::vector<size_t> get_stride(const ggml_tensor * tensor);
 private:
     void set_input_output(ggml_tensor * node, bool naive = false);
     void add_extra_inputs();
-    static std::vector<size_t> get_shape(const ggml_tensor * tensor);
-    static std::vector<size_t> get_stride(const ggml_tensor * tensor);
-    static ov::element::Type get_ov_type(const ggml_tensor * tensor);
 
     void set_llm_params();
     void validate_cgraph() const;
@@ -165,7 +167,6 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
     std::vector<std::string> m_output_names;
     std::string m_op_name;
     mutable std::string m_name;
-    int m_op_case = 0;
     std::vector<std::pair<std::string, std::string>> m_op_node_name;
     std::map<std::string, std::shared_ptr<ov::Node>> m_model_inputs;
     std::map<std::string, std::shared_ptr<ov::Node>> m_model_extra_inputs;
diff --git a/ggml/src/ggml-openvino/openvino/decoder.hpp b/ggml/src/ggml-openvino/openvino/decoder.hpp
index 8f86a4de064..4304001190f 100644
--- a/ggml/src/ggml-openvino/openvino/decoder.hpp
+++ b/ggml/src/ggml-openvino/openvino/decoder.hpp
@@ -5,6 +5,7 @@
 #include <openvino/core/node.hpp>
 #include <openvino/frontend/decoder.hpp>
 #include <string>
+#include "ggml.h"
 
 namespace ov {
 namespace frontend {
@@ -45,13 +46,9 @@ class GgmlDecoder : public DecoderBase {
 
     virtual std::vector<std::string> get_output_names() const = 0;
 
-    virtual const std::string& get_op_type() const = 0;
-
     virtual const std::string& get_op_name() const = 0;
 
-    virtual void visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>)> node_visitor) const = 0;
-
-    virtual int get_op_case() const = 0;
+    virtual void visit_subgraph(std::function<void(ggml_tensor * tensor, bool is_static)> node_visitor) const = 0;
 
     virtual const std::map<std::string, std::shared_ptr<ov::Node>>& get_model_inputs() const = 0;
     virtual const std::map<std::string, std::shared_ptr<ov::Node>>& get_model_extra_inputs() const = 0;
diff --git a/ggml/src/ggml-openvino/openvino/node_context.hpp b/ggml/src/ggml-openvino/openvino/node_context.hpp
index 0d76dc83e05..b49d2ea51f6 100644
--- a/ggml/src/ggml-openvino/openvino/node_context.hpp
+++ b/ggml/src/ggml-openvino/openvino/node_context.hpp
@@ -1,11 +1,11 @@
 #pragma once
 
+#include "ggml-openvino/ggml-decoder.h"
+
 #include <cstdint>
 #include <openvino/frontend/node_context.hpp>
 #include <string>
 
-#include "decoder.hpp"
-
 namespace ov {
 namespace frontend {
 namespace ggml {
@@ -16,98 +16,207 @@ typedef std::map<std::string, Output<Node>> TensorMap;
 
 class NodeContext : public frontend::NodeContext {
 public:
-    NodeContext(const std::shared_ptr<GgmlDecoder>& decoder,
-                std::shared_ptr<TensorMap>& tensor_map,
-                TranslateSession* translate_session = nullptr)
-        : ov::frontend::NodeContext(decoder->get_op_type()),
-          m_decoder(decoder),
-          m_tensor_map(tensor_map),
-          m_translate_session(translate_session) {
-        m_input_names = decoder->get_input_names();
-        m_output_names = decoder->get_output_names();
-    }
+    // Builds the translation context for one ggml graph node. The output is registered under the
+    // node's name (for GGML_OP_SET_ROWS: the name of its view_src) and every non-null source under
+    // its own name; the op case is then derived from the node via compute_op_case().
+    NodeContext(ggml_tensor * node,
+                std::shared_ptr<TensorMap> & tensor_map,
+                bool is_static = false,
+                std::string op_type = "",
+                TranslateSession * translate_session = nullptr) :
+        ov::frontend::NodeContext(op_type),
+        m_node(node),
+        m_tensor_map(tensor_map),
+        m_is_static(is_static),
+        m_translate_session(translate_session),
+        m_node_name(std::string(node->name)),
+        m_op_case(0) {
+        std::string node_name;
+        if (node->op == GGML_OP_SET_ROWS) {
+            node_name = std::string(node->view_src->name);
+        } else {
+            node_name = std::string(node->name);
+        }
 
-    TranslateSession* get_translate_session() const {
-        return m_translate_session;
+        m_output_names.push_back(node_name);
+        m_outputs[node_name] = node;
+
+        for (int i = 0; i < GGML_MAX_SRC; i++) {
+            auto * src = node->src[i];
+            if (src == nullptr) {
+                continue;
+            }
+            std::string src_name = std::string(src->name);
+            m_input_names.push_back(src_name);
+            m_inputs[src_name] = src;
+        }
+
+        m_op_case = compute_op_case(node);
     }
 
-    const std::vector<std::string>& get_input_names() const { return m_input_names; }
+    TranslateSession * get_translate_session() const { return m_translate_session; }
 
-    size_t get_input_size() const override {
-        return m_decoder->get_input_size();
-    }
+    const std::vector<std::string> & get_input_names() const { return m_input_names; }
+
+    const std::vector<std::string> & get_output_names() const { return m_output_names; }
+
+    size_t get_input_size() const override { return m_input_names.size(); }
 
     ov::element::Type get_input_type(size_t index) const {
-        return m_decoder->get_input_type(m_input_names[index]);
+        return GgmlOvDecoder::get_ov_type(m_inputs.at(m_input_names[index]));
     }
 
     PartialShape get_input_shape(size_t index) const {
-        return m_decoder->get_input_shape(m_input_names[index]);
+        return ov::PartialShape(GgmlOvDecoder::get_shape(m_inputs.at(m_input_names[index])));
     }
 
     std::vector<size_t> get_input_stride(size_t index) const {
-        return m_decoder->get_input_stride(m_input_names[index]);
+        return GgmlOvDecoder::get_stride(m_inputs.at(m_input_names[index]));
     }
 
     std::string get_output_name() const { return m_output_names[0]; }
 
     PartialShape get_output_shape(size_t index) const {
-        return m_decoder->get_output_shape(m_output_names[index]);
+        return ov::PartialShape(GgmlOvDecoder::get_shape(m_outputs.at(m_output_names[index])));
     }
 
-    std::vector<size_t> get_output_stride(size_t index) const {
-        return m_decoder->get_output_stride(m_output_names[index]);
-    }
+    int32_t * get_input_op_params(size_t index) const { return m_inputs.at(m_input_names[index])->op_params; }
 
-    int32_t* get_input_op_params(size_t index) const {
-        return m_decoder->get_input_op_params(m_input_names[index]);
-    }
-
-    int32_t* get_output_op_params(size_t index) const {
-        return m_decoder->get_output_op_params(m_output_names[index]);
-    }
+    int32_t * get_output_op_params(size_t index) const { return m_outputs.at(m_output_names[index])->op_params; }
 
     ov::element::Type get_output_type(size_t index) const {
-        return m_decoder->get_output_type(m_output_names[index]);
+        return GgmlOvDecoder::get_ov_type(m_outputs.at(m_output_names[index]));
     }
 
-    Output<Node> get_input(int idx) const override {
-        return m_tensor_map->at(m_decoder->get_input_name(idx));
-    }
+    Output<Node> get_input(int idx) const override { return m_tensor_map->at(m_input_names[idx]); }
 
-    Output<Node> get_input(const std::string& name) const override {
+    Output<Node> get_input(const std::string & name) const override {
         if (m_tensor_map->find(name) == m_tensor_map->end()) {
             throw std::runtime_error("'" + name + "' not found in tensor map.");
         }
         return m_tensor_map->at(name);
     }
 
-    bool has_input(const std::string& name) const {
-        return m_tensor_map->find(name) != m_tensor_map->end();
-    }
+    bool has_input(const std::string & name) const { return m_tensor_map->find(name) != m_tensor_map->end(); }
 
-    const std::string& get_name() const override {
-        return m_decoder->get_op_name();
-    }
+    const std::string & get_name() const override { return m_node_name; }
 
-    ov::Any get_attribute_as_any(const std::string& name) const override {
-        return m_decoder->get_attribute(name);
+    ov::Any get_attribute_as_any(const std::string & name) const override {
+        GGML_UNUSED(name);
+        return nullptr;
     }
 
-    int get_op_case() const {
-        return m_decoder->get_op_case();
-    }
-    bool is_static() const { return m_decoder->is_static(); }
+    int get_op_case() const { return m_op_case; }
+
+    bool is_static() const { return m_is_static; }
 
 private:
-    std::shared_ptr<GgmlDecoder> m_decoder;
-    std::shared_ptr<TensorMap>& m_tensor_map;
-    TranslateSession* m_translate_session;
+    ggml_tensor * m_node;
+    std::shared_ptr<TensorMap> & m_tensor_map;
+    bool m_is_static = false;
+    TranslateSession * m_translate_session;
     std::vector<std::string> m_input_names;
     std::vector<std::string> m_output_names;
+    std::string m_node_name;
+    std::map<std::string, ggml_tensor *> m_inputs;
+    std::map<std::string, ggml_tensor *> m_outputs;
+    int m_op_case;
+
+    // Classifies `node` into the op-specific sub-case reported by get_op_case(); 0 means that no
+    // special case matched. Only the node itself and the tensors it links to are inspected.
+    // Throws std::runtime_error for a VIEW of a VIEW whose element count differs from its source.
+    int compute_op_case(ggml_tensor * node) {
+        int op_case = 0;
+        switch (node->op) {
+        case GGML_OP_RESHAPE: {
+            auto * src = node->src[0];
+            if (src->op == GGML_OP_RESHAPE && src->src[0]->ne[0] == node->ne[0] && src->src[0]->ne[1] == node->ne[1]) {
+                op_case = 4;
+            } else if (node->ne[0] * node->ne[1] == src->ne[0]) {
+                op_case = 1;
+            } else if (src->ne[0] * src->ne[1] == node->ne[0]) {
+                op_case = 2;
+                if (src->ne[2] * src->ne[3] == node->ne[1]) {
+                    op_case = 5;
+                }
+            } else if (src->ne[0] * src->ne[1] == node->ne[1]) {
+                op_case = 3;
+            } else if (src->ne[1] * src->ne[2] == node->ne[1]) {
+                op_case = 6;
+            }
+            break;
+        }
+        case GGML_OP_CONT: {
+            if (node->src[0]->op == GGML_OP_PERMUTE) {
+                op_case = 1;
+            } else if (node->src[0]->op == GGML_OP_TRANSPOSE) {
+                op_case = 2;
+            } else if (node->src[0]->op == GGML_OP_VIEW) {
+                // The input comes from a VIEW which is a subtensor
+                op_case = 3;
+            }
+            break;
+        }
+        case GGML_OP_PERMUTE: {
+            if (node->src[0]->op != GGML_OP_VIEW) {
+                op_case = 1;
+            } else if (ggml_is_contiguous(node->src[0])) {
+                std::string src_name(node->view_src->name);
+                if (src_name.find("cache") == std::string::npos) {
+                    // permute Qcur
+                    op_case = 4;
+                } else if (std::string(node->name).find("swa") == std::string::npos) {
+                    // Permute kv cache (view)
+                    op_case = 2;
+                } else {
+                    // Permute SWA kv cache (view).
+                    // NOTE(review): the decoder chose this via is_swa_layer(extract_layer_from_name(src_name));
+                    // NodeContext has no SWA layer list, so the node name is matched instead - confirm.
+                    op_case = 3;
+                }
+            }
+            break;
+        }
+        case GGML_OP_MUL_MAT: {
+            if (node->src[0]->op == GGML_OP_CONT && node->src[0]->src[0]->op == GGML_OP_TRANSPOSE) {
+                op_case = 2;
+            } else if (node->src[0]->op == GGML_OP_VIEW && node->src[1]->op == GGML_OP_VIEW) {
+                // test-backend-ops case
+                op_case = 3;
+            }
+            break;
+        }
+        case GGML_OP_GET_ROWS: {
+            if (node->src[1]->op == GGML_OP_VIEW) {
+                op_case = 2;
+            }
+            break;
+        }
+        case GGML_OP_ROPE: {
+            if (node->src[0]->op == GGML_OP_VIEW) {
+                op_case = 2;
+            }
+            break;
+        }
+        case GGML_OP_VIEW: {
+            if (node->src[0]->op == GGML_OP_VIEW) {
+                auto * src = node->src[0];
+                if (ggml_nelements(node) != ggml_nelements(src)) {
+                    throw std::runtime_error("Unsupported VIEW case");
+                }
+                // This view is a reshape, slicing happens at src->op
+                op_case = 2;
+            }
+            break;
+        }
+        default:
+            break;
+        }
+        return op_case;
+    }
 };
 
-using CreatorFunction = std::function<ov::OutputVector(const ov::frontend::ggml::NodeContext&)>;
+using CreatorFunction = std::function<ov::OutputVector(const ov::frontend::ggml::NodeContext &)>;
 
 }  // namespace ggml
 }  // namespace frontend
diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp
index d12701acdc7..60d2d1d55e0 100644
--- a/ggml/src/ggml-openvino/openvino/translate_session.cpp
+++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp
@@ -5,6 +5,7 @@
 #include "input_model.hpp"
 #include "pass/eliminate_zp.hpp"
 #include "pass/mark_decompression_convert_constant_folding.hpp"
+#include "ggml-openvino/ggml-decoder.h"
 
 #include <cstdint>
 #include <cstdlib>
@@ -164,8 +165,8 @@ std::shared_ptr<Model> TranslateSession::translate_graph(const frontend::InputMo
         (*tensor_map)[it.first] = it.second;
     }
 
-    auto node_visitor = [&](std::shared_ptr<GgmlDecoder> node) {
-        auto operation_type = node->get_op_type();
+    auto node_visitor = [&](ggml_tensor * node, bool is_static) {
+        auto operation_type = GgmlOvDecoder::get_ggml_op_type(node);
         if (operation_type == "GGML_OP_NONE") {
             return;
         }
@@ -174,10 +175,10 @@ std::shared_ptr<Model> TranslateSession::translate_graph(const frontend::InputMo
         auto it = m_translator_map.find(operation_type);
         FRONT_END_OP_CONVERSION_CHECK(it != m_translator_map.end(), "Translation for operation type ", operation_type,
                                       " is not implemented.");
-        NodeContext node_context(node, tensor_map, this);
+        NodeContext node_context(node, tensor_map, is_static, operation_type, this);
         converted_outputs = it->second(node_context);
 
-        const auto & node_output_names = node->get_output_names();
+        const auto & node_output_names = node_context.get_output_names();
         FRONT_END_OP_CONVERSION_CHECK(node_output_names.size() == converted_outputs.size(), "Number of ",
                                       operation_type, " outputs greater than number of converted outputs, which are ",
                                       node_output_names.size(), " and ", converted_outputs.size(), " respectively.");