diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index 72f6144708a..20cc02e98ba 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -59,13 +59,19 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph,
     }
     validate_cgraph();
-
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
         auto * cur_node = cgraph->nodes[node_n];
         m_nodes.push_back(cur_node);
         set_input_output(cur_node);
     }
+    m_is_full_model = has_inp_tokens && has_output;
+    if (!m_is_full_model) {
+        compute_cgraph_dynamic_dims();
+        add_extra_model_inputs_for_fallback();
+        add_extra_model_outputs_for_fallback();
+    }
+
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
         m_node_info_list[node_n].node_op_case = compute_op_case(m_node_info_list[node_n].node);
         m_node_info_list[node_n].node_op_type = compute_op_type(m_node_info_list[node_n].node);
@@ -150,6 +156,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
         current_node_info.node_inputs[src_name] = src;
         current_node_info.node_inputs_names.push_back(src_name);
 
+        if (src_name == "inp_tokens") {
+            has_inp_tokens = true;
+        }
+
         // Add model inputs
         if (!naive && !src->view_src) {
             ggml_backend_buffer * buffer = src->buffer;
@@ -176,6 +186,9 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
     if (!naive) {
         // Model outputs are tensors with GGML_TENSOR_FLAG_OUTPUT flag and kv_caches
         static std::set<std::string> debug_output_names = {};
+        if (node_output_name.find("output") != std::string::npos) {
+            has_output = true;
+        }
         // Workaround: the final tensor "result_output" does not have GGML_TENSOR_FLAG_OUTPUT flag set in cgraph
         if (node->op == GGML_OP_SET_ROWS || node->flags & GGML_TENSOR_FLAG_OUTPUT ||
             node_output_name.find("output") != std::string::npos || debug_output_names.count(node_output_name)) {
@@ -264,6 +277,9 @@ int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const {
                 throw std::runtime_error("Unsupported VIEW case");
             }
             op_case = 2;
+            if (!m_is_full_model && m_model_inputs.find(std::string(src->name)) != m_model_inputs.end()) {
+                op_case = 0;
+            }
         }
         break;
     }
@@ -359,7 +375,7 @@ void GgmlOvDecoder::validate_cgraph() const {
     }
 }
 
-ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor * op, const ggml_tensor * input) const {
+ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor * op, const ggml_tensor * input, int dynamic_dim_index) const {
     auto name = std::string(input->name);
     ov::PartialShape input_shape;
@@ -391,6 +407,9 @@ ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor * op, co
     } else {
         input_shape = ov::PartialShape{get_shape(input)};
     }
+    if (dynamic_dim_index != -1) {
+        input_shape[3 - dynamic_dim_index] = -1;
+    }
     return input_shape;
 }
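For reference: ggml stores a tensor's extents in `ne[0..3]` with `ne[0]` as the innermost dimension, while the `ov::PartialShape` produced from `get_shape()` is listed outermost-first, which is what the `3 - dynamic_dim_index` indexing above implies. A minimal sketch of that mapping (illustration only, not part of the patch; the helper name is made up):

```cpp
// Sketch: map a ggml dimension index onto the 4-D ov::PartialShape used above.
// ggml dim d corresponds to PartialShape index 3 - d.
#include <cstdint>
#include <openvino/core/partial_shape.hpp>

static ov::PartialShape shape_with_dynamic_dim(const int64_t ne[4], int dynamic_dim_index) {
    // Reverse ggml's ne[] order into OpenVINO's outermost-first order.
    ov::PartialShape shape{ne[3], ne[2], ne[1], ne[0]};
    if (dynamic_dim_index != -1) {
        // Same effect as assigning -1 in the patch: the dimension becomes dynamic.
        shape[3 - dynamic_dim_index] = ov::Dimension::dynamic();
    }
    return shape;
}
```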
@@ -863,3 +882,201 @@ const std::string & GgmlOvDecoder::get_op_type() const {
     static const std::string unknown_op = "UNKNOWN_GGML_OP";
     return unknown_op;
 }
+
+/**
+ * @brief Computes the dynamic dimensions for the computation graph nodes to support fallback mechanisms.
+ *
+ * This function traverses the computation graph and determines the dynamic dimension
+ * of each node based on its operation type and dependencies. The dynamic dimension
+ * index is stored in the `m_node_dynamic_dims` map, where a value of -1 indicates no
+ * dynamic dimension. Specific operations such as GGML_OP_GET_ROWS, GGML_OP_MUL,
+ * GGML_OP_VIEW, etc. are handled to compute the dynamic dimension index.
+ *
+ * Key behaviors:
+ * - Nodes with operations like GGML_OP_NONE, GGML_OP_GET_ROWS, GGML_OP_MUL, and others
+ *   are analyzed to determine their dynamic dimensions.
+ * - Nodes with specific names (e.g., "inp_tokens", "inp_pos", "inp_out_ids") are
+ *   explicitly assigned a dynamic dimension index of 0.
+ * - For operations like GGML_OP_VIEW and GGML_OP_RESHAPE, the function ensures that
+ *   the dynamic dimension is uniquely determined; otherwise, a warning is printed.
+ * - Unhandled operations print a message with the node name and operation type.
+ *
+ * This function prepares the computation graph for execution by ensuring that dynamic
+ * dimensions are correctly propagated and resolved.
+ */
+void GgmlOvDecoder::compute_cgraph_dynamic_dims() {
+    auto visit_node = [&](auto && self, ggml_tensor * node) -> void {
+        if (!node) {
+            return;
+        }
+
+        if (node->op == GGML_OP_CPY) {
+            m_node_dynamic_dims[node] = -1;
+        }
+
+        if (m_node_dynamic_dims.count(node)) {
+            return;
+        }
+        for (int i = 0; i < GGML_MAX_SRC; i++) {
+            ggml_tensor * src = node->src[i];
+            if (src) {
+                self(self, src);
+            }
+        }
+        switch (node->op) {
+            case GGML_OP_NONE:
+                m_node_dynamic_dims[node] = -1;
+                if (std::string(node->name) == "inp_tokens" || std::string(node->name) == "inp_pos" ||
+                    std::string(node->name) == "inp_out_ids") {
+                    m_node_dynamic_dims[node] = 0;
+                }
+                break;
+            case GGML_OP_GET_ROWS:
+                m_node_dynamic_dims[node] = -1;
+                if (m_node_dynamic_dims[node->src[1]] != -1) {
+                    m_node_dynamic_dims[node] = 1;
+                }
+                break;
+            case GGML_OP_MUL:
+            case GGML_OP_MUL_MAT:
+                m_node_dynamic_dims[node] = -1;
+                if (m_node_dynamic_dims[node->src[0]] != -1) {
+                    m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[0]];
+                }
+                if (m_node_dynamic_dims[node->src[1]] != -1) {
+                    m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[1]];
+                }
+                break;
+            case GGML_OP_VIEW:
+            case GGML_OP_FLASH_ATTN_EXT:
+            case GGML_OP_PERMUTE:
+            case GGML_OP_RESHAPE:
+                m_node_dynamic_dims[node] = -1;
+                if (m_node_dynamic_dims[node->src[0]] != -1) {
+                    auto dynamic_dim_idx = m_node_dynamic_dims[node->src[0]];
+                    auto dynamic_dim_value = node->src[0]->ne[dynamic_dim_idx];
+                    int same_dim_count = 0;
+                    for (int i = 0; i < 4; i++) {
+                        if (node->ne[i] == dynamic_dim_value) {
+                            m_node_dynamic_dims[node] = i;
+                            same_dim_count++;
+                        }
+                    }
+                    if (same_dim_count != 1) {
+                        std::cout << "Cannot determine dynamic dim for node: " << node->name << std::endl;
+                    }
+                }
+                break;
+            case GGML_OP_RMS_NORM:
+            case GGML_OP_ADD:
+            case GGML_OP_GLU:
+            case GGML_OP_ROPE:
+            case GGML_OP_SCALE:
+                m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[0]];
+                break;
+            case GGML_OP_CPY:
+            case GGML_OP_SET_ROWS:
+                m_node_dynamic_dims[node] = -1;
+                break;
+            default:
+                std::cout << "Unhandled node: " << node->name << " op: " << ggml_op_name(node->op) << std::endl;
+                break;
+        }
+    };
+
+    for (int i = 0; i < m_cgraph->n_nodes; i++) {
+        ggml_tensor * node = m_cgraph->nodes[i];
+        visit_node(visit_node, node);
+    }
+}
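A small illustration (not part of the patch) of the self-passing recursive-lambda idiom used by `compute_cgraph_dynamic_dims()` above: a generic lambda cannot refer to itself by name, so it receives itself as the first argument and recurses through `self(self, ...)`, avoiding a `std::function` indirection.

```cpp
// Sketch of the recursive-lambda pattern: the lambda takes itself as "self".
#include <cstdio>

int main() {
    auto factorial = [](auto && self, int n) -> int {
        return n <= 1 ? 1 : n * self(self, n - 1);
    };
    std::printf("5! = %d\n", factorial(factorial, 5));  // prints "5! = 120"
    return 0;
}
```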
+
+/**
+ * @brief Adds extra model outputs to support fallback mechanisms.
+ *
+ * This function ensures that all relevant nodes in the computation graph are included
+ * as model outputs for fallback scenarios. It creates a mapping of tensor data addresses
+ * to their corresponding nodes, excluding nodes with the GGML_OP_VIEW operation.
+ *
+ * Key behaviors:
+ * - Iterates through all nodes in the computation graph and maps their data addresses
+ *   to the corresponding tensor nodes, skipping nodes with GGML_OP_VIEW.
+ * - Adds nodes to the `m_model_outputs` map if they are not already present, using
+ *   the tensor's name as the key.
+ *
+ * This function is essential for ensuring that fallback mechanisms have access to all
+ * necessary model outputs, particularly in scenarios where certain outputs are not
+ * explicitly defined in the original model configuration.
+ */
+void GgmlOvDecoder::add_extra_model_outputs_for_fallback() {
+    std::map<void *, ggml_tensor *> address_map;
+    for (int i = 0; i < m_cgraph->n_nodes; i++) {
+        ggml_tensor * node = m_cgraph->nodes[i];
+        if (node->op == GGML_OP_VIEW) {
+            continue;
+        }
+        address_map[node->data] = node;
+    }
+
+    for (const auto & pair : address_map) {
+        const std::string & name = pair.second->name;
+        if (m_model_outputs.find(name) == m_model_outputs.end()) {
+            m_model_outputs[name] = pair.second;
+        }
+    }
+}
+
+/**
+ * @brief Adds extra model inputs to support fallback mechanisms.
+ *
+ * This function ensures that all necessary input nodes in the computation graph are
+ * included as model inputs for fallback scenarios. It iterates through the source nodes
+ * of each computation graph node and adds them to the `m_model_inputs` map if they meet
+ * specific criteria.
+ *
+ * Key behaviors:
+ * - Skips source nodes that are already present in `m_model_weights` or `m_model_inputs`.
+ * - Excludes intermediate nodes that are part of `m_node_info_list`.
+ * - For eligible source nodes, creates OpenVINO parameter nodes with appropriate types
+ *   and shapes, and assigns them friendly names.
+ * - Updates the `m_inputs` and `m_model_inputs` maps with the new parameter nodes.
+ *
+ * This function is critical for ensuring that fallback mechanisms have access to all
+ * required model inputs, particularly in scenarios where certain inputs are not
+ * explicitly defined in the original model configuration.
+ */
+void GgmlOvDecoder::add_extra_model_inputs_for_fallback() {
+    for (int i = 0; i < m_cgraph->n_nodes; i++) {
+        ggml_tensor * node = m_cgraph->nodes[i];
+        for (int j = 0; j < GGML_MAX_SRC; j++) {
+            auto * src = node->src[j];
+            if (src == nullptr) {
+                continue;
+            }
+            std::string src_name = std::string(src->name);
+            if (m_model_weights.find(src_name) != m_model_weights.end()) {
+                continue;
+            }
+
+            bool is_intermediate_node = false;
+            for (const auto & node_info : m_node_info_list) {
+                if (node_info.node == src) {
+                    is_intermediate_node = true;
+                    break;
+                }
+            }
+            if (is_intermediate_node) {
+                continue;
+            }
+            if (m_model_inputs.find(src_name) != m_model_inputs.end()) {
+                continue;
+            }
+
+            m_inputs[src_name] = src;
+            auto param_node = std::make_shared<ov::op::v0::Parameter>(
+                get_ov_type(src), get_graph_input_shape(node, src, m_node_dynamic_dims[src]));
+            param_node->set_friendly_name(src_name);
+            param_node->output(0).get_tensor().set_names({src_name});
+            m_model_inputs[src_name] = param_node;
+        }
+    }
+}
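The parameter-creation pattern used above, shown in isolation as a hedged sketch (the element type, shape, and helper name are illustrative and not part of the patch): a named `ov::op::v0::Parameter` with one dynamic dimension, whose friendly name and tensor name match the ggml tensor name.

```cpp
// Sketch: create a named graph input with a dynamic token dimension.
#include <memory>
#include <string>
#include <openvino/op/parameter.hpp>

std::shared_ptr<ov::op::v0::Parameter> make_extra_input(const std::string & name) {
    // Middle (token) dimension left dynamic; f32 chosen only for the example.
    ov::PartialShape shape{1, ov::Dimension::dynamic(), 4096};
    auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, shape);
    param->set_friendly_name(name);
    param->output(0).get_tensor().set_names({name});
    return param;
}
```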
+*/ +void GgmlOvDecoder::add_extra_model_inputs_for_fallback() { + for (int i = 0; i < m_cgraph->n_nodes; i++) { + ggml_tensor * node = m_cgraph->nodes[i]; + for (int i = 0; i < GGML_MAX_SRC; i++) { + auto * src = node->src[i]; + if (src == nullptr) { + continue; + } + std::string src_name = std::string(src->name); + if (m_model_weights.find(src_name) != m_model_weights.end()) { + continue; + } + + bool is_intermediate_node = false; + for (const auto & node_info : m_node_info_list) { + if (node_info.node == src) { + is_intermediate_node = true; + break; + } + } + if (is_intermediate_node) { + continue; + } + if (m_model_inputs.find(src_name) != m_model_inputs.end()) { + continue; + } + + m_inputs[src_name] = src; + auto param_node = std::make_shared( + get_ov_type(src), get_graph_input_shape(node, src, m_node_dynamic_dims[src])); + param_node->set_friendly_name(src_name); + param_node->output(0).get_tensor().set_names({src_name}); + m_model_inputs[src_name] = param_node; + } + } +} diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index 111eb7200b8..341bc768501 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -175,7 +175,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { virtual bool is_static() const override { return m_is_static; } - ov::PartialShape get_graph_input_shape(const ggml_tensor * op, const ggml_tensor * input) const; + ov::PartialShape get_graph_input_shape(const ggml_tensor * op, const ggml_tensor * input, int dynamic_dim_index=-1) const; static void dump_cgraph(const ggml_cgraph * cgraph, std::string & filename); @@ -202,8 +202,11 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { void set_compute_params(const ComputeParams & compute_params) { m_compute_params = compute_params; } + virtual bool is_full_model() const override {return m_is_full_model; } + bool m_is_static = false; bool m_is_prefill = false; + bool m_is_full_model = true; // label the cgraph is splited or not int m_prefill_chunk_size = 0; static std::vector get_shape(const ggml_tensor * tensor); @@ -216,6 +219,13 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { void set_input_output(ggml_tensor * node, bool naive = false); int compute_op_case(const ggml_tensor * node) const; + // @brief Computes the dynamic dimensions for the computation graph nodes to support fallback mechanisms. + void compute_cgraph_dynamic_dims(); + // @brief Adds extra model outputs to support fallback mechanisms. + void add_extra_model_outputs_for_fallback(); + // @brief Adds extra model inputs to support fallback mechanisms. 
diff --git a/ggml/src/ggml-openvino/openvino/decoder.hpp b/ggml/src/ggml-openvino/openvino/decoder.hpp
index 1603c7fd201..71d3c26e9c9 100644
--- a/ggml/src/ggml-openvino/openvino/decoder.hpp
+++ b/ggml/src/ggml-openvino/openvino/decoder.hpp
@@ -53,6 +53,8 @@ class GgmlDecoder : public DecoderBase {
     virtual int get_op_case(int node_idx) const = 0;
 
+    virtual bool is_full_model() const = 0;
+
     virtual const std::map>& get_model_inputs() const = 0;
     virtual const std::map>& get_model_extra_inputs() const = 0;
     virtual const std::map>& get_model_weights() const = 0;
diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp
index 546778a4707..45fe19d4918 100644
--- a/ggml/src/ggml-openvino/openvino/translate_session.cpp
+++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp
@@ -190,7 +190,7 @@ std::shared_ptr TranslateSession::translate_graph(const frontend::InputMo
         }
     };
 
-    if (!m_naive) {
+    if (!m_naive && ggml_model_decoder->is_full_model()) {
         preprocess(*tensor_map, *ggml_model_decoder);
     }
     ggml_model_decoder->visit_subgraph(node_visitor);
diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index 1f94d4bad60..5e0f5cb097d 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -3,9 +3,9 @@
 #include "ggml-impl.h"
 #include "ggml-openvino/ggml-decoder.h"
 #include "ggml.h"
+#include "ggml-cpu.h"
 #include "openvino/frontend.hpp"
 #include "openvino/input_model.hpp"
-
 #include
 #include
 #include
@@ -432,7 +432,7 @@ std::map get_types_to_requant(const std::string & dev
 }
 
 bool is_naive(ggml_cgraph * cgraph) {
-    constexpr int naive_graph_size_threshold = 20;
+    constexpr int naive_graph_size_threshold = 0;
     return cgraph->n_nodes < naive_graph_size_threshold;
 }
@@ -480,12 +480,77 @@ ov::Tensor convert_ggml_input_to_ov(std::shared_ptr ggml_decoder,
     const auto * ggml_tensor = ggml_decoder->get_input_ggml_tensor(name);
     auto * input_data = ggml_tensor->data;
     ov::Shape input_shape;
-    if (ggml_tensor->op == GGML_OP_VIEW) {
+    if (0) {
         // This case is added to make test-backend-ops work
         input_shape = ggml_decoder->get_shape(ggml_tensor->view_src);
     } else {
         input_shape = ggml_decoder->get_shape(ggml_tensor);
     }
+
+    // If the tensor is the result of a PERMUTE operation, use ggml_cont to make it contiguous
+    if (ggml_tensor->op == GGML_OP_PERMUTE && !ggml_decoder->is_full_model()) {
+        // Create a temporary context for the ggml_cont operation.
+        // Need space for: tensor overhead, tensor data, graph structure, and work buffer.
+        size_t mem_size = ggml_tensor_overhead() * 4 + ggml_nbytes(ggml_tensor) * 2 + 1024 * 1024;
+        struct ggml_init_params params = {
+            /*.mem_size   =*/ mem_size,
+            /*.mem_buffer =*/ NULL,
+            /*.no_alloc   =*/ false,
+        };
+        struct ggml_context * temp_ctx = ggml_init(params);
+        if (temp_ctx == NULL) {
+            throw std::runtime_error("Failed to initialize temporary context for PERMUTE");
+        }
+
+        // Create a contiguous tensor using ggml_cont
+        struct ggml_tensor * cont_tensor = ggml_cont(temp_ctx, const_cast<struct ggml_tensor *>(ggml_tensor));
+
+        // Build a simple graph to compute ggml_cont
+        struct ggml_cgraph * gf = ggml_new_graph(temp_ctx);
+        ggml_build_forward_expand(gf, cont_tensor);
+        ggml_graph_compute_with_ctx(temp_ctx, gf, 1);
+
+        // Create the OpenVINO tensor with the contiguous data
+        ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
+        memcpy(input_tensor.data(), cont_tensor->data, ggml_nbytes(cont_tensor));
+
+        // Free the temporary context
+        ggml_free(temp_ctx);
+
+        return input_tensor;
+    }
+
+    // If the tensor is the result of a VIEW operation, copy it into a contiguous buffer
+    if (ggml_tensor->op == GGML_OP_VIEW && !ggml_decoder->is_full_model()) {
+        // If the view has as many elements as its source tensor, the data is already
+        // contiguous and can be wrapped directly without reconstruction.
+        if (ggml_nelements(ggml_tensor) == ggml_nelements(ggml_tensor->view_src)) {
+            auto input_tensor = ov::Tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape, input_data);
+            return input_tensor;
+        }
+
+        // Create the OpenVINO input tensor; the data has to be reconstructed from the
+        // view tensor's shape and strides.
+        // TODO: parallelize the copy and copy a whole innermost row per iteration (perf improvement)
+        ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
+        const auto * src_tensor = ggml_tensor->view_src;
+        size_t des_index = 0;
+        for (size_t i0 = 0; i0 < static_cast<size_t>(ggml_tensor->ne[3]); i0++) {
+            for (size_t i1 = 0; i1 < static_cast<size_t>(ggml_tensor->ne[2]); i1++) {
+                for (size_t i2 = 0; i2 < static_cast<size_t>(ggml_tensor->ne[1]); i2++) {
+                    for (size_t i3 = 0; i3 < static_cast<size_t>(ggml_tensor->ne[0]); i3++) {
+                        size_t src_index = ggml_tensor->view_offs + i0 * ggml_tensor->nb[3] + i1 * ggml_tensor->nb[2] +
+                                           i2 * ggml_tensor->nb[1] + i3 * ggml_tensor->nb[0];
+
+                        memcpy(static_cast<char *>(input_tensor.data()) + des_index,
+                               static_cast<const char *>(src_tensor->data) + src_index, ggml_tensor->nb[0]);
+                        des_index += ggml_tensor->nb[0];
+                    }
+                }
+            }
+        }
+        return input_tensor;
+    }
+
     auto input_tensor = ov::Tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape, input_data);
     return input_tensor;
 }
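For reference, a standalone sketch of the temporary-context `ggml_cont` pattern used in the PERMUTE branch above (illustration only, not part of the patch; it only relies on the public `ggml.h`/`ggml-cpu.h` API that the patch already includes). `ggml_permute` just rewrites the `ne`/`nb` metadata, so a tiny graph must be computed before the contiguous data exists:

```cpp
// Sketch: materialize a non-contiguous (permuted) tensor with ggml_cont.
#include <cstdio>
#include "ggml.h"
#include "ggml-cpu.h"

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // A 4x3 f32 tensor filled with values 0..11.
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    for (int i = 0; i < 12; i++) {
        ((float *) a->data)[i] = (float) i;
    }

    // ggml_permute only swaps dimensions in the metadata; the view is not contiguous.
    struct ggml_tensor * at = ggml_permute(ctx, a, 1, 0, 2, 3);
    // ggml_cont declares a new tensor whose data will be laid out contiguously.
    struct ggml_tensor * at_cont = ggml_cont(ctx, at);

    // Evaluate the tiny graph so at_cont->data is actually populated.
    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, at_cont);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 1);

    std::printf("cont tensor: %lld x %lld, %zu bytes\n",
                (long long) at_cont->ne[0], (long long) at_cont->ne[1], ggml_nbytes(at_cont));

    ggml_free(ctx);
    return 0;
}
```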