25 changes: 5 additions & 20 deletions include/oneapi/dnnl/dnnl_types.h
@@ -64,24 +64,6 @@ typedef enum {
dnnl_format_kind_max = 0x7fff,
} dnnl_format_kind_t;

#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Sparse encodings.
typedef enum {
/// Undefined sparse encoding kind, used for empty memory descriptors.
dnnl_sparse_encoding_undef = 0,
/// Compressed Sparse Row (CSR) encoding.
dnnl_csr,
/// An encoding that is used for an opaque storage schema for
/// tensors with unstructured sparsity. A memory descriptor with the
/// packed encoding cannot be used to create a memory object. It can
/// only be used to create a primitive descriptor to query the
/// actual memory descriptor (similar to the format tag `any`).
dnnl_packed,
/// Coordinate Sparse Encoding (COO).
dnnl_coo,
} dnnl_sparse_encoding_t;
#endif

#ifdef DNNL_EXPERIMENTAL_PROFILING
/// Profiling data kind.
typedef enum {
@@ -2371,6 +2353,10 @@ typedef enum {
dnnl_sparse_encoding_packed,
dnnl_sparse_encoding_csr,
dnnl_sparse_encoding_coo,
// Legacy aliases for backward compatibility.
dnnl_packed = dnnl_sparse_encoding_packed,
dnnl_csr = dnnl_sparse_encoding_csr,
dnnl_coo = dnnl_sparse_encoding_coo,
} dnnl_sparse_encoding_t;

/* typedef struct dnnl_sparse_desc *dnnl_sparse_desc_t; */
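Note (not part of the diff): a minimal sketch verifying that the legacy alias enumerators introduced above compile and compare equal to the new names. It assumes a oneDNN build in which `dnnl_sparse_encoding_t` is visible in the public header (depending on the build, `DNNL_EXPERIMENTAL_SPARSE` may still be required).

```cpp
// Sketch only; checks the legacy aliases against the new enumerators.
#include "oneapi/dnnl/dnnl_types.h"

static_assert(dnnl_packed == dnnl_sparse_encoding_packed,
        "legacy alias must equal the new enumerator");
static_assert(dnnl_csr == dnnl_sparse_encoding_csr,
        "legacy alias must equal the new enumerator");
static_assert(dnnl_coo == dnnl_sparse_encoding_coo,
        "legacy alias must equal the new enumerator");
```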
@@ -2925,13 +2911,12 @@ typedef enum {
dnnl_query_inner_nblks_s32, ///< number of innermost blocks
dnnl_query_inner_blks, ///< vector of sizes of the innermost blocks
dnnl_query_inner_idxs, ///< vector of logical indices of the blocks
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_query_sparse_encoding, ///< Sparse encoding
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_query_nnz_s64, ///< Number of non-zero entries
dnnl_query_num_handles_s32, ///< Number of buffers required for a memory
/// descriptor
#endif
dnnl_query_sparse_encoding, ///< Sparse encoding

// Max value to prevent UB for internal-use-only dnnl_query_t values
dnnl_query_max = 0x7fff,
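Note (not part of the diff): with `dnnl_query_sparse_encoding` moved outside the experimental guard, a hedged sketch of reading it back through the C API. `dnnl_memory_desc_query()` and `const_dnnl_memory_desc_t` are assumed to behave as in oneDNN v3.x, and `dnnl_sparse_encoding_undef` is assumed to remain the zero enumerator.

```cpp
// Sketch: query the sparse encoding of a memory descriptor.
#include "oneapi/dnnl/dnnl.h"

dnnl_sparse_encoding_t query_encoding(const_dnnl_memory_desc_t md) {
    dnnl_sparse_encoding_t enc = dnnl_sparse_encoding_undef;
    // Dense descriptors are assumed to report 'undef'; on error, fall
    // back to 'undef' as well.
    if (dnnl_memory_desc_query(md, dnnl_query_sparse_encoding, &enc)
            != dnnl_success)
        return dnnl_sparse_encoding_undef;
    return enc;
}
```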
52 changes: 0 additions & 52 deletions src/cpu/aarch64/acl_reorder.cpp

This file was deleted.

235 changes: 9 additions & 226 deletions src/cpu/aarch64/acl_reorder.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2023-2025 Arm Ltd. and affiliates
* Copyright 2025 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,239 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#ifndef CPU_ACL_REORDER_HPP
#define CPU_ACL_REORDER_HPP

#include "arm_compute/core/Types.h"
#include "common/utils.hpp"
#include "cpu/acl/acl_utils.hpp"
#include "cpu/aarch64/cpu_isa_traits.hpp"
#include "cpu/reorder/cpu_reorder_pd.hpp"
#ifndef CPU_AARCH64_ACL_REORDER_HPP
#define CPU_AARCH64_ACL_REORDER_HPP

// Keep include path compatibility with code that expects this header.
#include "cpu/aarch64/reorder/acl_reorder.hpp"

// Re-export the implementation in the cpu::acl namespace expected by common headers.
namespace dnnl {
namespace impl {
namespace cpu {
namespace acl {

struct acl_reorder_obj_t {
arm_compute::NEReorderLayer reorder;
arm_compute::Tensor src_tensor;
arm_compute::Tensor dst_tensor;
arm_compute::WeightFormat src_wf;
arm_compute::WeightFormat dst_wf;
};

struct acl_reorder_conf_t {
arm_compute::TensorInfo src_info;
arm_compute::TensorInfo dst_info;
arm_compute::WeightFormat src_wf;
arm_compute::WeightFormat dst_wf;
};

struct acl_reorder_resource_t : public resource_t {
acl_reorder_resource_t()
: acl_obj_(utils::make_unique<acl_reorder_obj_t>()) {}

status_t configure(const acl_reorder_conf_t &app) {
if (!acl_obj_) return status::out_of_memory;

// Init Compute Library tensors based on info from descriptor
acl_obj_->src_tensor.allocator()->init(app.src_info);
acl_obj_->dst_tensor.allocator()->init(app.dst_info);

// clang-format off
acl_obj_->reorder.configure(
&acl_obj_->src_tensor,
&acl_obj_->dst_tensor,
app.src_wf,
app.dst_wf
);
// clang-format on

return status::success;
}

acl_reorder_obj_t &get_acl_obj() const { return *acl_obj_; }
DNNL_DISALLOW_COPY_AND_ASSIGN(acl_reorder_resource_t);

private:
std::unique_ptr<acl_reorder_obj_t> acl_obj_;
}; // acl_reorder_resource_t

struct acl_reorder_fwd_t : public primitive_t {
using primitive_t::primitive_t;
struct pd_t : public cpu_reorder_pd_t {

using cpu_reorder_pd_t::cpu_reorder_pd_t;

DECLARE_COMMON_PD_T("acl", acl_reorder_fwd_t);

static status_t create(reorder_pd_t **reorder_pd, engine_t *engine,
const primitive_attr_t *attr, engine_t *src_engine,
const memory_desc_t *src_md, engine_t *dst_engine,
const memory_desc_t *dst_md) {

using namespace acl_utils;

// ACL reorder supports f32->f32 and f32->bf16
bool ok = src_md->data_type == data_type::f32
&& utils::one_of(
dst_md->data_type, data_type::f32, data_type::bf16)
&& attr->has_default_values();

if (!ok) return status::unimplemented;

if (!attr->scales_.has_default_values(DNNL_ARG_DST)) {
int mask = attr->scales_.get_mask(DNNL_ARG_DST);
const memory_desc_wrapper input_d(src_md);
if (input_d.has_runtime_dims_or_strides() && mask > 0)
return status::unimplemented;
}

// Create and check primitive descriptor
auto _pd = make_unique_pd<pd_t>(attr, src_engine->kind(), src_md,
dst_engine->kind(), dst_md);
if (_pd == nullptr) return status::out_of_memory;
if (_pd->init(engine, src_engine, dst_engine) != status::success) {
return status::unimplemented;
}

// For 2D and 4D tensors, neither of the first two dimensions may be 1.
// This holds for both f32->f32 and f32->bf16.
if (dst_md->dims[0] == 1 || dst_md->dims[1] == 1) {
return status::unimplemented;
}

auto src_tag = memory_desc_matches_one_of_tag(
*src_md, format_tag::ab, format_tag::ba, format_tag::cdba);
ACL_CHECK_SUPPORT(format_tag::undef == src_tag,
"Only ab, ba or cdba source formats supported");

auto dst_tag = memory_desc_matches_one_of_tag(*dst_md,
format_tag::BA8b4a, format_tag::BA4b4a, format_tag::Ab4a,
format_tag::Ab8a, format_tag::Acdb8a, format_tag::Acdb4a);
ACL_CHECK_SUPPORT(format_tag::undef == dst_tag,
"Only Ab4a/Ab8a, BA8b4a/BA4b4a and Acdb8a/Acdb4a "
"destination formats supported");

if (dst_tag == format_tag::BA4b4a || dst_tag == format_tag::Acdb4a
|| dst_tag == format_tag::Ab4a) {
_pd->app_.dst_wf = arm_compute::WeightFormat::OHWIo4;
} else if (aarch64::mayiuse(aarch64::sve_256)
&& (dst_tag == format_tag::BA8b4a
|| dst_tag == format_tag::Acdb8a
|| dst_tag == format_tag::Ab8a)) {
_pd->app_.dst_wf = arm_compute::WeightFormat::OHWIo8;
} else {
return status::unimplemented;
}

arm_compute::TensorShape acl_tensor_shape_in;
arm_compute::TensorShape acl_tensor_shape_out;

// Switch for 2 or 4 dim tensors
switch (src_md->ndims) {
case 2: {
if (src_tag == format_tag::ab
&& dst_md->data_type == data_type::bf16
&& utils::one_of(dst_tag, format_tag::BA8b4a,
format_tag::BA4b4a)) { // bf16
acl_tensor_shape_in = arm_compute::TensorShape(
src_md->dims[0], src_md->dims[1]);
acl_tensor_shape_out = arm_compute::TensorShape(
dst_md->padded_dims[0], dst_md->padded_dims[1]);
} else if (src_tag == format_tag::ba
&& dst_md->data_type == data_type::f32
&& !utils::one_of(dst_tag, format_tag::BA8b4a,
format_tag::BA4b4a)) { // f32
acl_tensor_shape_in = arm_compute::TensorShape(
src_md->dims[1], src_md->dims[0]);
acl_tensor_shape_out = arm_compute::TensorShape(
dst_md->padded_dims[1], dst_md->padded_dims[0]);
} else {
return status::unimplemented;
}
} break;
case 4: {
// Currently only supporting AxBx1x1 cases
if (dst_md->dims[2] != 1 || dst_md->dims[3] != 1) {
return status::unimplemented;
}

acl_tensor_shape_in = arm_compute::TensorShape(
src_md->dims[3], src_md->dims[2], src_md->dims[1],
src_md->dims[0]);
acl_tensor_shape_out = arm_compute::TensorShape(
dst_md->padded_dims[3], dst_md->padded_dims[2],
dst_md->padded_dims[1], dst_md->padded_dims[0]);
break;
}
default: return status::unimplemented;
}

// Choose the data layout
const auto acl_layout = arm_compute::DataLayout::NCHW;

// Set Source WeightFormat
_pd->app_.src_wf = arm_compute::WeightFormat::OHWI;

// Create ACL tensor infos
const arm_compute::DataType src_acl_data_t
= acl_utils::get_acl_data_t(src_md->data_type);
_pd->app_.src_info = arm_compute::TensorInfo(
acl_tensor_shape_in, 1, src_acl_data_t, acl_layout);

const arm_compute::DataType dst_acl_data_t
= acl_utils::get_acl_data_t(dst_md->data_type);
_pd->app_.dst_info = arm_compute::TensorInfo(
acl_tensor_shape_out, 1, dst_acl_data_t, acl_layout);

ACL_CHECK_VALID(arm_compute::NEReorderLayer::validate(
&_pd->app_.src_info, &_pd->app_.dst_info, _pd->app_.src_wf,
_pd->app_.dst_wf));

// Init scratchpad memory; unused (size 0) in this implementation
_pd->init_scratchpad_md();

return safe_ptr_assign(*reorder_pd, _pd.release());
} // create

friend dnnl::impl::impl_list_item_t;
acl_reorder_conf_t app_;

}; // pd_t

acl_reorder_fwd_t(const pd_t *apd) : primitive_t(apd) {}

status_t create_resource(
engine_t *engine, resource_mapper_t &mapper) const override {
if (mapper.has_resource(this)) return status::success;

auto r = utils::make_unique<acl_reorder_resource_t>();
if (!r) return status::out_of_memory;

// Configure the resource based on information from primitive descriptor
CHECK(r->configure(pd()->app_));

mapper.add(this, std::move(r));
return status::success;
}

status_t execute(const exec_ctx_t &ctx) const override {
return execute_forward(ctx);
}

private:
// To guard the const execute_forward, the mutex must be 'mutable'
mutable std::mutex mtx;
status_t execute_forward(const exec_ctx_t &ctx) const;
const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }

}; // acl_reorder_fwd_t

using aarch64::acl_reorder_fwd_t;
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl

#endif // CPU_ACL_REORDER_HPP
#endif // CPU_AARCH64_ACL_REORDER_HPP
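Note (not part of the diff): a hedged end-to-end sketch of a reorder that the implementation above targeted (2D, f32 `ab` source to bf16 `BA8b4a` destination, no dimension of 1 among the first two), driven through the public C++ API. Whether the ACL path is actually selected depends on an aarch64 build with Compute Library enabled; the public `format_tag::BA8b4a` spelling is assumed to mirror the internal tag referenced in `pd_t::create()`.

```cpp
// Sketch only: a reorder matching the constraints checked above.
#include "oneapi/dnnl/dnnl.hpp"

void reorder_sketch() {
    using namespace dnnl;
    engine eng(engine::kind::cpu, 0);
    stream strm(eng);

    memory::desc src_md({64, 128}, memory::data_type::f32,
            memory::format_tag::ab);
    memory::desc dst_md({64, 128}, memory::data_type::bf16,
            memory::format_tag::BA8b4a); // assumed public mirror of the tag

    memory src(src_md, eng), dst(dst_md, eng);
    // The convenience constructor builds the reorder primitive directly
    // from the two memory objects.
    reorder(src, dst).execute(strm, src, dst);
    strm.wait();
}
```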