From 1956ed3c97e5c0f9427ccb4116e8ebc1ec9630dd Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Fri, 27 Jun 2025 08:20:59 +0000 Subject: [PATCH 01/17] Adjusted cmake Co-authored-by: Fabian Hofer --- CMakeLists.txt | 3 ++- include/MachineLearningCompiler/Tensor.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 17275e1..bb23771 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -436,7 +436,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC # using the project name as additional directory to include /header.h instead of header.h if it is included as internal library # where top-level project will look for the library's public headers - $ + $ # where external projects will look for the library's public headers $ ) @@ -471,6 +471,7 @@ install(EXPORT "${PROJECT_NAME}Targets" NAMESPACE ${namespace}:: DESTINATION cmake ) +add_library(mlc::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) include(CMakePackageConfigHelpers) diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index d70236c..fca0995 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -12,6 +12,12 @@ namespace mlc float *data; std::vector dim_sizes; + /** + * @brief Construct a new Tensor with with a pointer to memory and the dimension sizes sorted in by stride in descending order. + * + * @param data The pointer to the data array. + * @param dim_sizes The dimension sizes sorted by stride in descending order. 
+ */ inline Tensor(float *data, const std::vector &dim_sizes) : data(data), dim_sizes(dim_sizes) {}; }; From 4c8cafc00d1b6cf5eff527747c89e68b2e8e05fd Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Fri, 27 Jun 2025 08:28:50 +0000 Subject: [PATCH 02/17] feat: tensor constructor --- include/MachineLearningCompiler/Tensor.h | 27 +++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index fca0995..f98da0e 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -9,6 +9,7 @@ namespace mlc { struct Tensor { + bool ownsData = false; float *data; std::vector dim_sizes; @@ -18,7 +19,31 @@ namespace mlc * @param data The pointer to the data array. * @param dim_sizes The dimension sizes sorted by stride in descending order. */ - inline Tensor(float *data, const std::vector &dim_sizes) : data(data), dim_sizes(dim_sizes) {}; + Tensor(float *data, const std::vector &dim_sizes) : data(data), dim_sizes(dim_sizes) {}; + + /** + * @brief Construct a new Tensor with the dimension sizes sorted by stride in descending order. + * + * @param dim_sizes The dimension sizes sorted by stride in descending order. 
+ */ + Tensor(const std::vector &dim_sizes) : dim_sizes(dim_sizes) + { + uint64_t size = 1; + for (auto dim : dim_sizes) + { + size *= dim; + } + data = new float[size]; + ownsData = true; + }; + + ~Tensor() + { + if (ownsData && data != nullptr) + { + delete[] data; + } + } }; /** From fa9cebeeeb34e5afd4be0d835d5fe5980a717e50 Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Fri, 27 Jun 2025 08:47:17 +0000 Subject: [PATCH 03/17] fix --- include/MachineLearningCompiler/Tensor.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index f98da0e..ededcdf 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -2,6 +2,7 @@ #define MLC_TENSOR_H #include +#include #include #include @@ -10,7 +11,7 @@ namespace mlc struct Tensor { bool ownsData = false; - float *data; + float *data = nullptr; std::vector dim_sizes; /** @@ -42,6 +43,7 @@ namespace mlc if (ownsData && data != nullptr) { delete[] data; + data = nullptr; } } }; @@ -60,7 +62,7 @@ namespace mlc * @param output The output tensor. * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. 
*/ - void einsum(const std::vector &inputs, Tensor &output, const std::string &tree); + void einsum(const std::vector> &inputs, Tensor &output, const std::string &tree); } // namespace mlc From 4e98d80526df15684a45b39591ef60c0f74fa76f Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Fri, 27 Jun 2025 08:51:16 +0000 Subject: [PATCH 04/17] fix2 --- src/interface/Tensor.cpp | 4 ++-- src/interface/TensorUtils.h | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index 67a7eea..7b74525 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -34,7 +34,7 @@ void mlc::fill_random(Tensor &tensor) } } -void mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) +void mlc::einsum(const std::vector> &inputs, Tensor &output, const std::string &tree) { mini_jit::EinsumTree einsumTree(tree); mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); @@ -47,7 +47,7 @@ void mlc::einsum(const std::vector &inputs, Tensor &output, const std::s std::vector tensors(inputs.size() + 1); for (size_t i = 0; i < inputs.size(); i++) { - tensors[i] = inputs[i].data; + tensors[i] = inputs[i].get().data; } tensors[inputs.size()] = output.data; diff --git a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index 8abcba3..d69a69a 100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -2,14 +2,15 @@ #include "../main/EinsumTree.h" #include -constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, +constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, + const std::vector> &inputs, std::vector &sorted_dim_sizes) { if (root->left != nullptr) { if (root->left->type == mini_jit::EinsumTree::NodeType::Leaf) { - const auto &dim_sizes = inputs[root->left->input_tensor_index].dim_sizes; + const auto &dim_sizes = 
inputs[root->left->input_tensor_index].get().dim_sizes; uint i = 0; for (int64_t id : root->left->output_dim_ids) { @@ -27,7 +28,7 @@ constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNod { if (root->right->type == mini_jit::EinsumTree::NodeType::Leaf) { - const auto &dim_sizes = inputs[root->right->input_tensor_index].dim_sizes; + const auto &dim_sizes = inputs[root->right->input_tensor_index].get().dim_sizes; uint i = 0; for (int64_t id : root->right->output_dim_ids) { From 7f48100450b3b9df4f6caeb1a475265e23769fef Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Fri, 27 Jun 2025 10:07:12 +0000 Subject: [PATCH 05/17] Started setup --- include/MachineLearningCompiler/Error.h | 55 ++++++++++++++++++++++ include/MachineLearningCompiler/Setup.h | 59 ++++++++++++++++++++++++ include/MachineLearningCompiler/Tensor.h | 16 ++++++- src/interface/Setup.cpp | 1 + src/interface/SetupEinsum.cpp | 43 +++++++++++++++++ src/interface/SetupEinsum.h | 27 +++++++++++ src/interface/Tensor.cpp | 31 +++++++++++-- src/interface/TensorUtils.h | 56 ++++++++++++++++++++-- 8 files changed, 277 insertions(+), 11 deletions(-) create mode 100644 include/MachineLearningCompiler/Error.h create mode 100644 include/MachineLearningCompiler/Setup.h create mode 100644 src/interface/Setup.cpp create mode 100644 src/interface/SetupEinsum.cpp create mode 100644 src/interface/SetupEinsum.h diff --git a/include/MachineLearningCompiler/Error.h b/include/MachineLearningCompiler/Error.h new file mode 100644 index 0000000..a720c4e --- /dev/null +++ b/include/MachineLearningCompiler/Error.h @@ -0,0 +1,55 @@ +#ifndef MLC_ERROR_H +#define MLC_ERROR_H +#include +#include + +namespace mlc +{ + enum ErrorType : int64_t + { + Undefined = -1, + None = 0, + + // Parse Errors + ParseExpectedLeftBracket = 1, + ParseExpectedRightBracket = 2, + ParseExpectedArrow = 3, + ParseExpectedComma = 4, + ParseExpectedDimensionList = 5, + ParseNotAllowedToParseAgain 
= 6, + ParseUndefinedNode = 7, + + // Einsum Errors + EinsumInvalidRoot = 8, + EinsumNotEnoughInputTensors = 9, + EinsumTooManyInputTensors = 10, + EinsumNullPtrAsInputTensor = 11, + + // Execute Errors + ExecuteWrongDType = 101, + ExecuteWrongDimension = 102, + ExecuteWrongPrimitive = 103, + ExecuteFirstTouchPrimitive = 104, + ExecuteWrongFirstTouchPrimitive = 104, + ExecuteWrongMainPrimitive = 105, + ExecuteWrongLastTouchPrimitive = 106, + ExecuteTypeNotSupported = 107, + ExecuteInvalidPrimitiveConfiguration = 108, + ExecuteInvalidFirstTouchConfiguration = 109, + ExecuteInvalidMainConfiguration = 110, + ExecuteInvalidLastTouchConfiguration = 111, + ExecuteInvalidExecutionOrder = 112, + ExecuteInvalidStrides = 113, + ExecuteKDimensionMustNotBeShared = 114, + ExecuteSharedRequiredForParallelExecution = 115, + }; + + struct Error + { + ErrorType type; + std::string message; + }; + +} // namespace mlc + +#endif // MLC_ERROR_H \ No newline at end of file diff --git a/include/MachineLearningCompiler/Setup.h b/include/MachineLearningCompiler/Setup.h new file mode 100644 index 0000000..471e653 --- /dev/null +++ b/include/MachineLearningCompiler/Setup.h @@ -0,0 +1,59 @@ +#ifndef MLC_SETUP_H +#define MLC_SETUP_H + +#include "Error.h" +#include "Tensor.h" +#include +#include + +namespace mlc +{ + class Setup + { + public: + /** + * @brief Executes the setup einsum expression with input tensor of the same size. + * + * @param inputs The inputs to be einsum calculation. + * @param output The output of the einsum calculation. + * @return Error The error during the + */ + virtual Error execute(const std::vector> &inputs, Tensor &output) = 0; + + /** + * @brief Executes the setup einsum expression with input tensor of the same size. + * + * @param inputs The inputs to be einsum calculation. + * @param output The output of the einsum calculation. 
+ * @return Error The error during the + */ + virtual Error execute(const std::vector &inputs, Tensor &output) = 0; + + /** + * @brief Gets the error that was produces during the setup of the tree. + * + * @return Error The error that was produces during the setup. + */ + virtual Error getSetupError() const = 0; + }; + + /** + * @brief + * + * @param inputs The input tensors. + * @param output The output tensor. + * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. + */ + Setup &einsum_setup(const std::vector> &inputs, Tensor &output, const std::string &tree); + + /** + * @brief Executes contractions based on the given tree. + * + * @param inputs The input tensors. + * @param output The output tensor. + * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. + */ + Setup &einsum_setup(const std::vector &inputs, Tensor &output, const std::string &tree); +} // namespace mlc + +#endif // MLC_SETUP_H \ No newline at end of file diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index ededcdf..b284aab 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -1,6 +1,7 @@ #ifndef MLC_TENSOR_H #define MLC_TENSOR_H +#include "Error.h" #include #include #include @@ -38,6 +39,9 @@ namespace mlc ownsData = true; }; + /** + * @brief Destroys the tensor. + */ ~Tensor() { if (ownsData && data != nullptr) @@ -56,14 +60,22 @@ namespace mlc void fill_random(Tensor &tensor); /** - * @brief Executes contractions based on the given tree. + * @brief * * @param inputs The input tensors. * @param output The output tensor. * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. */ - void einsum(const std::vector> &inputs, Tensor &output, const std::string &tree); + Error einsum(const std::vector> &inputs, Tensor &output, const std::string &tree); + /** + * @brief Executes contractions based on the given tree. 
+ * + * @param inputs The input tensors. + * @param output The output tensor. + * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. + */ + Error einsum(const std::vector &inputs, Tensor &output, const std::string &tree); } // namespace mlc #endif // MLC_TENSOR \ No newline at end of file diff --git a/src/interface/Setup.cpp b/src/interface/Setup.cpp new file mode 100644 index 0000000..93dbc80 --- /dev/null +++ b/src/interface/Setup.cpp @@ -0,0 +1 @@ +#include "Setup.h" \ No newline at end of file diff --git a/src/interface/SetupEinsum.cpp b/src/interface/SetupEinsum.cpp new file mode 100644 index 0000000..b680ab2 --- /dev/null +++ b/src/interface/SetupEinsum.cpp @@ -0,0 +1,43 @@ +#include "SetupEinsum.h" +#include "TensorUtils.h" +#include + +mlc::SetupEinsum::SetupEinsum(const std::vector> &inputs, Tensor &output, const std::string &tree) + : einsumTree(tree) +{ + std::vector tensors(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++i) + { + tensors[i] = &(inputs[i].get()); + } + + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = ::convertParseError(errorParse); + error = {type, ""}; // TODO add error message + } + + std::vector sorted_dim_sizes; + ::get_sorted_dimensions_sizes(einsumTree.get_root(), tensors, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + + error = {mlc::ErrorType::None, "Success"}; +} + +mlc::SetupEinsum::SetupEinsum(const std::vector &inputs, Tensor &output, const std::string &tree) : einsumTree(tree) +{ + mini_jit::EinsumTree einsumTree(tree); + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = ::convertParseError(errorParse); + error = {type, ""}; // TODO add error message + } + + std::vector sorted_dim_sizes; + ::get_sorted_dimensions_sizes(einsumTree.get_root(), 
inputs, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + + error = {mlc::ErrorType::None, "Success"}; +} \ No newline at end of file diff --git a/src/interface/SetupEinsum.h b/src/interface/SetupEinsum.h new file mode 100644 index 0000000..7213539 --- /dev/null +++ b/src/interface/SetupEinsum.h @@ -0,0 +1,27 @@ +#ifndef MLC_SETUPEINSUM_H +#define MLC_SETUPEINSUM_H + +#include "../../include/MachineLearningCompiler/Setup.h" +#include "../main/EinsumTree.h" + +namespace mlc +{ + class SetupEinsum : public Setup + { + public: + SetupEinsum(const std::vector> &inputs, Tensor &output, const std::string &tree); + SetupEinsum(const std::vector &inputs, Tensor &output, const std::string &tree); + + virtual Error execute(const std::vector> &inputs, Tensor &output) override; + virtual Error execute(const std::vector &inputs, Tensor &output) override; + virtual Error getSetupError() const override; + + private: + std::vector sortedDimSizes; + Error error; + mini_jit::EinsumTree einsumTree; + }; + +} // namespace mlc + +#endif // MLC_SETUPEINSUM_H \ No newline at end of file diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index 7b74525..1e7cd8d 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -34,11 +34,25 @@ void mlc::fill_random(Tensor &tensor) } } -void mlc::einsum(const std::vector> &inputs, Tensor &output, const std::string &tree) +mlc::Error mlc::einsum(const std::vector> &inputs, Tensor &output, const std::string &tree) +{ + std::vector tensors(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++i) + { + tensors[i] = &(inputs[i].get()); + } + einsum(tensors, output, tree); +} + +mlc::Error mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) { mini_jit::EinsumTree einsumTree(tree); mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); - (void)(errorParse); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = 
::convertParseError(errorParse); + return {type, ""}; // TODO add error message + } std::vector sorted_dim_sizes; ::get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); @@ -47,9 +61,16 @@ void mlc::einsum(const std::vector> &inputs, Tens std::vector tensors(inputs.size() + 1); for (size_t i = 0; i < inputs.size(); i++) { - tensors[i] = inputs[i].get().data; + tensors[i] = inputs[i]->data; } tensors[inputs.size()] = output.data; - einsumTree.execute(tensors); -} + mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); + if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) + { + mlc::ErrorType type = ::convertErrorExecute(errorExecute); + return {type, ""}; // TODO add error message + } + + return {mlc::ErrorType::None, "Success"}; +} \ No newline at end of file diff --git a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index d69a69a..765a337 100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -2,15 +2,14 @@ #include "../main/EinsumTree.h" #include -constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, - const std::vector> &inputs, +constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, std::vector &sorted_dim_sizes) { if (root->left != nullptr) { if (root->left->type == mini_jit::EinsumTree::NodeType::Leaf) { - const auto &dim_sizes = inputs[root->left->input_tensor_index].get().dim_sizes; + const auto &dim_sizes = inputs[root->left->input_tensor_index]->dim_sizes; uint i = 0; for (int64_t id : root->left->output_dim_ids) { @@ -28,7 +27,7 @@ constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNod { if (root->right->type == mini_jit::EinsumTree::NodeType::Leaf) { - const auto &dim_sizes = inputs[root->right->input_tensor_index].get().dim_sizes; + const auto &dim_sizes = inputs[root->right->input_tensor_index]->dim_sizes; uint i = 0; for (int64_t id : 
root->right->output_dim_ids) { @@ -43,6 +42,55 @@ constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNod } } +constexpr mlc::ErrorType convertParseError(mini_jit::EinsumTree::ErrorParse error) +{ + switch (error) + { + case mini_jit::EinsumTree::ErrorParse::None: + return mlc::ErrorType::None; + case mini_jit::EinsumTree::ErrorParse::ExpectedLeftBracket: + return mlc::ErrorType::ParseExpectedLeftBracket; + case mini_jit::EinsumTree::ErrorParse::ExpectedRightBracket: + return mlc::ErrorType::ParseExpectedRightBracket; + case mini_jit::EinsumTree::ErrorParse::ExpectedArrow: + return mlc::ErrorType::ParseExpectedArrow; + case mini_jit::EinsumTree::ErrorParse::ExpectedComma: + return mlc::ErrorType::ParseExpectedComma; + case mini_jit::EinsumTree::ErrorParse::ExpectedDimensionList: + return mlc::ErrorType::ParseExpectedDimensionList; + case mini_jit::EinsumTree::ErrorParse::NotAllowedToParseAgain: + return mlc::ErrorType::ParseNotAllowedToParseAgain; + case mini_jit::EinsumTree::ErrorParse::UndefinedNode: + return mlc::ErrorType::ParseUndefinedNode; + default: + return mlc::ErrorType::Undefined; + } +} + +constexpr mlc::ErrorType convertErrorExecute(mini_jit::EinsumTree::ErrorExecute error) +{ + if (static_cast(error) > 100) + { + return static_cast(static_cast(error)); + } + + switch (error) + { + case mini_jit::EinsumTree::ErrorExecute::None: + return mlc::ErrorType::None; + case mini_jit::EinsumTree::ErrorExecute::InvalidRoot: + return mlc::ErrorType::EinsumInvalidRoot; + case mini_jit::EinsumTree::ErrorExecute::NotEnoughInputTensors: + return mlc::ErrorType::EinsumNotEnoughInputTensors; + case mini_jit::EinsumTree::ErrorExecute::TooManyInputTensors: + return mlc::ErrorType::EinsumTooManyInputTensors; + case mini_jit::EinsumTree::ErrorExecute::NullPtrAsInputTensor: + return mlc::ErrorType::EinsumNullPtrAsInputTensor; + default: + return mlc::ErrorType::Undefined; + } +} + // constexpr void fill_random(mlc::Tensor &tensor, uint64_t index, 
uint64_t offset) // { // if (index < (tensor.dim_sizes.size() - 1)) From 5c863f92b77b9ee3e320a8409a5d5e31d6ac1ce6 Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Fri, 27 Jun 2025 13:20:16 +0000 Subject: [PATCH 06/17] less code duplications --- include/MachineLearningCompiler/All.h | 8 + src/interface/SetupEinsum.cpp | 44 ++--- src/interface/SetupEinsum.h | 38 +++++ src/interface/Tensor.cpp | 35 +--- src/interface/TensorUtils.h | 232 +++++++++++++++----------- 5 files changed, 198 insertions(+), 159 deletions(-) create mode 100644 include/MachineLearningCompiler/All.h diff --git a/include/MachineLearningCompiler/All.h b/include/MachineLearningCompiler/All.h new file mode 100644 index 0000000..2c8e043 --- /dev/null +++ b/include/MachineLearningCompiler/All.h @@ -0,0 +1,8 @@ +#ifndef MLC_ALL_H +#define MLC_ALL_H + +#include "Error.h" +#include "Setup.h" +#include "Tensor.h" + +#endif // MLC_ALL_H \ No newline at end of file diff --git a/src/interface/SetupEinsum.cpp b/src/interface/SetupEinsum.cpp index b680ab2..a420a48 100644 --- a/src/interface/SetupEinsum.cpp +++ b/src/interface/SetupEinsum.cpp @@ -5,39 +5,25 @@ mlc::SetupEinsum::SetupEinsum(const std::vector> &inputs, Tensor &output, const std::string &tree) : einsumTree(tree) { - std::vector tensors(inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) - { - tensors[i] = &(inputs[i].get()); - } - - mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); - if (errorParse != mini_jit::EinsumTree::ErrorParse::None) - { - mlc::ErrorType type = ::convertParseError(errorParse); - error = {type, ""}; // TODO add error message - } - - std::vector sorted_dim_sizes; - ::get_sorted_dimensions_sizes(einsumTree.get_root(), tensors, sorted_dim_sizes); - einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); - - error = {mlc::ErrorType::None, "Success"}; + setup>(inputs, output, tree); } mlc::SetupEinsum::SetupEinsum(const std::vector &inputs, Tensor &output, const 
std::string &tree) : einsumTree(tree) { - mini_jit::EinsumTree einsumTree(tree); - mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); - if (errorParse != mini_jit::EinsumTree::ErrorParse::None) - { - mlc::ErrorType type = ::convertParseError(errorParse); - error = {type, ""}; // TODO add error message - } + setup(inputs, output, tree); +} + +mlc::Error mlc::SetupEinsum::getSetupError() const +{ + return Error(); +} - std::vector sorted_dim_sizes; - ::get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); - einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); +mlc::Error mlc::SetupEinsum::execute(const std::vector> &inputs, Tensor &output) +{ + return execute>(inputs, output); +} - error = {mlc::ErrorType::None, "Success"}; +mlc::Error mlc::SetupEinsum::execute(const std::vector &inputs, Tensor &output) +{ + return execute(inputs, output); } \ No newline at end of file diff --git a/src/interface/SetupEinsum.h b/src/interface/SetupEinsum.h index 7213539..ae3f220 100644 --- a/src/interface/SetupEinsum.h +++ b/src/interface/SetupEinsum.h @@ -17,11 +17,49 @@ namespace mlc virtual Error getSetupError() const override; private: + template void setup(const std::vector &inputs, Tensor &output, const std::string &tree); + template Error execute(const std::vector &inputs, Tensor &output); + std::vector sortedDimSizes; Error error; mini_jit::EinsumTree einsumTree; }; + template inline void SetupEinsum::setup(const std::vector &inputs, Tensor &output, const std::string &tree) + { + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = ::convertParseError(errorParse); + error = {type, ""}; // TODO add error message + } + + std::vector sorted_dim_sizes; + ::get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + + error = {mlc::ErrorType::None, "Success"}; + } 
+ + template inline Error SetupEinsum::execute(const std::vector &inputs, Tensor &output) + { + std::vector tensors(inputs.size() + 1); + for (size_t i = 0; i < inputs.size(); i++) + { + tensors[i] = getTensor(inputs[i])->data; + } + tensors[inputs.size()] = output.data; + + mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); + if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) + { + mlc::ErrorType type = ::convertErrorExecute(errorExecute); + return {type, ""}; // TODO add error message + } + + return {mlc::ErrorType::None, "Success"}; + } + } // namespace mlc #endif // MLC_SETUPEINSUM_H \ No newline at end of file diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index 1e7cd8d..fbc094c 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -36,41 +36,10 @@ void mlc::fill_random(Tensor &tensor) mlc::Error mlc::einsum(const std::vector> &inputs, Tensor &output, const std::string &tree) { - std::vector tensors(inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) - { - tensors[i] = &(inputs[i].get()); - } - einsum(tensors, output, tree); + return ::einsum>(inputs, output, tree); } mlc::Error mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) { - mini_jit::EinsumTree einsumTree(tree); - mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); - if (errorParse != mini_jit::EinsumTree::ErrorParse::None) - { - mlc::ErrorType type = ::convertParseError(errorParse); - return {type, ""}; // TODO add error message - } - - std::vector sorted_dim_sizes; - ::get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); - einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); - - std::vector tensors(inputs.size() + 1); - for (size_t i = 0; i < inputs.size(); i++) - { - tensors[i] = inputs[i]->data; - } - tensors[inputs.size()] = output.data; - - mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); - if (errorExecute != 
mini_jit::EinsumTree::ErrorExecute::None) - { - mlc::ErrorType type = ::convertErrorExecute(errorExecute); - return {type, ""}; // TODO add error message - } - - return {mlc::ErrorType::None, "Success"}; + return ::einsum(inputs, output, tree); } \ No newline at end of file diff --git a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index 765a337..1ffd572 100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -1,122 +1,160 @@ #include "../../include/MachineLearningCompiler/Tensor.h" #include "../main/EinsumTree.h" +#include #include +#include -constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, - std::vector &sorted_dim_sizes) +namespace { - if (root->left != nullptr) + + template constexpr mlc::Tensor *getTensor(T &) { - if (root->left->type == mini_jit::EinsumTree::NodeType::Leaf) - { - const auto &dim_sizes = inputs[root->left->input_tensor_index]->dim_sizes; - uint i = 0; - for (int64_t id : root->left->output_dim_ids) - { - sorted_dim_sizes.resize(std::max(static_cast(sorted_dim_sizes.size()), id + 1)); - sorted_dim_sizes[id] = dim_sizes[i++]; - } - } - else - { - get_sorted_dimensions_sizes(root->left, inputs, sorted_dim_sizes); - } + static_assert("No generic conversion of tensor possible"); + return nullptr; + } + + template <> constexpr mlc::Tensor *getTensor(mlc::Tensor *&tensor) + { + return tensor; } - if (root->right != nullptr) + template <> constexpr mlc::Tensor *getTensor>(std::reference_wrapper &tensor) { - if (root->right->type == mini_jit::EinsumTree::NodeType::Leaf) + return &(tensor.get()); + } + + template + constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, + std::vector &sorted_dim_sizes) + { + if (root->left != nullptr) { - const auto &dim_sizes = inputs[root->right->input_tensor_index]->dim_sizes; - uint i = 0; - for (int64_t id : root->right->output_dim_ids) + if 
(root->left->type == mini_jit::EinsumTree::NodeType::Leaf) + { + const auto &dim_sizes = getTensor(inputs[root->left->input_tensor_index])->dim_sizes; + uint i = 0; + for (int64_t id : root->left->output_dim_ids) + { + sorted_dim_sizes.resize(std::max(static_cast(sorted_dim_sizes.size()), id + 1)); + sorted_dim_sizes[id] = dim_sizes[i++]; + } + } + else { - sorted_dim_sizes.resize(std::max(static_cast(sorted_dim_sizes.size()), id + 1)); - sorted_dim_sizes[id] = dim_sizes[i++]; + get_sorted_dimensions_sizes(root->left, inputs, sorted_dim_sizes); } } - else + + if (root->right != nullptr) { - get_sorted_dimensions_sizes(root->right, inputs, sorted_dim_sizes); + if (root->right->type == mini_jit::EinsumTree::NodeType::Leaf) + { + const auto &dim_sizes = getTensor(inputs[root->right->input_tensor_index])->dim_sizes; + uint i = 0; + for (int64_t id : root->right->output_dim_ids) + { + sorted_dim_sizes.resize(std::max(static_cast(sorted_dim_sizes.size()), id + 1)); + sorted_dim_sizes[id] = dim_sizes[i++]; + } + } + else + { + get_sorted_dimensions_sizes(root->right, inputs, sorted_dim_sizes); + } } } -} -constexpr mlc::ErrorType convertParseError(mini_jit::EinsumTree::ErrorParse error) -{ - switch (error) + constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, + const std::vector> &inputs, + std::vector &sorted_dim_sizes) { - case mini_jit::EinsumTree::ErrorParse::None: - return mlc::ErrorType::None; - case mini_jit::EinsumTree::ErrorParse::ExpectedLeftBracket: - return mlc::ErrorType::ParseExpectedLeftBracket; - case mini_jit::EinsumTree::ErrorParse::ExpectedRightBracket: - return mlc::ErrorType::ParseExpectedRightBracket; - case mini_jit::EinsumTree::ErrorParse::ExpectedArrow: - return mlc::ErrorType::ParseExpectedArrow; - case mini_jit::EinsumTree::ErrorParse::ExpectedComma: - return mlc::ErrorType::ParseExpectedComma; - case mini_jit::EinsumTree::ErrorParse::ExpectedDimensionList: - return mlc::ErrorType::ParseExpectedDimensionList; 
- case mini_jit::EinsumTree::ErrorParse::NotAllowedToParseAgain: - return mlc::ErrorType::ParseNotAllowedToParseAgain; - case mini_jit::EinsumTree::ErrorParse::UndefinedNode: - return mlc::ErrorType::ParseUndefinedNode; - default: - return mlc::ErrorType::Undefined; + get_sorted_dimensions_sizes>(root, inputs, sorted_dim_sizes); } -} -constexpr mlc::ErrorType convertErrorExecute(mini_jit::EinsumTree::ErrorExecute error) -{ - if (static_cast(error) > 100) + constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, + std::vector &sorted_dim_sizes) + { + get_sorted_dimensions_sizes(root, inputs, sorted_dim_sizes); + } + + constexpr mlc::ErrorType convertParseError(mini_jit::EinsumTree::ErrorParse error) { - return static_cast(static_cast(error)); + switch (error) + { + case mini_jit::EinsumTree::ErrorParse::None: + return mlc::ErrorType::None; + case mini_jit::EinsumTree::ErrorParse::ExpectedLeftBracket: + return mlc::ErrorType::ParseExpectedLeftBracket; + case mini_jit::EinsumTree::ErrorParse::ExpectedRightBracket: + return mlc::ErrorType::ParseExpectedRightBracket; + case mini_jit::EinsumTree::ErrorParse::ExpectedArrow: + return mlc::ErrorType::ParseExpectedArrow; + case mini_jit::EinsumTree::ErrorParse::ExpectedComma: + return mlc::ErrorType::ParseExpectedComma; + case mini_jit::EinsumTree::ErrorParse::ExpectedDimensionList: + return mlc::ErrorType::ParseExpectedDimensionList; + case mini_jit::EinsumTree::ErrorParse::NotAllowedToParseAgain: + return mlc::ErrorType::ParseNotAllowedToParseAgain; + case mini_jit::EinsumTree::ErrorParse::UndefinedNode: + return mlc::ErrorType::ParseUndefinedNode; + default: + return mlc::ErrorType::Undefined; + } } - switch (error) + constexpr mlc::ErrorType convertErrorExecute(mini_jit::EinsumTree::ErrorExecute error) { - case mini_jit::EinsumTree::ErrorExecute::None: - return mlc::ErrorType::None; - case mini_jit::EinsumTree::ErrorExecute::InvalidRoot: - return 
mlc::ErrorType::EinsumInvalidRoot; - case mini_jit::EinsumTree::ErrorExecute::NotEnoughInputTensors: - return mlc::ErrorType::EinsumNotEnoughInputTensors; - case mini_jit::EinsumTree::ErrorExecute::TooManyInputTensors: - return mlc::ErrorType::EinsumTooManyInputTensors; - case mini_jit::EinsumTree::ErrorExecute::NullPtrAsInputTensor: - return mlc::ErrorType::EinsumNullPtrAsInputTensor; - default: - return mlc::ErrorType::Undefined; + if (static_cast(error) > 100) + { + return static_cast(static_cast(error)); + } + + switch (error) + { + case mini_jit::EinsumTree::ErrorExecute::None: + return mlc::ErrorType::None; + case mini_jit::EinsumTree::ErrorExecute::InvalidRoot: + return mlc::ErrorType::EinsumInvalidRoot; + case mini_jit::EinsumTree::ErrorExecute::NotEnoughInputTensors: + return mlc::ErrorType::EinsumNotEnoughInputTensors; + case mini_jit::EinsumTree::ErrorExecute::TooManyInputTensors: + return mlc::ErrorType::EinsumTooManyInputTensors; + case mini_jit::EinsumTree::ErrorExecute::NullPtrAsInputTensor: + return mlc::ErrorType::EinsumNullPtrAsInputTensor; + default: + return mlc::ErrorType::Undefined; + } } -} - -// constexpr void fill_random(mlc::Tensor &tensor, uint64_t index, uint64_t offset) -// { -// if (index < (tensor.dim_sizes.size() - 1)) -// { -// for (uint64_t i = 0; i < tensor.dim_sizes[index]; i++) -// { -// fill_random(tensor, index + 1, offset + tensor.strides[index] * i); -// } -// } -// else -// { -// for (uint64_t i = 0; i < tensor.dim_sizes[index]; i++) -// { -// float denominator = 1; -// denominator = static_cast(std::rand()); -// if (denominator == 0) -// { -// denominator = 1; -// } - -// float numerator = 1; -// numerator = static_cast(std::rand()); - -// float random = numerator / denominator; - -// tensor.data[offset + tensor.strides[index] * i] = random; -// } -// } -// } \ No newline at end of file + + template mlc::Error einsum(const std::vector &inputs, mlc::Tensor &output, const std::string &tree) + { + mini_jit::EinsumTree 
einsumTree(tree); + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = ::convertParseError(errorParse); + return {type, ""}; // TODO add error message + } + + std::vector sorted_dim_sizes; + ::get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + + std::vector tensors(inputs.size() + 1); + for (size_t i = 0; i < inputs.size(); i++) + { + tensors[i] = getTensor(inputs[i])->data; + } + tensors[inputs.size()] = output.data; + + mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); + if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) + { + mlc::ErrorType type = ::convertErrorExecute(errorExecute); + return {type, ""}; // TODO add error message + } + + return {mlc::ErrorType::None, "Success"}; + } + +} // namespace \ No newline at end of file From fe7bd2b34c2860c52e8c2ded9ded49bf6ba470de Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Fri, 27 Jun 2025 13:41:32 +0000 Subject: [PATCH 07/17] added implementation --- src/interface/Setup.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/interface/Setup.cpp b/src/interface/Setup.cpp index 93dbc80..e55adc3 100644 --- a/src/interface/Setup.cpp +++ b/src/interface/Setup.cpp @@ -1 +1,14 @@ -#include "Setup.h" \ No newline at end of file +#include "../../include/MachineLearningCompiler/Setup.h" +#include "SetupEinsum.h" + +mlc::Setup &mlc::einsum_setup(const std::vector> &inputs, Tensor &output, const std::string &tree) +{ + mlc::SetupEinsum setup(inputs, output, tree); + return setup; +} + +mlc::Setup &mlc::einsum_setup(const std::vector &inputs, Tensor &output, const std::string &tree) +{ + mlc::SetupEinsum setup(inputs, output, tree); + return setup; +} From 42722e0cb696ddd8974a694670af70f3cfe39401 Mon Sep 17 00:00:00 2001 From: 
RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Tue, 1 Jul 2025 06:24:40 +0000 Subject: [PATCH 08/17] Added dimension check --- src/interface/SetupEinsum.cpp | 24 ++++++++++++- src/interface/SetupEinsum.h | 59 ++++++++++++++++++++++++++++++- src/main/EinsumTree.cpp | 5 +++ src/main/EinsumTree.h | 2 ++ src/test/interface/Setup.test.cpp | 24 +++++++++++++ 5 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 src/test/interface/Setup.test.cpp diff --git a/src/interface/SetupEinsum.cpp b/src/interface/SetupEinsum.cpp index a420a48..dae4235 100644 --- a/src/interface/SetupEinsum.cpp +++ b/src/interface/SetupEinsum.cpp @@ -15,15 +15,37 @@ mlc::SetupEinsum::SetupEinsum(const std::vector &inputs, Tensor &outpu mlc::Error mlc::SetupEinsum::getSetupError() const { - return Error(); + return error; } mlc::Error mlc::SetupEinsum::execute(const std::vector> &inputs, Tensor &output) { + if (error.type != ErrorType::None) + { + return error; + } + + Error checkError = hasSameDimensions(inputs); + if (checkError.type != ErrorType::None) + { + return checkError; + } + return execute>(inputs, output); } mlc::Error mlc::SetupEinsum::execute(const std::vector &inputs, Tensor &output) { + if (error.type != ErrorType::None) + { + return error; + } + + Error checkError = hasSameDimensions(inputs); + if (checkError.type != ErrorType::None) + { + return checkError; + } + return execute(inputs, output); } \ No newline at end of file diff --git a/src/interface/SetupEinsum.h b/src/interface/SetupEinsum.h index ae3f220..79428fa 100644 --- a/src/interface/SetupEinsum.h +++ b/src/interface/SetupEinsum.h @@ -3,6 +3,7 @@ #include "../../include/MachineLearningCompiler/Setup.h" #include "../main/EinsumTree.h" +#include namespace mlc { @@ -19,8 +20,8 @@ namespace mlc private: template void setup(const std::vector &inputs, Tensor &output, const std::string &tree); template Error execute(const std::vector &inputs, Tensor &output); + template Error hasSameDimensions(const 
std::vector &inputs); - std::vector sortedDimSizes; Error error; mini_jit::EinsumTree einsumTree; }; @@ -60,6 +61,62 @@ namespace mlc return {mlc::ErrorType::None, "Success"}; } + template inline Error SetupEinsum::hasSameDimensions(const std::vector &inputs) + { + std::vector nodesToProcess = {einsumTree.get_root()}; + auto &sortedDimSizes = einsumTree.get_sorted_dim_sizes(); + uint32_t processedInputs = 0; + while (nodesToProcess.size() > 0) + { + mini_jit::EinsumTree::EinsumNode *node = nodesToProcess.back(); + nodesToProcess.pop_back(); + + if (node->type == mini_jit::EinsumTree::NodeType::Leaf) + { + if (!(node->input_tensor_index < inputs.size())) + { + return {ErrorType::EinsumTooManyInputTensors, "The was more input tensors than the original setup used."} + } + + Tensor *tensor = getTensor(inputs[node->input_tensor_index]); + + if (tensor->dim_sizes.size() != node->output_dim_ids.size()) + { + return {ErrorType::ExecuteWrongDimension, "The count of dimensions do not match."}; + } + + for (size_t i = 0; i < node->output_dim_ids.size(); i++) + { + if (tensor->dim_sizes[i] != static_cast(sortedDimSizes[node->output_dim_ids[i]])) + { + return {ErrorType::ExecuteWrongDimension, + "The input tensor dimension has a different size than the size than the tensor it was setup up with."}; + } + } + + processedInputs++; + continue; + } + + if (node->left != nullptr) + { + nodesToProcess.push_back(node->left); + } + + if (node->right != nullptr) + { + nodesToProcess.push_back(node->right); + } + } + + if (processedInputs < inputs.size()) + { + return {mlc::ErrorType::EinsumNotEnoughInputTensors, "There was less input tensors than the original setups used."}; + } + + return {mlc::ErrorType::None, "Success"}; + } + } // namespace mlc #endif // MLC_SETUPEINSUM_H \ No newline at end of file diff --git a/src/main/EinsumTree.cpp b/src/main/EinsumTree.cpp index 61c58aa..37ab945 100644 --- a/src/main/EinsumTree.cpp +++ b/src/main/EinsumTree.cpp @@ -165,6 +165,11 @@ void 
mini_jit::EinsumTree::set_sorted_dim_sizes(const std::vector &sort EinsumTree::dim_sizes = sorted_dim_sizes; } +const std::vector &mini_jit::EinsumTree::get_sorted_dim_sizes() +{ + return dim_sizes; +} + void mini_jit::EinsumTree::delete_tree(EinsumNode *node) { if (node == nullptr) diff --git a/src/main/EinsumTree.h b/src/main/EinsumTree.h index 73d0890..5eaaa20 100644 --- a/src/main/EinsumTree.h +++ b/src/main/EinsumTree.h @@ -251,6 +251,8 @@ namespace mini_jit */ void set_sorted_dim_sizes(const std::vector &sorted_dim_sizes); + const std::vector &get_sorted_dim_sizes(); + /** * Parses the einsum tree string and builds the tree structure. * diff --git a/src/test/interface/Setup.test.cpp b/src/test/interface/Setup.test.cpp new file mode 100644 index 0000000..937165d --- /dev/null +++ b/src/test/interface/Setup.test.cpp @@ -0,0 +1,24 @@ +#include "../../../include/MachineLearningCompiler/Setup.h" +#include +#include +#include +#include +#include + +TEST_CASE("Test tensor einsum setup", "[setup][correctness]") +{ + std::vector shape1 = {3, 4}; + std::vector shape2 = {4, 5}; + std::vector shape3 = {3, 5}; + + size_t total_size1 = shape1[0] * shape1[1]; + size_t total_size2 = shape2[0] * shape2[1]; + size_t total_size3 = shape3[0] * shape3[1]; + + mlc::Tensor tensor1(shape1); + mlc::Tensor tensor2(shape2); + mlc::Tensor tensor3(shape3); + + mlc::Setup &setup = mlc::einsum_setup({tensor1, tensor2}, tensor3, "[0,1],[1,2]->[0,2]"); + setup.execute({tensor1, tensor2}, tensor3); +} \ No newline at end of file From cee70662663dcab7db3922f9266bed8a4302aca0 Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Tue, 1 Jul 2025 08:47:50 +0000 Subject: [PATCH 09/17] more functions --- .github/workflows/ci.yml | 1 - .../getting_started/building_project.rst | 2 + include/MachineLearningCompiler/Error.h | 2 +- include/MachineLearningCompiler/Setup.h | 11 ++- include/MachineLearningCompiler/Tensor.h | 82 +++++++++++++++++-- 
src/interface/SetupEinsum.h | 3 +- src/interface/Tensor.cpp | 81 +++++++++++++++--- src/interface/TensorUtils.h | 58 ++++++++++--- src/test/interface/Tensor.test.cpp | 78 ++++++++++++++++++ src/test/interface/TensorUtils.test.cpp | 2 +- 10 files changed, 283 insertions(+), 37 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 58e32fd..d9a2bf6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,6 @@ name: CI on: push: - branches: [ "main" ] pull_request: branches: [ "main" ] diff --git a/docs_sphinx/getting_started/building_project.rst b/docs_sphinx/getting_started/building_project.rst index 5aa6ead..41b149c 100644 --- a/docs_sphinx/getting_started/building_project.rst +++ b/docs_sphinx/getting_started/building_project.rst @@ -120,6 +120,8 @@ Building 6. Now we can build the project. The most desired command might be .. code-block:: bash + + cmake --build . --target tests Options for ``--target`` are **benchmarks** and **tests** diff --git a/include/MachineLearningCompiler/Error.h b/include/MachineLearningCompiler/Error.h index a720c4e..e226d2e 100644 --- a/include/MachineLearningCompiler/Error.h +++ b/include/MachineLearningCompiler/Error.h @@ -1,7 +1,7 @@ #ifndef MLC_ERROR_H #define MLC_ERROR_H -#include #include +#include namespace mlc { diff --git a/include/MachineLearningCompiler/Setup.h b/include/MachineLearningCompiler/Setup.h index 471e653..290b97d 100644 --- a/include/MachineLearningCompiler/Setup.h +++ b/include/MachineLearningCompiler/Setup.h @@ -1,6 +1,5 @@ #ifndef MLC_SETUP_H #define MLC_SETUP_H - #include "Error.h" #include "Tensor.h" #include @@ -18,7 +17,7 @@ namespace mlc * @param output The output of the einsum calculation. 
* @return Error The error during the */ - virtual Error execute(const std::vector> &inputs, Tensor &output) = 0; + virtual Error execute(const std::vector> &inputs, Tensor &output) = 0; /** * @brief Executes the setup einsum expression with input tensor of the same size. @@ -27,7 +26,7 @@ namespace mlc * @param output The output of the einsum calculation. * @return Error The error during the */ - virtual Error execute(const std::vector &inputs, Tensor &output) = 0; + virtual Error execute(const std::vector &inputs, Tensor &output) = 0; /** * @brief Gets the error that was produces during the setup of the tree. @@ -38,13 +37,13 @@ namespace mlc }; /** - * @brief + * @brief Sets up the einsum tree for contraction based on the given tree. * * @param inputs The input tensors. * @param output The output tensor. * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. */ - Setup &einsum_setup(const std::vector> &inputs, Tensor &output, const std::string &tree); + Setup &einsum_setup(const std::vector> &inputs, Tensor &output, const std::string &tree); /** * @brief Executes contractions based on the given tree. @@ -53,7 +52,7 @@ namespace mlc * @param output The output tensor. * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. 
*/ - Setup &einsum_setup(const std::vector &inputs, Tensor &output, const std::string &tree); + Setup &einsum_setup(const std::vector &inputs, Tensor &output, const std::string &tree); } // namespace mlc #endif // MLC_SETUP_H \ No newline at end of file diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index b284aab..b32552c 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -1,6 +1,5 @@ #ifndef MLC_TENSOR_H #define MLC_TENSOR_H - #include "Error.h" #include #include @@ -15,6 +14,12 @@ namespace mlc float *data = nullptr; std::vector dim_sizes; + // deletes the default constructor + Tensor() = delete; + + // deletes the copy constructor + Tensor(const Tensor &) = delete; + /** * @brief Construct a new Tensor with with a pointer to memory and the dimension sizes sorted in by stride in descending order. * @@ -59,23 +64,90 @@ namespace mlc */ void fill_random(Tensor &tensor); + /** + * @brief Fills the tensor with the given number. + * + * @param tensor The tensor to fill. + * @param number The number used to fill the tensor. + */ + void fill_number(Tensor &tensor, float number); + + /** + * @brief Fills the tensor based on the given function. + * + * @param tensor The tensor to fill. + * @param function The function that gets the current tensor and the current index of the tensor as input. + * index = index0 * stride0 + index1 * stride1 + ... + indexN * strideN. + */ + void fill_lambda(Tensor &tensor, std::function function); + /** * @brief * * @param inputs The input tensors. * @param output The output tensor. - * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. + * @param tree The (nested) einsum tree to contract in the format [in0],[in1]->[out]. + * @return Error The error code or ErrorType::None on success. 
*/ - Error einsum(const std::vector> &inputs, Tensor &output, const std::string &tree); + Error einsum(const std::vector> &inputs, Tensor &output, const std::string &tree); /** * @brief Executes contractions based on the given tree. * * @param inputs The input tensors. * @param output The output tensor. - * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. + * @param tree The (nested) einsum tree to contract in the format [in0],[in1]->[out]. + * @return Error The error code or ErrorType::None on success. + */ + Error einsum(const std::vector &inputs, Tensor &output, const std::string &tree); + + /** + * @brief Perform a binary contraction and adds it to the output. + * + * @param input0 The first input tensor. + * @param input1 The second input tensor. + * @param output The output to add the result to. + * @param contraction The string to show the dimension to be contracted in the format [in0],[in1]->[out]. + * @return Error The error code or ErrorType::None on success. + */ + Error contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction); + + /** + * @brief Perform a general matrix-matrix multiplication and adds it to the output. + * + * @param input0 The first input tensor. + * @param input1 The second input tensor. + * @param output The output to add the result to. + * @return Error The error code or ErrorType::None on success. + */ + Error gemm(const Tensor &input0, const Tensor &input1, Tensor output); + + /** + * @brief Performs a zero unary that sets the output tensor to zero. + * + * @param input The input tensor. + * @param output The output tensor. + * @return Error The error code or ErrorType::None on success. + */ + Error unary_zero(const Tensor &input, Tensor &output); + + /** + * @brief Performs a relu unary that applies Rectified Linear Unit on the tensor input. + * + * @param input The input tensor. + * @param output The ouput tensor. 
+ * @return Error The error code or ErrorType::None on success. + */ + Error unary_relu(const Tensor &input, Tensor &output); + + /** + * @brief Performs a identity unary that copies the input tensor to the output tensor + * + * @param input The input tensor. + * @param output The output tensor. + * @return Error The error code or ErrorType::None on success. */ - Error einsum(const std::vector &inputs, Tensor &output, const std::string &tree); + Error unary_identity(const Tensor &input, Tensor output); } // namespace mlc #endif // MLC_TENSOR \ No newline at end of file diff --git a/src/interface/SetupEinsum.h b/src/interface/SetupEinsum.h index 79428fa..58230ba 100644 --- a/src/interface/SetupEinsum.h +++ b/src/interface/SetupEinsum.h @@ -1,6 +1,5 @@ #ifndef MLC_SETUPEINSUM_H #define MLC_SETUPEINSUM_H - #include "../../include/MachineLearningCompiler/Setup.h" #include "../main/EinsumTree.h" #include @@ -36,7 +35,7 @@ namespace mlc } std::vector sorted_dim_sizes; - ::get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); + get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); error = {mlc::ErrorType::None, "Success"}; diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index fbc094c..c254087 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -10,12 +10,11 @@ void mlc::fill_random(Tensor &tensor) return; } - uint64_t size = 1; - for (auto dim : tensor.dim_sizes) - { - size *= dim; - } + uint64_t size = getTensorSize(&tensor); +#ifdef MLC_USE_OPENMP +#pragma omp parallel for simd +#endif for (size_t i = 0; i < size; ++i) { float denominator = 1; @@ -34,12 +33,74 @@ void mlc::fill_random(Tensor &tensor) } } -mlc::Error mlc::einsum(const std::vector> &inputs, Tensor &output, const std::string &tree) +void mlc::fill_number(Tensor &tensor, float number) { - return ::einsum>(inputs, output, tree); + if (tensor.dim_sizes.size() == 0) + { + 
return; + } + + uint64_t size = getTensorSize(&tensor); + +#ifdef MLC_USE_OPENMP +#pragma omp parallel for simd +#endif + for (size_t i = 0; i < size; i++) + { + tensor.data[i] = number; + } } -mlc::Error mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) +void mlc::fill_lambda(Tensor &tensor, std::function function) { - return ::einsum(inputs, output, tree); -} \ No newline at end of file + if (tensor.dim_sizes.size() == 0) + { + return; + } + + uint64_t size = getTensorSize(&tensor); + +#ifdef MLC_USE_OPENMP +#pragma omp parallel for simd +#endif + for (size_t i = 0; i < size; i++) + { + tensor.data[i] = function(tensor, i); + } +} + +mlc::Error mlc::einsum(const std::vector> &inputs, Tensor &output, const std::string &tree) +{ + return einsum>(inputs, output, tree); +} + +mlc::Error mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) +{ + return einsum(inputs, output, tree); +} + +mlc::Error mlc::contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction) +{ + return einsum({&input0, &input1}, output, contraction); +} + +mlc::Error mlc::unary_zero(const Tensor &input, Tensor &output) +{ + (void)input; + (void)output; + return {ErrorType::None, ""}; +} + +mlc::Error mlc::unary_relu(const Tensor &input, Tensor &output) +{ + (void)input; + (void)output; + return {ErrorType::None, ""}; +} + +mlc::Error mlc::unary_identity(const Tensor &input, Tensor output) +{ + (void)input; + (void)output; + return {ErrorType::None, ""}; +} diff --git a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index 1ffd572..24ac9fd 100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -1,28 +1,47 @@ +#ifndef MLC_TENSORUTILS_H +#define MLC_TENSORUTILS_H #include "../../include/MachineLearningCompiler/Tensor.h" #include "../main/EinsumTree.h" +#include +#include #include #include #include -namespace +namespace mlc { - - template constexpr mlc::Tensor 
*getTensor(T &) + /** + * @brief Function definition for converting a generic type to a pointer to a mlc::Tensor. + * + * @param T The type to convert. + * @return mlc::Tensor + */ + template constexpr const mlc::Tensor *getTensor(T &) { static_assert("No generic conversion of tensor possible"); return nullptr; } - template <> constexpr mlc::Tensor *getTensor(mlc::Tensor *&tensor) + /** + * @brief Gets the pointer to the mlc::Tensor. + * + * @param tensor The tensor to get the pointer from. + * @return Pointer to the mlc::Tensor. + */ + template <> constexpr const mlc::Tensor *getTensor(const mlc::Tensor *&tensor) { return tensor; } - template <> constexpr mlc::Tensor *getTensor>(std::reference_wrapper &tensor) + // TODO: doc + template <> + constexpr const mlc::Tensor * + getTensor>(const std::reference_wrapper &tensor) { return &(tensor.get()); } + // TODO: doc template constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, std::vector &sorted_dim_sizes) @@ -64,6 +83,7 @@ namespace } } + // TODO: doc constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector> &inputs, std::vector &sorted_dim_sizes) @@ -71,12 +91,14 @@ namespace get_sorted_dimensions_sizes>(root, inputs, sorted_dim_sizes); } + // TODO: doc constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, std::vector &sorted_dim_sizes) { get_sorted_dimensions_sizes(root, inputs, sorted_dim_sizes); } + // TODO: doc constexpr mlc::ErrorType convertParseError(mini_jit::EinsumTree::ErrorParse error) { switch (error) @@ -102,6 +124,7 @@ namespace } } + // TODO: doc constexpr mlc::ErrorType convertErrorExecute(mini_jit::EinsumTree::ErrorExecute error) { if (static_cast(error) > 100) @@ -126,35 +149,48 @@ namespace } } + // TODO: doc template mlc::Error einsum(const std::vector &inputs, mlc::Tensor &output, const std::string &tree) { 
mini_jit::EinsumTree einsumTree(tree); mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); if (errorParse != mini_jit::EinsumTree::ErrorParse::None) { - mlc::ErrorType type = ::convertParseError(errorParse); - return {type, ""}; // TODO add error message + mlc::ErrorType type = convertParseError(errorParse); + return {type, "Failed during parsing the given einsum tree."}; } std::vector sorted_dim_sizes; - ::get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); + get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); std::vector tensors(inputs.size() + 1); for (size_t i = 0; i < inputs.size(); i++) { tensors[i] = getTensor(inputs[i])->data; + assert(tensors[i] != nullptr); } tensors[inputs.size()] = output.data; mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) { - mlc::ErrorType type = ::convertErrorExecute(errorExecute); - return {type, ""}; // TODO add error message + mlc::ErrorType type = convertErrorExecute(errorExecute); + return {type, "Failed during calculation of the einsum tree."}; } return {mlc::ErrorType::None, "Success"}; } -} // namespace \ No newline at end of file + // TODO: doc + constexpr uint64_t getTensorSize(const mlc::Tensor *tensor) + { + uint64_t size = 1; + for (auto dim : tensor->dim_sizes) + { + size *= dim; + } + return size; + } +} // namespace mlc +#endif // MLC_TENSORUTILS_H \ No newline at end of file diff --git a/src/test/interface/Tensor.test.cpp b/src/test/interface/Tensor.test.cpp index fec9b34..1475202 100644 --- a/src/test/interface/Tensor.test.cpp +++ b/src/test/interface/Tensor.test.cpp @@ -28,6 +28,50 @@ TEST_CASE("Test tensor fill_random", "[tensor][correctness]") } } +TEST_CASE("Test tensor fill_number", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4}; + + size_t total_size1 = shape1[0] * shape1[1]; + float 
*data1 = new float[total_size1]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = std::nanf("1"); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::fill_number(tensor1, 1); + + for (size_t i = 0; i < total_size1; i++) + { + CAPTURE(i); + REQUIRE(tensor1.data[i] == 1); + } +} + +TEST_CASE("Test tensor fill_lambda", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4}; + + size_t total_size1 = shape1[0] * shape1[1]; + float *data1 = new float[total_size1]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = std::nanf("1"); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::fill_lambda(tensor1, [](const mlc::Tensor &, size_t index) { return index; }); + + for (size_t i = 0; i < total_size1; i++) + { + CAPTURE(i); + REQUIRE(tensor1.data[i] == i); + } +} + TEST_CASE("Test tensor einsum", "[tensor][correctness]") { std::vector shape1 = {3, 4}; @@ -60,4 +104,38 @@ TEST_CASE("Test tensor einsum", "[tensor][correctness]") mlc::Tensor tensor3(data2, shape3); mlc::einsum({tensor1, tensor2}, tensor3, "[0,1],[1,2]->[0,2]"); +} + +TEST_CASE("Test tensor contraction", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4}; + std::vector shape2 = {4, 5}; + std::vector shape3 = {3, 5}; + + size_t total_size1 = shape1[0] * shape1[1]; + size_t total_size2 = shape2[0] * shape2[1]; + size_t total_size3 = shape3[0] * shape3[1]; + + float *data1 = new float[total_size1]; + float *data2 = new float[total_size2]; + float *data3 = new float[total_size3]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = static_cast(i); + } + for (size_t i = 0; i < total_size2; ++i) + { + data2[i] = static_cast(2 * i); + } + for (size_t i = 0; i < total_size3; ++i) + { + data3[i] = static_cast(3 * i); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::Tensor tensor2(data2, shape2); + mlc::Tensor tensor3(data2, shape3); + + mlc::contraction(tensor1, tensor2, tensor3, "[0,1],[1,2]->[0,2]"); } \ No newline at end of file diff --git 
a/src/test/interface/TensorUtils.test.cpp b/src/test/interface/TensorUtils.test.cpp index 9e75ea2..c809de4 100644 --- a/src/test/interface/TensorUtils.test.cpp +++ b/src/test/interface/TensorUtils.test.cpp @@ -40,7 +40,7 @@ TEST_CASE("Test tensor utils get_sorted_dimensions_sizes", "[tensor][correctness tree.parse_tree(); std::vector sorted_dimensions_sizes; - get_sorted_dimensions_sizes(tree.get_root(), {tensor1, tensor2}, sorted_dimensions_sizes); + mlc::get_sorted_dimensions_sizes(tree.get_root(), {tensor1, tensor2}, sorted_dimensions_sizes); std::vector expected = {3, 4, 5}; From f0aa38bbcdb51f9279232ad8f43152581395b99b Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Tue, 1 Jul 2025 09:34:09 +0000 Subject: [PATCH 10/17] fix: cpp shit --- include/MachineLearningCompiler/Tensor.h | 2 +- src/interface/Tensor.cpp | 14 +- src/interface/TensorUtils.h | 303 ++++++++++++----------- src/test/interface/Tensor.test.cpp | 38 ++- src/test/interface/TensorUtils.test.cpp | 2 +- 5 files changed, 200 insertions(+), 159 deletions(-) diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index b32552c..e785c00 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -99,7 +99,7 @@ namespace mlc * @param tree The (nested) einsum tree to contract in the format [in0],[in1]->[out]. * @return Error The error code or ErrorType::None on success. */ - Error einsum(const std::vector &inputs, Tensor &output, const std::string &tree); + Error einsum(const std::vector &inputs, Tensor &output, const std::string &tree); /** * @brief Perform a binary contraction and adds it to the output. 
diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index c254087..cf6593b 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -10,7 +10,7 @@ void mlc::fill_random(Tensor &tensor) return; } - uint64_t size = getTensorSize(&tensor); + uint64_t size = internal::getTensorSize(&tensor); #ifdef MLC_USE_OPENMP #pragma omp parallel for simd @@ -40,7 +40,7 @@ void mlc::fill_number(Tensor &tensor, float number) return; } - uint64_t size = getTensorSize(&tensor); + uint64_t size = internal::getTensorSize(&tensor); #ifdef MLC_USE_OPENMP #pragma omp parallel for simd @@ -58,7 +58,7 @@ void mlc::fill_lambda(Tensor &tensor, std::function> &inputs, Tensor &output, const std::string &tree) { - return einsum>(inputs, output, tree); + return internal::einsum>(inputs, output, tree); } -mlc::Error mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) +mlc::Error mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) { - return einsum(inputs, output, tree); + return internal::einsum(inputs, output, tree); } mlc::Error mlc::contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction) { - return einsum({&input0, &input1}, output, contraction); + return internal::einsum>({input0, input1}, output, contraction); } mlc::Error mlc::unary_zero(const Tensor &input, Tensor &output) diff --git a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index 24ac9fd..2cae6e0 100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -10,187 +10,192 @@ namespace mlc { - /** - * @brief Function definition for converting a generic type to a pointer to a mlc::Tensor. - * - * @param T The type to convert. - * @return mlc::Tensor - */ - template constexpr const mlc::Tensor *getTensor(T &) + namespace internal { - static_assert("No generic conversion of tensor possible"); - return nullptr; - } - - /** - * @brief Gets the pointer to the mlc::Tensor. 
- * - * @param tensor The tensor to get the pointer from. - * @return Pointer to the mlc::Tensor. - */ - template <> constexpr const mlc::Tensor *getTensor(const mlc::Tensor *&tensor) - { - return tensor; - } + /** + * @brief Function definition for converting a generic type to a pointer to a mlc::Tensor. + * + * @param T The type to convert. + * @return mlc::Tensor + */ + template constexpr const mlc::Tensor *getTensor(const T &) + { + static_assert(false, "No generic conversion of tensor possible."); + return nullptr; + } - // TODO: doc - template <> - constexpr const mlc::Tensor * - getTensor>(const std::reference_wrapper &tensor) - { - return &(tensor.get()); - } + /** + * @brief Gets the pointer to the mlc::Tensor. + * + * @param tensor The tensor to get the pointer from. + * @return Pointer to the mlc::Tensor. + */ + template <> constexpr const mlc::Tensor *getTensor(mlc::Tensor *const &tensor) + { + std::cout << tensor << std::endl; + return tensor; + } - // TODO: doc - template - constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, - std::vector &sorted_dim_sizes) - { - if (root->left != nullptr) + // TODO: doc + template <> + constexpr const mlc::Tensor * + getTensor>(const std::reference_wrapper &tensor) { - if (root->left->type == mini_jit::EinsumTree::NodeType::Leaf) - { - const auto &dim_sizes = getTensor(inputs[root->left->input_tensor_index])->dim_sizes; - uint i = 0; - for (int64_t id : root->left->output_dim_ids) - { - sorted_dim_sizes.resize(std::max(static_cast(sorted_dim_sizes.size()), id + 1)); - sorted_dim_sizes[id] = dim_sizes[i++]; - } - } - else - { - get_sorted_dimensions_sizes(root->left, inputs, sorted_dim_sizes); - } + return &(tensor.get()); } - if (root->right != nullptr) + // TODO: doc + template + constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, + std::vector &sorted_dim_sizes) { - if (root->right->type 
== mini_jit::EinsumTree::NodeType::Leaf) + if (root->left != nullptr) { - const auto &dim_sizes = getTensor(inputs[root->right->input_tensor_index])->dim_sizes; - uint i = 0; - for (int64_t id : root->right->output_dim_ids) + if (root->left->type == mini_jit::EinsumTree::NodeType::Leaf) { - sorted_dim_sizes.resize(std::max(static_cast(sorted_dim_sizes.size()), id + 1)); - sorted_dim_sizes[id] = dim_sizes[i++]; + const auto &dim_sizes = getTensor(inputs[root->left->input_tensor_index])->dim_sizes; + uint i = 0; + for (int64_t id : root->left->output_dim_ids) + { + sorted_dim_sizes.resize(std::max(static_cast(sorted_dim_sizes.size()), id + 1)); + sorted_dim_sizes[id] = dim_sizes[i++]; + } + } + else + { + get_sorted_dimensions_sizes(root->left, inputs, sorted_dim_sizes); } } - else + + if (root->right != nullptr) { - get_sorted_dimensions_sizes(root->right, inputs, sorted_dim_sizes); + if (root->right->type == mini_jit::EinsumTree::NodeType::Leaf) + { + const auto &dim_sizes = getTensor(inputs[root->right->input_tensor_index])->dim_sizes; + uint i = 0; + for (int64_t id : root->right->output_dim_ids) + { + sorted_dim_sizes.resize(std::max(static_cast(sorted_dim_sizes.size()), id + 1)); + sorted_dim_sizes[id] = dim_sizes[i++]; + } + } + else + { + get_sorted_dimensions_sizes(root->right, inputs, sorted_dim_sizes); + } } } - } - // TODO: doc - constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, - const std::vector> &inputs, - std::vector &sorted_dim_sizes) - { - get_sorted_dimensions_sizes>(root, inputs, sorted_dim_sizes); - } - - // TODO: doc - constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, - std::vector &sorted_dim_sizes) - { - get_sorted_dimensions_sizes(root, inputs, sorted_dim_sizes); - } - - // TODO: doc - constexpr mlc::ErrorType convertParseError(mini_jit::EinsumTree::ErrorParse error) - { - switch (error) + // TODO: doc + constexpr void 
get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, + const std::vector> &inputs, + std::vector &sorted_dim_sizes) { - case mini_jit::EinsumTree::ErrorParse::None: - return mlc::ErrorType::None; - case mini_jit::EinsumTree::ErrorParse::ExpectedLeftBracket: - return mlc::ErrorType::ParseExpectedLeftBracket; - case mini_jit::EinsumTree::ErrorParse::ExpectedRightBracket: - return mlc::ErrorType::ParseExpectedRightBracket; - case mini_jit::EinsumTree::ErrorParse::ExpectedArrow: - return mlc::ErrorType::ParseExpectedArrow; - case mini_jit::EinsumTree::ErrorParse::ExpectedComma: - return mlc::ErrorType::ParseExpectedComma; - case mini_jit::EinsumTree::ErrorParse::ExpectedDimensionList: - return mlc::ErrorType::ParseExpectedDimensionList; - case mini_jit::EinsumTree::ErrorParse::NotAllowedToParseAgain: - return mlc::ErrorType::ParseNotAllowedToParseAgain; - case mini_jit::EinsumTree::ErrorParse::UndefinedNode: - return mlc::ErrorType::ParseUndefinedNode; - default: - return mlc::ErrorType::Undefined; + get_sorted_dimensions_sizes>(root, inputs, sorted_dim_sizes); } - } - // TODO: doc - constexpr mlc::ErrorType convertErrorExecute(mini_jit::EinsumTree::ErrorExecute error) - { - if (static_cast(error) > 100) + // TODO: doc + constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, + std::vector &sorted_dim_sizes) { - return static_cast(static_cast(error)); + get_sorted_dimensions_sizes(root, inputs, sorted_dim_sizes); } - switch (error) + // TODO: doc + constexpr mlc::ErrorType convertParseError(mini_jit::EinsumTree::ErrorParse error) { - case mini_jit::EinsumTree::ErrorExecute::None: - return mlc::ErrorType::None; - case mini_jit::EinsumTree::ErrorExecute::InvalidRoot: - return mlc::ErrorType::EinsumInvalidRoot; - case mini_jit::EinsumTree::ErrorExecute::NotEnoughInputTensors: - return mlc::ErrorType::EinsumNotEnoughInputTensors; - case mini_jit::EinsumTree::ErrorExecute::TooManyInputTensors: 
- return mlc::ErrorType::EinsumTooManyInputTensors; - case mini_jit::EinsumTree::ErrorExecute::NullPtrAsInputTensor: - return mlc::ErrorType::EinsumNullPtrAsInputTensor; - default: - return mlc::ErrorType::Undefined; + switch (error) + { + case mini_jit::EinsumTree::ErrorParse::None: + return mlc::ErrorType::None; + case mini_jit::EinsumTree::ErrorParse::ExpectedLeftBracket: + return mlc::ErrorType::ParseExpectedLeftBracket; + case mini_jit::EinsumTree::ErrorParse::ExpectedRightBracket: + return mlc::ErrorType::ParseExpectedRightBracket; + case mini_jit::EinsumTree::ErrorParse::ExpectedArrow: + return mlc::ErrorType::ParseExpectedArrow; + case mini_jit::EinsumTree::ErrorParse::ExpectedComma: + return mlc::ErrorType::ParseExpectedComma; + case mini_jit::EinsumTree::ErrorParse::ExpectedDimensionList: + return mlc::ErrorType::ParseExpectedDimensionList; + case mini_jit::EinsumTree::ErrorParse::NotAllowedToParseAgain: + return mlc::ErrorType::ParseNotAllowedToParseAgain; + case mini_jit::EinsumTree::ErrorParse::UndefinedNode: + return mlc::ErrorType::ParseUndefinedNode; + default: + return mlc::ErrorType::Undefined; + } } - } - // TODO: doc - template mlc::Error einsum(const std::vector &inputs, mlc::Tensor &output, const std::string &tree) - { - mini_jit::EinsumTree einsumTree(tree); - mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); - if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + // TODO: doc + constexpr mlc::ErrorType convertErrorExecute(mini_jit::EinsumTree::ErrorExecute error) { - mlc::ErrorType type = convertParseError(errorParse); - return {type, "Failed during parsing the given einsum tree."}; - } - - std::vector sorted_dim_sizes; - get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); - einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + if (static_cast(error) > 100) + { + return static_cast(static_cast(error)); + } - std::vector tensors(inputs.size() + 1); - for (size_t i = 0; i < inputs.size(); 
i++) - { - tensors[i] = getTensor(inputs[i])->data; - assert(tensors[i] != nullptr); + switch (error) + { + case mini_jit::EinsumTree::ErrorExecute::None: + return mlc::ErrorType::None; + case mini_jit::EinsumTree::ErrorExecute::InvalidRoot: + return mlc::ErrorType::EinsumInvalidRoot; + case mini_jit::EinsumTree::ErrorExecute::NotEnoughInputTensors: + return mlc::ErrorType::EinsumNotEnoughInputTensors; + case mini_jit::EinsumTree::ErrorExecute::TooManyInputTensors: + return mlc::ErrorType::EinsumTooManyInputTensors; + case mini_jit::EinsumTree::ErrorExecute::NullPtrAsInputTensor: + return mlc::ErrorType::EinsumNullPtrAsInputTensor; + default: + return mlc::ErrorType::Undefined; + } } - tensors[inputs.size()] = output.data; - mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); - if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) + // TODO: doc + template mlc::Error einsum(const std::vector &inputs, mlc::Tensor &output, const std::string &tree) { - mlc::ErrorType type = convertErrorExecute(errorExecute); - return {type, "Failed during calculation of the einsum tree."}; - } + mini_jit::EinsumTree einsumTree(tree); + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = convertParseError(errorParse); + return {type, "Failed during parsing the given einsum tree."}; + } - return {mlc::ErrorType::None, "Success"}; - } + std::vector sorted_dim_sizes; + get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); - // TODO: doc - constexpr uint64_t getTensorSize(const mlc::Tensor *tensor) - { - uint64_t size = 1; - for (auto dim : tensor->dim_sizes) + std::vector tensors(inputs.size() + 1); + for (size_t i = 0; i < inputs.size(); i++) + { + tensors[i] = getTensor(inputs[i])->data; + assert(tensors[i] != nullptr); + } + tensors[inputs.size()] = output.data; + + 
mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); + if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) + { + mlc::ErrorType type = convertErrorExecute(errorExecute); + return {type, "Failed during calculation of the einsum tree."}; + } + + return {mlc::ErrorType::None, "Success"}; + } + + // TODO: doc + constexpr uint64_t getTensorSize(const mlc::Tensor *tensor) { - size *= dim; + uint64_t size = 1; + for (auto dim : tensor->dim_sizes) + { + size *= dim; + } + return size; } - return size; - } + + } // namespace internal } // namespace mlc #endif // MLC_TENSORUTILS_H \ No newline at end of file diff --git a/src/test/interface/Tensor.test.cpp b/src/test/interface/Tensor.test.cpp index 1475202..4f0e551 100644 --- a/src/test/interface/Tensor.test.cpp +++ b/src/test/interface/Tensor.test.cpp @@ -72,7 +72,7 @@ TEST_CASE("Test tensor fill_lambda", "[tensor][correctness]") } } -TEST_CASE("Test tensor einsum", "[tensor][correctness]") +TEST_CASE("Test tensor einsum reference", "[tensor][correctness]") { std::vector shape1 = {3, 4}; std::vector shape2 = {4, 5}; @@ -106,6 +106,42 @@ TEST_CASE("Test tensor einsum", "[tensor][correctness]") mlc::einsum({tensor1, tensor2}, tensor3, "[0,1],[1,2]->[0,2]"); } +TEST_CASE("Test tensor einsum pointer", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4}; + std::vector shape2 = {4, 5}; + std::vector shape3 = {3, 5}; + + size_t total_size1 = shape1[0] * shape1[1]; + size_t total_size2 = shape2[0] * shape2[1]; + size_t total_size3 = shape3[0] * shape3[1]; + + float *data1 = new float[total_size1]; + float *data2 = new float[total_size2]; + float *data3 = new float[total_size3]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = static_cast(i); + } + for (size_t i = 0; i < total_size2; ++i) + { + data2[i] = static_cast(2 * i); + } + for (size_t i = 0; i < total_size3; ++i) + { + data3[i] = static_cast(3 * i); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::Tensor tensor2(data2, 
shape2); + mlc::Tensor tensor3(data2, shape3); + std::vector inputs{&tensor1, &tensor2}; + + CAPTURE(inputs); + mlc::einsum(inputs, tensor3, "[0,1],[1,2]->[0,2]"); +} + TEST_CASE("Test tensor contraction", "[tensor][correctness]") { std::vector shape1 = {3, 4}; diff --git a/src/test/interface/TensorUtils.test.cpp b/src/test/interface/TensorUtils.test.cpp index c809de4..99c93f9 100644 --- a/src/test/interface/TensorUtils.test.cpp +++ b/src/test/interface/TensorUtils.test.cpp @@ -40,7 +40,7 @@ TEST_CASE("Test tensor utils get_sorted_dimensions_sizes", "[tensor][correctness tree.parse_tree(); std::vector sorted_dimensions_sizes; - mlc::get_sorted_dimensions_sizes(tree.get_root(), {tensor1, tensor2}, sorted_dimensions_sizes); + mlc::internal::get_sorted_dimensions_sizes(tree.get_root(), {tensor1, tensor2}, sorted_dimensions_sizes); std::vector expected = {3, 4, 5}; From 3d76792663c21e343d9b16dcab9c900a4112da1a Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Tue, 1 Jul 2025 12:26:37 +0000 Subject: [PATCH 11/17] Finished lib Co-authored-by: Fabian Hofer --- include/MachineLearningCompiler/Error.h | 6 +- include/MachineLearningCompiler/Tensor.h | 25 +- include/MachineLearningCompiler/UnaryType.h | 16 ++ src/interface/Tensor.cpp | 235 ++++++++++++++++-- src/interface/TensorUtils.h | 57 ++++- src/main/EinsumTree.h | 16 +- src/main/TensorOperation.cpp | 3 +- src/main/TensorOptimization.cpp | 21 +- src/main/TensorOptimization.h | 2 +- src/test/interface/Setup.test.cpp | 2 +- src/test/interface/Tensor.test.cpp | 249 +++++++++++++++++++- src/test/interface/TensorUtils.test.cpp | 2 +- 12 files changed, 587 insertions(+), 47 deletions(-) create mode 100644 include/MachineLearningCompiler/UnaryType.h diff --git a/include/MachineLearningCompiler/Error.h b/include/MachineLearningCompiler/Error.h index e226d2e..a70fea7 100644 --- a/include/MachineLearningCompiler/Error.h +++ b/include/MachineLearningCompiler/Error.h @@ -5,7 +5,7 @@ namespace 
mlc { - enum ErrorType : int64_t + enum class ErrorType : int64_t { Undefined = -1, None = 0, @@ -42,6 +42,10 @@ namespace mlc ExecuteInvalidStrides = 113, ExecuteKDimensionMustNotBeShared = 114, ExecuteSharedRequiredForParallelExecution = 115, + + // Tensor Errors + TensorExpected2DTensor = 201, + ExpectedSingleContraction = 202, }; struct Error diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index e785c00..8967042 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -1,6 +1,7 @@ #ifndef MLC_TENSOR_H #define MLC_TENSOR_H #include "Error.h" +#include "UnaryType.h" #include #include #include @@ -113,23 +114,37 @@ namespace mlc Error contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction); /** - * @brief Perform a general matrix-matrix multiplication and adds it to the output. + * @brief Performs a contraction on two input tensor and one output tensor. Before and after the contraction, a first touch unary and a + * last touch unary are applied to the output tensor. * * @param input0 The first input tensor. * @param input1 The second input tensor. * @param output The output to add the result to. + * @param contraction The string to show the dimension to be contracted in the format [in0],[in1]->[out]. + * @param firstTouch The unary that should be execute before the contraction. + * @param lastTouch The unary that should be executed after the contraction. * @return Error The error code or ErrorType::None on success. */ - Error gemm(const Tensor &input0, const Tensor &input1, Tensor output); + Error contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction, const UnaryType firstTouch, + const UnaryType lastTouch); + + /** + * @brief Perform a general matrix-matrix multiplication and adds it to the output. 
+ * + * @param input0 The first input tensor in the form MxK where M is the leading dimension. + * @param input1 The second input tensor in the form KxN where K is the leading dimension. + * @param output The output to add the result to in the form MxN where M is the leading dimension. + * @return Error The error code or ErrorType::None on success. + */ + Error gemm(const Tensor &input0, const Tensor &input1, Tensor &output); /** * @brief Performs a zero unary that sets the output tensor to zero. * * @param input The input tensor. - * @param output The output tensor. * @return Error The error code or ErrorType::None on success. */ - Error unary_zero(const Tensor &input, Tensor &output); + Error unary_zero(Tensor &input); /** * @brief Performs a relu unary that applies Rectified Linear Unit on the tensor input. @@ -147,7 +162,7 @@ namespace mlc * @param output The output tensor. * @return Error The error code or ErrorType::None on success. */ - Error unary_identity(const Tensor &input, Tensor output); + Error unary_identity(const Tensor &input, Tensor &output); } // namespace mlc #endif // MLC_TENSOR \ No newline at end of file diff --git a/include/MachineLearningCompiler/UnaryType.h b/include/MachineLearningCompiler/UnaryType.h new file mode 100644 index 0000000..9aefbaa --- /dev/null +++ b/include/MachineLearningCompiler/UnaryType.h @@ -0,0 +1,16 @@ +#ifndef MLC_UNARY_H +#define MLC_UNARY_H +#include + +namespace mlc +{ + enum class UnaryType : int64_t + { + None = 0, + Zero = 1, + ReLu = 2, + Identity = 3, + }; +} // namespace mlc + +#endif // MLC_UNARY_H \ No newline at end of file diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index cf6593b..2f921d5 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -1,5 +1,6 @@ #include "../../include/MachineLearningCompiler/Tensor.h" #include "../main/EinsumTree.h" +#include "../main/TensorOperation.h" #include "TensorUtils.h" #include @@ -13,7 +14,7 @@ void mlc::fill_random(Tensor 
&tensor) uint64_t size = internal::getTensorSize(&tensor); #ifdef MLC_USE_OPENMP -#pragma omp parallel for simd +#pragma omp parallel for #endif for (size_t i = 0; i < size; ++i) { @@ -43,7 +44,7 @@ void mlc::fill_number(Tensor &tensor, float number) uint64_t size = internal::getTensorSize(&tensor); #ifdef MLC_USE_OPENMP -#pragma omp parallel for simd +#pragma omp parallel for #endif for (size_t i = 0; i < size; i++) { @@ -61,7 +62,7 @@ void mlc::fill_lambda(Tensor &tensor, std::function>({input0, input1}, output, contraction); } -mlc::Error mlc::unary_zero(const Tensor &input, Tensor &output) +mlc::Error mlc::contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction, + const UnaryType firstTouch, const UnaryType lastTouch) { - (void)input; - (void)output; - return {ErrorType::None, ""}; + mini_jit::EinsumTree einsumTree(contraction); + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = internal::convertParseError(errorParse); + return {type, "Failed during parsing the given einsum tree."}; + } + if (einsumTree.get_root()->left->type != mini_jit::EinsumTree::NodeType::Leaf || + einsumTree.get_root()->right->type != mini_jit::EinsumTree::NodeType::Leaf) + { + return {mlc::ErrorType::ExpectedSingleContraction, "Expected the given einsum string to be a single string."}; + } + + std::vector sorted_dim_sizes; + internal::get_sorted_dimensions_sizes(einsumTree.get_root(), {input0, input1}, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config = einsumTree.lower_node(einsumTree.get_root()); + config.first_touch = internal::convertPrimitiveType(firstTouch); + config.last_touch = internal::convertPrimitiveType(lastTouch); + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = 
internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input0.data, input1.data, output.data); + return {ErrorType::None, "Success"}; +} + +mlc::Error mlc::gemm(const Tensor &input0, const Tensor &input1, Tensor &output) +{ + if (input0.dim_sizes.size() != 2 || input1.dim_sizes.size() != 2 || output.dim_sizes.size() != 2) + { + return {ErrorType::TensorExpected2DTensor, "GEMM requires input0 and input1 to be 2D tensors and output to be a 2D tensor."}; + } + + int64_t mSize = static_cast(input0.dim_sizes[1]); + int64_t nSize = static_cast(input1.dim_sizes[0]); + int64_t kSize = static_cast(input0.dim_sizes[0]); + + if (static_cast(output.dim_sizes[1]) != mSize) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same m dimension size as the input0."}; + } + + if (static_cast(output.dim_sizes[0]) != nSize) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same n dimension size as the input1."}; + } + + if (static_cast(input1.dim_sizes[1]) != kSize) + { + return {ErrorType::ExecuteWrongDimension, "Expected the input1 tensor to have the same k dimension size as the input0."}; + } + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config{ + mini_jit::TensorConfig::prim_t::none, // first_touch + mini_jit::TensorConfig::prim_t::gemm, // main + mini_jit::TensorConfig::prim_t::none, // last touch + {mini_jit::TensorConfig::dim_t::m, mini_jit::TensorConfig::dim_t::n, mini_jit::TensorConfig::dim_t::k}, // dim_types + {mini_jit::TensorConfig::exec_t::prim, mini_jit::TensorConfig::exec_t::prim, mini_jit::TensorConfig::exec_t::prim}, // exec_types + {mSize, nSize, kSize}, // dim_sizes + {1, 0, mSize}, // strides_in0 + {0, kSize, 1}, // strides_in1 + {1, mSize, 0}, // strides_out + mini_jit::TensorConfig::dtype_t::fp32, // dtype_t + }; + + 
mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input0.data, input1.data, output.data); + return {ErrorType::None, "Success"}; +} + +mlc::Error mlc::unary_zero(Tensor &input) +{ + int64_t stride = 1; + std::vector dimSizes(input.dim_sizes.size()); + std::vector strides(input.dim_sizes.size()); + + for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) + { + strides[i] = stride; + dimSizes[i] = static_cast(input.dim_sizes[i]); + stride *= input.dim_sizes[i]; + } + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config{ + mini_jit::TensorConfig::prim_t::none, // first_touch + mini_jit::TensorConfig::prim_t::zero, // main + mini_jit::TensorConfig::prim_t::none, // last touch + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types + dimSizes, // dim_sizes + strides, // strides_in0 + std::vector(input.dim_sizes.size(), 0), // strides_in1 + strides, // strides_out + mini_jit::TensorConfig::dtype_t::fp32, // dtype_t + }; + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input.data, nullptr, input.data); + return {ErrorType::None, "Success"}; } mlc::Error mlc::unary_relu(const Tensor &input, Tensor &output) { - (void)input; - (void)output; - return {ErrorType::None, ""}; + if (output.dim_sizes.size() != input.dim_sizes.size()) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; + } + + for (size_t i = 0; i < 
input.dim_sizes.size(); i++) + { + if (output.dim_sizes[i] != input.dim_sizes[i]) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; + } + } + + int64_t stride = 1; + std::vector dimSizes(input.dim_sizes.size()); + std::vector strides(input.dim_sizes.size()); + + for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) + { + strides[i] = stride; + dimSizes[i] = static_cast(input.dim_sizes[i]); + stride *= input.dim_sizes[i]; + } + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config{ + mini_jit::TensorConfig::prim_t::none, // first_touch + mini_jit::TensorConfig::prim_t::relu, // main + mini_jit::TensorConfig::prim_t::none, // last touch + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types + dimSizes, // dim_sizes + strides, // strides_in0 + std::vector(input.dim_sizes.size(), 0), // strides_in1 + strides, // strides_out + mini_jit::TensorConfig::dtype_t::fp32, // dtype_t + }; + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input.data, nullptr, output.data); + return {ErrorType::None, "Success"}; } -mlc::Error mlc::unary_identity(const Tensor &input, Tensor output) +mlc::Error mlc::unary_identity(const Tensor &input, Tensor &output) { - (void)input; - (void)output; - return {ErrorType::None, ""}; + if (output.dim_sizes.size() != input.dim_sizes.size()) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; + } + + for (size_t i = 0; i < input.dim_sizes.size(); i++) + { + if (output.dim_sizes[i] != input.dim_sizes[i]) + { + return 
{ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; + } + } + + int64_t stride = 1; + std::vector dimSizes(input.dim_sizes.size()); + std::vector strides(input.dim_sizes.size()); + + for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) + { + strides[i] = stride; + dimSizes[i] = static_cast(input.dim_sizes[i]); + stride *= input.dim_sizes[i]; + } + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config{ + mini_jit::TensorConfig::prim_t::none, // first_touch + mini_jit::TensorConfig::prim_t::copy, // main + mini_jit::TensorConfig::prim_t::none, // last touch + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types + dimSizes, // dim_sizes + strides, // strides_in0 + std::vector(input.dim_sizes.size(), 0), // strides_in1 + strides, // strides_out + mini_jit::TensorConfig::dtype_t::fp32, // dtype_t + }; + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input.data, nullptr, output.data); + return {ErrorType::None, "Success"}; } diff --git a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index 2cae6e0..a45b881 100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -32,7 +32,6 @@ namespace mlc */ template <> constexpr const mlc::Tensor *getTensor(mlc::Tensor *const &tensor) { - std::cout << tensor << std::endl; return tensor; } @@ -152,6 +151,47 @@ namespace mlc } } + constexpr mlc::ErrorType convertTensorOperationError(mini_jit::TensorOperation::error_t error) + { + switch (error) + { + case mini_jit::TensorOperation::error_t::success: + return mlc::ErrorType::None; + case 
mini_jit::TensorOperation::error_t::err_wrong_dtype: + return mlc::ErrorType::ExecuteWrongDType; + case mini_jit::TensorOperation::error_t::err_wrong_dimension: + return mlc::ErrorType::ExecuteWrongDimension; + case mini_jit::TensorOperation::error_t::err_wrong_primitive: + return mlc::ErrorType::ExecuteWrongPrimitive; + case mini_jit::TensorOperation::error_t::err_wrong_first_touch_primitive: + return mlc::ErrorType::ExecuteFirstTouchPrimitive; + case mini_jit::TensorOperation::error_t::err_wrong_main_primitive: + return mlc::ErrorType::ExecuteWrongMainPrimitive; + case mini_jit::TensorOperation::error_t::err_wrong_last_touch_primitive: + return mlc::ErrorType::ExecuteWrongLastTouchPrimitive; + case mini_jit::TensorOperation::error_t::err_execution_type_not_supported: + return mlc::ErrorType::ExecuteTypeNotSupported; + case mini_jit::TensorOperation::error_t::err_invalid_primitive_configuration: + return mlc::ErrorType::ExecuteInvalidPrimitiveConfiguration; + case mini_jit::TensorOperation::error_t::err_invalid_first_touch_configuration: + return mlc::ErrorType::ExecuteInvalidFirstTouchConfiguration; + case mini_jit::TensorOperation::error_t::err_invalid_main_configuration: + return mlc::ErrorType::ExecuteInvalidMainConfiguration; + case mini_jit::TensorOperation::error_t::err_invalid_last_touch_configuration: + return mlc::ErrorType::ExecuteInvalidLastTouchConfiguration; + case mini_jit::TensorOperation::error_t::err_invalid_execution_order: + return mlc::ErrorType::ExecuteInvalidExecutionOrder; + case mini_jit::TensorOperation::error_t::err_invalid_strides: + return mlc::ErrorType::ExecuteInvalidStrides; + case mini_jit::TensorOperation::error_t::err_k_dimension_must_not_be_shared: + return mlc::ErrorType::ExecuteKDimensionMustNotBeShared; + case mini_jit::TensorOperation::error_t::err_shared_required_for_parallel_execution: + return mlc::ErrorType::ExecuteSharedRequiredForParallelExecution; + default: + return mlc::ErrorType::Undefined; + } + } + // TODO: doc 
template mlc::Error einsum(const std::vector &inputs, mlc::Tensor &output, const std::string &tree) { @@ -196,6 +236,21 @@ namespace mlc return size; } + // TODO: doc + constexpr mini_jit::TensorConfig::prim_t convertPrimitiveType(mlc::UnaryType type) + { + switch (type) + { + case mlc::UnaryType::None: + return mini_jit::TensorConfig::prim_t::none; + case mlc::UnaryType::Identity: + return mini_jit::TensorConfig::prim_t::copy; + case mlc::UnaryType::Zero: + return mini_jit::TensorConfig::prim_t::zero; + case mlc::UnaryType::ReLu: + return mini_jit::TensorConfig::prim_t::relu; + } + } } // namespace internal } // namespace mlc #endif // MLC_TENSORUTILS_H \ No newline at end of file diff --git a/src/main/EinsumTree.h b/src/main/EinsumTree.h index 5eaaa20..e556d8d 100644 --- a/src/main/EinsumTree.h +++ b/src/main/EinsumTree.h @@ -119,14 +119,6 @@ namespace mini_jit EinsumNode *parse_node(size_t &pos, const std::string &str); // Lowering - /** - * Lowers the given EinsumNode to a TensorConfig. - * - * @param node The EinsumNode to lower. - * @return A TensorConfig representing the lowered node. - */ - TensorConfig lower_node(const EinsumNode *node); - /** * Retrieves the dimension types and sizes for the given EinsumNode. * @@ -311,6 +303,14 @@ namespace mini_jit * @return ErrorExecute indicating the result of the execution operation. */ ErrorExecute execute(const std::vector &tensors); + + /** + * Lowers the given EinsumNode to a TensorConfig. + * + * @param node The EinsumNode to lower. + * @return A TensorConfig representing the lowered node. 
+ */ + TensorConfig lower_node(const EinsumNode *node); }; }; // namespace mini_jit diff --git a/src/main/TensorOperation.cpp b/src/main/TensorOperation.cpp index 5e34e40..88cb2ca 100644 --- a/src/main/TensorOperation.cpp +++ b/src/main/TensorOperation.cpp @@ -107,13 +107,14 @@ int32_t mini_jit::TensorOperation::findMatch(const std::span= dim.size()) { return -1; } + release_assert(startIndex < dim.size(), "Expected the start index to be less than the dimension types size."); + for (auto [iDim, iExec] = std::tuple{dim.begin() + startIndex, exec.begin() + startIndex}; iDim != dim.end(); ++iDim, ++iExec) { if (*iDim == searchDim && *iExec == searchExec) diff --git a/src/main/TensorOptimization.cpp b/src/main/TensorOptimization.cpp index a89d89e..e7f1588 100644 --- a/src/main/TensorOptimization.cpp +++ b/src/main/TensorOptimization.cpp @@ -93,7 +93,11 @@ void mini_jit::TensorOptimization::_primitive_identification(TensorConfig &confi if (fixed_k2 == false && (primitive_k2 == -1 || primitive_stride < primitive_k2_stride)) { - primitive_k2 = std::distance(config.dim_types.begin(), iDim); + int32_t index = std::distance(config.dim_types.begin(), iDim); + if (index != primitive_k1) + { + primitive_k2 = index; + } } } else if (*iDim == TensorConfig::dim_t::m) @@ -260,7 +264,7 @@ void mini_jit::TensorOptimization::_dimension_reordering_shared(TensorConfig &co } if (primitive_m != -1) { - int32_t new_index = config.dim_types.size() - 2 - (primitive_k1 != -1); + int32_t new_index = config.dim_types.size() - 1 - (primitive_n != -1) - (primitive_k1 != -1); _swap_elements(config, primitive_m, new_index); _reorder_helper_adjust_index(new_index, primitive_m, primitive_m, primitive_n, primitive_k1, primitive_k2); primitive_m = new_index; @@ -410,7 +414,7 @@ void mini_jit::TensorOptimization::_dimension_reordering_fusing(TensorConfig &co } } -void mini_jit::TensorOptimization::_swap_elements(TensorConfig &config, size_t index1, size_t index2) +void 
mini_jit::TensorOptimization::_swap_elements(TensorConfig &config, int64_t index1, int64_t index2) { if (index1 == index2) { @@ -424,8 +428,10 @@ void mini_jit::TensorOptimization::_swap_elements(TensorConfig &config, size_t i release_assert(config.dim_types.size() == config.strides_in0.size(), "Expected the dimension types size to match the strides_in0 size."); release_assert(config.dim_types.size() == config.strides_in1.size(), "Expected the dimension types size to match the strides_in1 size."); release_assert(config.dim_types.size() == config.strides_out.size(), "Expected the dimension types size to match the strides_out size."); - release_assert(index1 < config.dim_types.size(), "Expected the index1 to be less than the dimension types size."); - release_assert(index2 < config.dim_types.size(), "Expected the index2 to be less than the dimension types size."); + release_assert(index1 < static_cast(config.dim_types.size()), "Expected the index1 to be less than the dimension types size."); + release_assert(index2 < static_cast(config.dim_types.size()), "Expected the index2 to be less than the dimension types size."); + release_assert(index1 >= 0, "Expected the index1 to be larger equal than 0."); + release_assert(index2 >= 0, "Expected the index2 to be larger equal than 0."); std::iter_swap(config.dim_types.begin() + index1, config.dim_types.begin() + index2); std::iter_swap(config.dim_sizes.begin() + index1, config.dim_sizes.begin() + index2); @@ -498,6 +504,11 @@ void mini_jit::TensorOptimization::_dimension_fusing(TensorConfig &config) { for (size_t i = 0; i + 1 < config.dim_sizes.size(); ++i) { + if (config.dim_sizes.size() <= 2) + { + return; + } + // Check if adjacent dims have the same type and their product is less equal than 256 // stride(X) = |Y| * stride(Y) if (config.dim_types[i] == config.dim_types[i + 1] && config.strides_in0[i] == (config.dim_sizes[i + 1] * config.strides_in0[i + 1]) && diff --git a/src/main/TensorOptimization.h 
b/src/main/TensorOptimization.h index d018932..66ee34f 100644 --- a/src/main/TensorOptimization.h +++ b/src/main/TensorOptimization.h @@ -74,7 +74,7 @@ namespace mini_jit * @param index1 The index of element 1 to be set a position of index2. * @param index2 The index of element 2 ot be set a position of index1. */ - void _swap_elements(TensorConfig &config, size_t index1, size_t index2); + void _swap_elements(TensorConfig &config, int64_t index1, int64_t index2); /** * @brief Moves an element from the old index to the new index position. diff --git a/src/test/interface/Setup.test.cpp b/src/test/interface/Setup.test.cpp index 937165d..fd6f051 100644 --- a/src/test/interface/Setup.test.cpp +++ b/src/test/interface/Setup.test.cpp @@ -5,7 +5,7 @@ #include #include -TEST_CASE("Test tensor einsum setup", "[setup][correctness]") +TEST_CASE("Test interface tensor einsum setup", "[setup][correctness]") { std::vector shape1 = {3, 4}; std::vector shape2 = {4, 5}; diff --git a/src/test/interface/Tensor.test.cpp b/src/test/interface/Tensor.test.cpp index 4f0e551..f91700b 100644 --- a/src/test/interface/Tensor.test.cpp +++ b/src/test/interface/Tensor.test.cpp @@ -6,7 +6,7 @@ #include #include -TEST_CASE("Test tensor fill_random", "[tensor][correctness]") +TEST_CASE("Test interface tensor fill_random", "[tensor][correctness]") { std::vector shape1 = {3, 4}; @@ -28,7 +28,7 @@ TEST_CASE("Test tensor fill_random", "[tensor][correctness]") } } -TEST_CASE("Test tensor fill_number", "[tensor][correctness]") +TEST_CASE("Test interface tensor fill_number", "[tensor][correctness]") { std::vector shape1 = {3, 4}; @@ -50,7 +50,7 @@ TEST_CASE("Test tensor fill_number", "[tensor][correctness]") } } -TEST_CASE("Test tensor fill_lambda", "[tensor][correctness]") +TEST_CASE("Test interface tensor fill_lambda", "[tensor][correctness]") { std::vector shape1 = {3, 4}; @@ -72,7 +72,7 @@ TEST_CASE("Test tensor fill_lambda", "[tensor][correctness]") } } -TEST_CASE("Test tensor einsum reference", 
"[tensor][correctness]") +TEST_CASE("Test interface tensor einsum reference", "[tensor][correctness]") { std::vector shape1 = {3, 4}; std::vector shape2 = {4, 5}; @@ -103,10 +103,15 @@ TEST_CASE("Test tensor einsum reference", "[tensor][correctness]") mlc::Tensor tensor2(data2, shape2); mlc::Tensor tensor3(data2, shape3); - mlc::einsum({tensor1, tensor2}, tensor3, "[0,1],[1,2]->[0,2]"); + mlc::Error err = mlc::einsum({tensor1, tensor2}, tensor3, "[0,1],[1,2]->[0,2]"); + REQUIRE(err.type == mlc::ErrorType::None); + + delete[] data1; + delete[] data2; + delete[] data3; } -TEST_CASE("Test tensor einsum pointer", "[tensor][correctness]") +TEST_CASE("Test interface tensor einsum pointer", "[tensor][correctness]") { std::vector shape1 = {3, 4}; std::vector shape2 = {4, 5}; @@ -139,10 +144,231 @@ TEST_CASE("Test tensor einsum pointer", "[tensor][correctness]") std::vector inputs{&tensor1, &tensor2}; CAPTURE(inputs); - mlc::einsum(inputs, tensor3, "[0,1],[1,2]->[0,2]"); + mlc::Error err = mlc::einsum(inputs, tensor3, "[0,1],[1,2]->[0,2]"); + REQUIRE(err.type == mlc::ErrorType::None); + + delete[] data1; + delete[] data2; + delete[] data3; +} + +TEST_CASE("Test interface tensor contraction", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4}; + std::vector shape2 = {4, 5}; + std::vector shape3 = {3, 5}; + + size_t total_size1 = shape1[0] * shape1[1]; + size_t total_size2 = shape2[0] * shape2[1]; + size_t total_size3 = shape3[0] * shape3[1]; + + float *data1 = new float[total_size1]; + float *data2 = new float[total_size2]; + float *data3 = new float[total_size3]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = static_cast(i); + } + for (size_t i = 0; i < total_size2; ++i) + { + data2[i] = static_cast(2 * i); + } + for (size_t i = 0; i < total_size3; ++i) + { + data3[i] = static_cast(3 * i); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::Tensor tensor2(data2, shape2); + mlc::Tensor tensor3(data2, shape3); + + mlc::Error err = 
mlc::contraction(tensor1, tensor2, tensor3, "[0,1],[1,2]->[0,2]"); + REQUIRE(err.type == mlc::ErrorType::None); + + delete[] data1; + delete[] data2; + delete[] data3; +} + +TEST_CASE("Test interface tensor gemm", "[tensor][correctness]") +{ + std::vector shape1 = {4, 3}; // k, m + std::vector shape2 = {5, 4}; // n, k + std::vector shape3 = {5, 3}; // n, m + + size_t total_size1 = shape1[0] * shape1[1]; + size_t total_size2 = shape2[0] * shape2[1]; + size_t total_size3 = shape3[0] * shape3[1]; + + float *data1 = new float[total_size1]; + float *data2 = new float[total_size2]; + float *data3 = new float[total_size3]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = static_cast(i); + } + for (size_t i = 0; i < total_size2; ++i) + { + data2[i] = static_cast(2 * i); + } + for (size_t i = 0; i < total_size3; ++i) + { + data3[i] = static_cast(3 * i); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::Tensor tensor2(data2, shape2); + mlc::Tensor tensor3(data2, shape3); + + mlc::Error err = mlc::gemm(tensor1, tensor2, tensor3); + REQUIRE(err.type == mlc::ErrorType::None); + + delete[] data1; + delete[] data2; + delete[] data3; +} + +TEST_CASE("Test interface tensor gemm failure", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4, 5}; // Invalid shape for GEMM, should be 2D + std::vector shape2 = {4, 5}; + std::vector shape3 = {3, 5}; + + size_t total_size1 = shape1[0] * shape1[1]; + size_t total_size2 = shape2[0] * shape2[1]; + size_t total_size3 = shape3[0] * shape3[1]; + + float *data1 = new float[total_size1]; + float *data2 = new float[total_size2]; + float *data3 = new float[total_size3]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = static_cast(i); + } + for (size_t i = 0; i < total_size2; ++i) + { + data2[i] = static_cast(2 * i); + } + for (size_t i = 0; i < total_size3; ++i) + { + data3[i] = static_cast(3 * i); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::Tensor tensor2(data2, shape2); + mlc::Tensor tensor3(data2, shape3); 
+ + mlc::Error err = mlc::gemm(tensor1, tensor2, tensor3); + REQUIRE(err.type == mlc::ErrorType::TensorExpected2DTensor); + + delete[] data1; + delete[] data2; + delete[] data3; +} + +TEST_CASE("Test interface tensor unary zero", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4, 5}; + + size_t total_size1 = shape1[0] * shape1[1] * shape1[2]; + + float *data1 = new float[total_size1]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = static_cast(i); + } + + mlc::Tensor tensor1(data1, shape1); + + mlc::Error err = mlc::unary_zero(tensor1); + REQUIRE(err.type == mlc::ErrorType::None); + + for (size_t i = 0; i < total_size1; i++) + { + CAPTURE(i); + REQUIRE(tensor1.data[i] == 0); + } + + delete[] data1; } -TEST_CASE("Test tensor contraction", "[tensor][correctness]") +TEST_CASE("Test interface tensor unary relu", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4, 5}; + std::vector shape2 = {3, 4, 5}; + + size_t total_size1 = shape1[0] * shape1[1] * shape1[2]; + size_t total_size2 = shape2[0] * shape2[1] * shape2[2]; + + float *data1 = new float[total_size1]; + float *data2 = new float[total_size2]; + + for (int64_t i = 0; i < static_cast(total_size1); ++i) + { + data1[i] = static_cast(i * (2 * (i % 2) - 1)); + } + for (size_t i = 0; i < total_size2; ++i) + { + data2[i] = 0; + } + + mlc::Tensor tensor1(data1, shape1); + mlc::Tensor tensor2(data2, shape2); + + mlc::Error err = mlc::unary_relu(tensor1, tensor2); + REQUIRE(err.type == mlc::ErrorType::None); + + for (size_t i = 0; i < total_size1; i++) + { + CAPTURE(i); + REQUIRE(tensor2.data[i] == std::max(0.0f, tensor1.data[i])); + } + + delete[] data1; + delete[] data2; +} + +TEST_CASE("Test interface tensor unary identity", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4, 5}; + std::vector shape2 = {3, 4, 5}; + + size_t total_size1 = shape1[0] * shape1[1] * shape1[2]; + size_t total_size2 = shape2[0] * shape2[1] * shape2[2]; + + float *data1 = new float[total_size1]; + float *data2 = 
new float[total_size2]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = static_cast(i); + } + for (size_t i = 0; i < total_size2; ++i) + { + data2[i] = 0; + } + + mlc::Tensor tensor1(data1, shape1); + mlc::Tensor tensor2(data2, shape2); + + mlc::Error err = mlc::unary_identity(tensor1, tensor2); + REQUIRE(err.type == mlc::ErrorType::None); + + for (size_t i = 0; i < total_size1; i++) + { + CAPTURE(i); + REQUIRE(tensor1.data[i] == tensor2.data[i]); + } + + delete[] data1; + delete[] data2; +} + +TEST_CASE("Test interface tensor contraction first+last", "[tensor][correctness]") { std::vector shape1 = {3, 4}; std::vector shape2 = {4, 5}; @@ -173,5 +399,10 @@ TEST_CASE("Test tensor contraction", "[tensor][correctness]") mlc::Tensor tensor2(data2, shape2); mlc::Tensor tensor3(data2, shape3); - mlc::contraction(tensor1, tensor2, tensor3, "[0,1],[1,2]->[0,2]"); + mlc::Error err = mlc::contraction(tensor1, tensor2, tensor3, "[0,1],[1,2]->[0,2]", mlc::UnaryType::None, mlc::UnaryType::None); + REQUIRE(err.type == mlc::ErrorType::None); + + delete[] data1; + delete[] data2; + delete[] data3; } \ No newline at end of file diff --git a/src/test/interface/TensorUtils.test.cpp b/src/test/interface/TensorUtils.test.cpp index 99c93f9..9748cdc 100644 --- a/src/test/interface/TensorUtils.test.cpp +++ b/src/test/interface/TensorUtils.test.cpp @@ -5,7 +5,7 @@ #include #include -TEST_CASE("Test tensor utils get_sorted_dimensions_sizes", "[tensor][correctness]") +TEST_CASE("Test interface tensor utils get_sorted_dimensions_sizes", "[tensor][correctness]") { std::vector shape1 = {3, 4}; std::vector shape2 = {4, 5}; From 20443d463707f4cb331557d12d5be709f74db4a0 Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Wed, 2 Jul 2025 11:10:35 +0000 Subject: [PATCH 12/17] fix ci --- src/interface/TensorUtils.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index a45b881..972054c 
100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -249,6 +249,8 @@ namespace mlc return mini_jit::TensorConfig::prim_t::zero; case mlc::UnaryType::ReLu: return mini_jit::TensorConfig::prim_t::relu; + default: + return mini_jit::TensorConfig::prim_t::none; } } } // namespace internal From 8fe50c1777d90b9587a55339ab5b3c00c42525b2 Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Wed, 2 Jul 2025 13:19:37 +0000 Subject: [PATCH 13/17] cleanup Co-authored-by: Fabian Hofer --- CMakeLists.txt | 10 +- include/MachineLearningCompiler/All.h | 8 - include/MachineLearningCompiler/Setup.h | 58 ----- include/MachineLearningCompiler/Tensor.h | 43 ++++ src/interface/Contraction.cpp | 46 ++++ src/interface/Einsum.cpp | 83 ++++++ src/interface/Einsum.h | 151 +++++++++++ src/interface/Gemm.cpp | 54 ++++ src/interface/Setup.cpp | 14 - src/interface/SetupEinsum.cpp | 51 ---- src/interface/SetupEinsum.h | 121 --------- src/interface/Tensor.cpp | 243 ------------------ src/interface/TensorUtils.h | 114 +++++--- src/interface/Unary.cpp | 143 +++++++++++ ...etup.test.cpp => TensorOperation.test.cpp} | 13 +- 15 files changed, 607 insertions(+), 545 deletions(-) delete mode 100644 include/MachineLearningCompiler/All.h delete mode 100644 include/MachineLearningCompiler/Setup.h create mode 100644 src/interface/Contraction.cpp create mode 100644 src/interface/Einsum.cpp create mode 100644 src/interface/Einsum.h create mode 100644 src/interface/Gemm.cpp delete mode 100644 src/interface/Setup.cpp delete mode 100644 src/interface/SetupEinsum.cpp delete mode 100644 src/interface/SetupEinsum.h create mode 100644 src/interface/Unary.cpp rename src/test/interface/{Setup.test.cpp => TensorOperation.test.cpp} (58%) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb23771..dc5d883 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -274,13 +274,19 @@ set(BENCH_KERNLES_FILES ) set(SRC_INTERFACE_FILES - TensorUtils.h + Contraction.cpp + 
Einsum.cpp + Einsum.h + Gemm.cpp Tensor.cpp + TensorUtils.h + Unary.cpp ) set(TEST_INTERFACE_FILES TensorUtils.test.cpp Tensor.test.cpp + TensorOperation.test.cpp ) foreach(file ${SRC_MAIN_FILES}) @@ -326,6 +332,8 @@ endforeach() # ==== Public headers of the installed library ==== set(public_headers include/${PROJECT_NAME}/Tensor.h + include/${PROJECT_NAME}/Error.h + include/${PROJECT_NAME}/UnaryType.h ) list(APPEND TEST_FILEPATHS "${INTERFACE_FILEPATHS}" "${public_headers}") diff --git a/include/MachineLearningCompiler/All.h b/include/MachineLearningCompiler/All.h deleted file mode 100644 index 2c8e043..0000000 --- a/include/MachineLearningCompiler/All.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef MLC_ALL_H -#define MLC_ALL_H - -#include "Error.h" -#include "Setup.h" -#include "Tensor.h" - -#endif // MLC_ALL_H \ No newline at end of file diff --git a/include/MachineLearningCompiler/Setup.h b/include/MachineLearningCompiler/Setup.h deleted file mode 100644 index 290b97d..0000000 --- a/include/MachineLearningCompiler/Setup.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef MLC_SETUP_H -#define MLC_SETUP_H -#include "Error.h" -#include "Tensor.h" -#include -#include - -namespace mlc -{ - class Setup - { - public: - /** - * @brief Executes the setup einsum expression with input tensor of the same size. - * - * @param inputs The inputs to be einsum calculation. - * @param output The output of the einsum calculation. - * @return Error The error during the - */ - virtual Error execute(const std::vector> &inputs, Tensor &output) = 0; - - /** - * @brief Executes the setup einsum expression with input tensor of the same size. - * - * @param inputs The inputs to be einsum calculation. - * @param output The output of the einsum calculation. - * @return Error The error during the - */ - virtual Error execute(const std::vector &inputs, Tensor &output) = 0; - - /** - * @brief Gets the error that was produces during the setup of the tree. 
- * - * @return Error The error that was produces during the setup. - */ - virtual Error getSetupError() const = 0; - }; - - /** - * @brief Sets up the einsum tree for contraction based on the given tree. - * - * @param inputs The input tensors. - * @param output The output tensor. - * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. - */ - Setup &einsum_setup(const std::vector> &inputs, Tensor &output, const std::string &tree); - - /** - * @brief Executes contractions based on the given tree. - * - * @param inputs The input tensors. - * @param output The output tensor. - * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. - */ - Setup &einsum_setup(const std::vector &inputs, Tensor &output, const std::string &tree); -} // namespace mlc - -#endif // MLC_SETUP_H \ No newline at end of file diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index 8967042..2030cbe 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -58,6 +58,39 @@ namespace mlc } }; + class TensorOperation + { + public: + virtual ~TensorOperation() + { + } + + /** + * @brief Executes the setup einsum expression with input tensor of the same size. + * + * @param inputs The inputs to be einsum calculation. + * @param output The output of the einsum calculation. + * @return Error The error code or ErrorType::None on success. + */ + virtual Error execute(const std::vector> &inputs, Tensor &output) = 0; + + /** + * @brief Executes the setup einsum expression with input tensor of the same size. + * + * @param inputs The inputs to be einsum calculation. + * @param output The output of the einsum calculation. + * @return Error The error code or ErrorType::None on success. + */ + virtual Error execute(const std::vector &inputs, Tensor &output) = 0; + + /** + * @brief Gets the error that was produces during the setup of the tree. 
+ * + * @return Error The error code or ErrorType::None on success. + */ + virtual Error getSetupError() const = 0; + }; + /** * @brief Fills the tensor with random float data. * @@ -102,6 +135,16 @@ namespace mlc */ Error einsum(const std::vector &inputs, Tensor &output, const std::string &tree); + /** + * @brief Sets up the einsum tree for contraction based on the given tensor dimensions and tree. + * + * @param inputs The input tensors shapes. + * @param output The output tensor shape. + * @param tree The einsum tree to contract in the format [in0],[in1]->[out]. + */ + TensorOperation *einsum_operation(const std::vector> &inputs, const std::vector &output, + const std::string &tree); + /** * @brief Perform a binary contraction and adds it to the output. * diff --git a/src/interface/Contraction.cpp b/src/interface/Contraction.cpp new file mode 100644 index 0000000..7f826dc --- /dev/null +++ b/src/interface/Contraction.cpp @@ -0,0 +1,46 @@ +#include "../../include/MachineLearningCompiler/Tensor.h" +#include "../main/EinsumTree.h" +#include "../main/TensorOperation.h" +#include "Einsum.h" +#include "TensorUtils.h" + +mlc::Error mlc::contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction) +{ + return internal::einsum>({input0, input1}, output, contraction); +} + +mlc::Error mlc::contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction, + const UnaryType firstTouch, const UnaryType lastTouch) +{ + mini_jit::EinsumTree einsumTree(contraction); + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = internal::convertParseError(errorParse); + return {type, "Failed during parsing the given einsum tree."}; + } + if (einsumTree.get_root()->left->type != mini_jit::EinsumTree::NodeType::Leaf || + einsumTree.get_root()->right->type != mini_jit::EinsumTree::NodeType::Leaf) + { + 
return {mlc::ErrorType::ExpectedSingleContraction, "Expected the given einsum string to be a single string."}; + } + + std::vector sorted_dim_sizes; + internal::get_sorted_dimensions_sizes(einsumTree.get_root(), {input0, input1}, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config = einsumTree.lower_node(einsumTree.get_root()); + config.first_touch = internal::convertPrimitiveType(firstTouch); + config.last_touch = internal::convertPrimitiveType(lastTouch); + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input0.data, input1.data, output.data); + return {ErrorType::None, "Success"}; +} \ No newline at end of file diff --git a/src/interface/Einsum.cpp b/src/interface/Einsum.cpp new file mode 100644 index 0000000..9e8418d --- /dev/null +++ b/src/interface/Einsum.cpp @@ -0,0 +1,83 @@ +#include "Einsum.h" +#include "../../include/MachineLearningCompiler/Tensor.h" +#include "../main/EinsumTree.h" + +mlc::Error mlc::einsum(const std::vector> &inputs, Tensor &output, const std::string &tree) +{ + return internal::einsum>(inputs, output, tree); +} + +mlc::Error mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) +{ + return internal::einsum(inputs, output, tree); +} + +mlc::EinsumOperation::EinsumOperation(const std::vector> &inputs, Tensor &, const std::string &tree) + : einsumTree(tree) +{ + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = internal::convertParseError(errorParse); + error = {type, "Failed to parse the tree."}; + } + + std::vector sorted_dim_sizes; + 
internal::get_sorted_dimensions_sizes>(einsumTree.get_root(), inputs, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + + error = {mlc::ErrorType::None, "Success"}; +} + +mlc::Error mlc::EinsumOperation::getSetupError() const +{ + return error; +} + +mlc::Error mlc::EinsumOperation::execute(const std::vector> &inputs, Tensor &output) +{ + if (error.type != ErrorType::None) + { + return error; + } + + Error checkError = hasSameDimensions>(inputs); + if (checkError.type != ErrorType::None) + { + return checkError; + } + + return execute>(inputs, output); +} + +mlc::Error mlc::EinsumOperation::execute(const std::vector &inputs, Tensor &output) +{ + if (error.type != ErrorType::None) + { + return error; + } + + Error checkError = hasSameDimensions(inputs); + if (checkError.type != ErrorType::None) + { + return checkError; + } + + return execute(inputs, output); +} + +mlc::TensorOperation *mlc::einsum_operation(const std::vector> &inputs, const std::vector &output, + const std::string &tree) +{ + std::vector> inputTensors; + for (const auto &shape : inputs) + { + // Create a dummy tensor with the given shape + Tensor tensor(nullptr, shape); + inputTensors.push_back(tensor); + } + + Tensor outputTensor(output); + EinsumOperation *operation = new EinsumOperation(inputTensors, outputTensor, tree); + return operation; +} \ No newline at end of file diff --git a/src/interface/Einsum.h b/src/interface/Einsum.h new file mode 100644 index 0000000..83368d2 --- /dev/null +++ b/src/interface/Einsum.h @@ -0,0 +1,151 @@ +#ifndef MLC_EINSUM_H +#define MLC_EINSUM_H + +#include "../../include/MachineLearningCompiler/Tensor.h" +#include "../main/EinsumTree.h" +#include "TensorUtils.h" +#include + +namespace mlc +{ + namespace internal + { + /** + * @brief Executes the einsum expression with the given inputs to output based on the given einsum tree. + * + * @tparam T The type how the input tensors are passed to the einsum expression. 
+ * @param inputs All inputs of the einsum expression. + * @param output The single output tensor of the einsum calculation. + * @param tree The tree how two tensors are contracted. + * @return mlc::Error The error code or ErrorType::None on success. + */ + template mlc::Error einsum(const std::vector &inputs, mlc::Tensor &output, const std::string &tree) + { + mini_jit::EinsumTree einsumTree(tree); + mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); + if (errorParse != mini_jit::EinsumTree::ErrorParse::None) + { + mlc::ErrorType type = convertParseError(errorParse); + return {type, "Failed during parsing the given einsum tree."}; + } + + std::vector sorted_dim_sizes; + get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); + einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); + + std::vector tensors(inputs.size() + 1); + for (size_t i = 0; i < inputs.size(); i++) + { + tensors[i] = getTensor(inputs[i])->data; + assert(tensors[i] != nullptr); + } + tensors[inputs.size()] = output.data; + + mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); + if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) + { + mlc::ErrorType type = convertErrorExecute(errorExecute); + return {type, "Failed during calculation of the einsum tree."}; + } + + return {mlc::ErrorType::None, "Success"}; + } + } // namespace internal + + class EinsumOperation : public TensorOperation + { + public: + EinsumOperation(const std::vector> &inputs, Tensor &output, const std::string &tree); + + //! @copydoc mlc::TensorOperation::execute(const std::vector> &, Tensor &) + virtual Error execute(const std::vector> &inputs, Tensor &output) override; + virtual Error execute(const std::vector &inputs, Tensor &output) override; + virtual Error getSetupError() const override; + + private: + /** + * @brief Executes the Einsum operation with the given inputs and output tensor. 
+ */ + template Error execute(const std::vector &inputs, Tensor &output); + template Error hasSameDimensions(const std::vector &inputs); + + Error error; + mini_jit::EinsumTree einsumTree; + }; + + template inline Error EinsumOperation::execute(const std::vector &inputs, Tensor &output) + { + std::vector tensors(inputs.size() + 1); + for (size_t i = 0; i < inputs.size(); i++) + { + tensors[i] = internal::getTensor(inputs[i])->data; + } + tensors[inputs.size()] = output.data; + + mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); + if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) + { + mlc::ErrorType type = internal::convertErrorExecute(errorExecute); + return {type, ""}; // TODO add error message + } + + return {mlc::ErrorType::None, "Success"}; + } + + template inline Error EinsumOperation::hasSameDimensions(const std::vector &inputs) + { + std::vector nodesToProcess = {einsumTree.get_root()}; + auto &sortedDimSizes = einsumTree.get_sorted_dim_sizes(); + uint32_t processedInputs = 0; + while (nodesToProcess.size() > 0) + { + mini_jit::EinsumTree::EinsumNode *node = nodesToProcess.back(); + nodesToProcess.pop_back(); + + if (node->type == mini_jit::EinsumTree::NodeType::Leaf) + { + if (!(node->input_tensor_index < static_cast(inputs.size()))) + { + return {ErrorType::EinsumTooManyInputTensors, "The was more input tensors than the original setup used."}; + } + + const Tensor *tensor = internal::getTensor(inputs[node->input_tensor_index]); + + if (tensor->dim_sizes.size() != node->output_dim_ids.size()) + { + return {ErrorType::ExecuteWrongDimension, "The count of dimensions do not match."}; + } + + for (size_t i = 0; i < node->output_dim_ids.size(); i++) + { + if (tensor->dim_sizes[i] != static_cast(sortedDimSizes[node->output_dim_ids[i]])) + { + return {ErrorType::ExecuteWrongDimension, + "The input tensor dimension has a different size than the size than the tensor it was setup up with."}; + } + } + + processedInputs++; + 
continue; + } + + if (node->left != nullptr) + { + nodesToProcess.push_back(node->left); + } + + if (node->right != nullptr) + { + nodesToProcess.push_back(node->right); + } + } + + if (processedInputs < inputs.size()) + { + return {mlc::ErrorType::EinsumNotEnoughInputTensors, "There was less input tensors than the original setups used."}; + } + + return {mlc::ErrorType::None, "Success"}; + } +} // namespace mlc +#endif // MLC_EINSUM_H \ No newline at end of file diff --git a/src/interface/Gemm.cpp b/src/interface/Gemm.cpp new file mode 100644 index 0000000..197183d --- /dev/null +++ b/src/interface/Gemm.cpp @@ -0,0 +1,54 @@ +#include "../../include/MachineLearningCompiler/Tensor.h" +#include "../main/TensorOperation.h" +#include "TensorUtils.h" + +mlc::Error mlc::gemm(const Tensor &input0, const Tensor &input1, Tensor &output) +{ + if (input0.dim_sizes.size() != 2 || input1.dim_sizes.size() != 2 || output.dim_sizes.size() != 2) + { + return {ErrorType::TensorExpected2DTensor, "GEMM requires input0 and input1 to be 2D tensors and output to be a 2D tensor."}; + } + + int64_t mSize = static_cast(input0.dim_sizes[1]); + int64_t nSize = static_cast(input1.dim_sizes[0]); + int64_t kSize = static_cast(input0.dim_sizes[0]); + + if (static_cast(output.dim_sizes[1]) != mSize) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same m dimension size as the input0."}; + } + + if (static_cast(output.dim_sizes[0]) != nSize) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same n dimension size as the input1."}; + } + + if (static_cast(input1.dim_sizes[1]) != kSize) + { + return {ErrorType::ExecuteWrongDimension, "Expected the input1 tensor to have the same k dimension size as the input0."}; + } + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config{ + mini_jit::TensorConfig::prim_t::none, // first_touch + mini_jit::TensorConfig::prim_t::gemm, // main + mini_jit::TensorConfig::prim_t::none, // 
last touch + {mini_jit::TensorConfig::dim_t::m, mini_jit::TensorConfig::dim_t::n, mini_jit::TensorConfig::dim_t::k}, // dim_types + {mini_jit::TensorConfig::exec_t::prim, mini_jit::TensorConfig::exec_t::prim, mini_jit::TensorConfig::exec_t::prim}, // exec_types + {mSize, nSize, kSize}, // dim_sizes + {1, 0, mSize}, // strides_in0 + {0, kSize, 1}, // strides_in1 + {1, mSize, 0}, // strides_out + mini_jit::TensorConfig::dtype_t::fp32, // dtype_t + }; + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input0.data, input1.data, output.data); + return {ErrorType::None, "Success"}; +} \ No newline at end of file diff --git a/src/interface/Setup.cpp b/src/interface/Setup.cpp deleted file mode 100644 index e55adc3..0000000 --- a/src/interface/Setup.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include "../../include/MachineLearningCompiler/Setup.h" -#include "SetupEinsum.h" - -mlc::Setup &mlc::einsum_setup(const std::vector> &inputs, Tensor &output, const std::string &tree) -{ - mlc::SetupEinsum setup(inputs, output, tree); - return setup; -} - -mlc::Setup &mlc::einsum_setup(const std::vector &inputs, Tensor &output, const std::string &tree) -{ - mlc::SetupEinsum setup(inputs, output, tree); - return setup; -} diff --git a/src/interface/SetupEinsum.cpp b/src/interface/SetupEinsum.cpp deleted file mode 100644 index dae4235..0000000 --- a/src/interface/SetupEinsum.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include "SetupEinsum.h" -#include "TensorUtils.h" -#include - -mlc::SetupEinsum::SetupEinsum(const std::vector> &inputs, Tensor &output, const std::string &tree) - : einsumTree(tree) -{ - setup>(inputs, output, tree); -} - -mlc::SetupEinsum::SetupEinsum(const std::vector &inputs, Tensor &output, const std::string &tree) : einsumTree(tree) -{ - setup(inputs, 
output, tree); -} - -mlc::Error mlc::SetupEinsum::getSetupError() const -{ - return error; -} - -mlc::Error mlc::SetupEinsum::execute(const std::vector> &inputs, Tensor &output) -{ - if (error.type != ErrorType::None) - { - return error; - } - - Error checkError = hasSameDimensions(inputs); - if (checkError.type != ErrorType::None) - { - return checkError; - } - - return execute>(inputs, output); -} - -mlc::Error mlc::SetupEinsum::execute(const std::vector &inputs, Tensor &output) -{ - if (error.type != ErrorType::None) - { - return error; - } - - Error checkError = hasSameDimensions(inputs); - if (checkError.type != ErrorType::None) - { - return checkError; - } - - return execute(inputs, output); -} \ No newline at end of file diff --git a/src/interface/SetupEinsum.h b/src/interface/SetupEinsum.h deleted file mode 100644 index 58230ba..0000000 --- a/src/interface/SetupEinsum.h +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef MLC_SETUPEINSUM_H -#define MLC_SETUPEINSUM_H -#include "../../include/MachineLearningCompiler/Setup.h" -#include "../main/EinsumTree.h" -#include - -namespace mlc -{ - class SetupEinsum : public Setup - { - public: - SetupEinsum(const std::vector> &inputs, Tensor &output, const std::string &tree); - SetupEinsum(const std::vector &inputs, Tensor &output, const std::string &tree); - - virtual Error execute(const std::vector> &inputs, Tensor &output) override; - virtual Error execute(const std::vector &inputs, Tensor &output) override; - virtual Error getSetupError() const override; - - private: - template void setup(const std::vector &inputs, Tensor &output, const std::string &tree); - template Error execute(const std::vector &inputs, Tensor &output); - template Error hasSameDimensions(const std::vector &inputs); - - Error error; - mini_jit::EinsumTree einsumTree; - }; - - template inline void SetupEinsum::setup(const std::vector &inputs, Tensor &output, const std::string &tree) - { - mini_jit::EinsumTree::ErrorParse errorParse = 
einsumTree.parse_tree(); - if (errorParse != mini_jit::EinsumTree::ErrorParse::None) - { - mlc::ErrorType type = ::convertParseError(errorParse); - error = {type, ""}; // TODO add error message - } - - std::vector sorted_dim_sizes; - get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); - einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); - - error = {mlc::ErrorType::None, "Success"}; - } - - template inline Error SetupEinsum::execute(const std::vector &inputs, Tensor &output) - { - std::vector tensors(inputs.size() + 1); - for (size_t i = 0; i < inputs.size(); i++) - { - tensors[i] = getTensor(inputs[i])->data; - } - tensors[inputs.size()] = output.data; - - mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); - if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) - { - mlc::ErrorType type = ::convertErrorExecute(errorExecute); - return {type, ""}; // TODO add error message - } - - return {mlc::ErrorType::None, "Success"}; - } - - template inline Error SetupEinsum::hasSameDimensions(const std::vector &inputs) - { - std::vector nodesToProcess = {einsumTree.get_root()}; - auto &sortedDimSizes = einsumTree.get_sorted_dim_sizes(); - uint32_t processedInputs = 0; - while (nodesToProcess.size() > 0) - { - mini_jit::EinsumTree::EinsumNode *node = nodesToProcess.back(); - nodesToProcess.pop_back(); - - if (node->type == mini_jit::EinsumTree::NodeType::Leaf) - { - if (!(node->input_tensor_index < inputs.size())) - { - return {ErrorType::EinsumTooManyInputTensors, "The was more input tensors than the original setup used."} - } - - Tensor *tensor = getTensor(inputs[node->input_tensor_index]); - - if (tensor->dim_sizes.size() != node->output_dim_ids.size()) - { - return {ErrorType::ExecuteWrongDimension, "The count of dimensions do not match."}; - } - - for (size_t i = 0; i < node->output_dim_ids.size(); i++) - { - if (tensor->dim_sizes[i] != static_cast(sortedDimSizes[node->output_dim_ids[i]])) - { - return 
{ErrorType::ExecuteWrongDimension, - "The input tensor dimension has a different size than the size than the tensor it was setup up with."}; - } - } - - processedInputs++; - continue; - } - - if (node->left != nullptr) - { - nodesToProcess.push_back(node->left); - } - - if (node->right != nullptr) - { - nodesToProcess.push_back(node->right); - } - } - - if (processedInputs < inputs.size()) - { - return {mlc::ErrorType::EinsumNotEnoughInputTensors, "There was less input tensors than the original setups used."}; - } - - return {mlc::ErrorType::None, "Success"}; - } - -} // namespace mlc - -#endif // MLC_SETUPEINSUM_H \ No newline at end of file diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index 2f921d5..e7a3349 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -1,5 +1,4 @@ #include "../../include/MachineLearningCompiler/Tensor.h" -#include "../main/EinsumTree.h" #include "../main/TensorOperation.h" #include "TensorUtils.h" #include @@ -69,245 +68,3 @@ void mlc::fill_lambda(Tensor &tensor, std::function> &inputs, Tensor &output, const std::string &tree) -{ - return internal::einsum>(inputs, output, tree); -} - -mlc::Error mlc::einsum(const std::vector &inputs, Tensor &output, const std::string &tree) -{ - return internal::einsum(inputs, output, tree); -} - -mlc::Error mlc::contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction) -{ - return internal::einsum>({input0, input1}, output, contraction); -} - -mlc::Error mlc::contraction(const Tensor &input0, const Tensor &input1, Tensor &output, const std::string &contraction, - const UnaryType firstTouch, const UnaryType lastTouch) -{ - mini_jit::EinsumTree einsumTree(contraction); - mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); - if (errorParse != mini_jit::EinsumTree::ErrorParse::None) - { - mlc::ErrorType type = internal::convertParseError(errorParse); - return {type, "Failed during parsing the given einsum 
tree."}; - } - if (einsumTree.get_root()->left->type != mini_jit::EinsumTree::NodeType::Leaf || - einsumTree.get_root()->right->type != mini_jit::EinsumTree::NodeType::Leaf) - { - return {mlc::ErrorType::ExpectedSingleContraction, "Expected the given einsum string to be a single string."}; - } - - std::vector sorted_dim_sizes; - internal::get_sorted_dimensions_sizes(einsumTree.get_root(), {input0, input1}, sorted_dim_sizes); - einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); - - mini_jit::TensorOperation op; - mini_jit::TensorConfig config = einsumTree.lower_node(einsumTree.get_root()); - config.first_touch = internal::convertPrimitiveType(firstTouch); - config.last_touch = internal::convertPrimitiveType(lastTouch); - - mini_jit::TensorOperation::error_t error = op.setup(config); - mlc::ErrorType errorType = internal::convertTensorOperationError(error); - if (errorType != mlc::ErrorType::None) - { - return {errorType, "Could not generate the kernels for the gemm operation."}; - } - - op.execute(input0.data, input1.data, output.data); - return {ErrorType::None, "Success"}; -} - -mlc::Error mlc::gemm(const Tensor &input0, const Tensor &input1, Tensor &output) -{ - if (input0.dim_sizes.size() != 2 || input1.dim_sizes.size() != 2 || output.dim_sizes.size() != 2) - { - return {ErrorType::TensorExpected2DTensor, "GEMM requires input0 and input1 to be 2D tensors and output to be a 2D tensor."}; - } - - int64_t mSize = static_cast(input0.dim_sizes[1]); - int64_t nSize = static_cast(input1.dim_sizes[0]); - int64_t kSize = static_cast(input0.dim_sizes[0]); - - if (static_cast(output.dim_sizes[1]) != mSize) - { - return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same m dimension size as the input0."}; - } - - if (static_cast(output.dim_sizes[0]) != nSize) - { - return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same n dimension size as the input1."}; - } - - if (static_cast(input1.dim_sizes[1]) != kSize) - { - 
return {ErrorType::ExecuteWrongDimension, "Expected the input1 tensor to have the same k dimension size as the input0."}; - } - - mini_jit::TensorOperation op; - mini_jit::TensorConfig config{ - mini_jit::TensorConfig::prim_t::none, // first_touch - mini_jit::TensorConfig::prim_t::gemm, // main - mini_jit::TensorConfig::prim_t::none, // last touch - {mini_jit::TensorConfig::dim_t::m, mini_jit::TensorConfig::dim_t::n, mini_jit::TensorConfig::dim_t::k}, // dim_types - {mini_jit::TensorConfig::exec_t::prim, mini_jit::TensorConfig::exec_t::prim, mini_jit::TensorConfig::exec_t::prim}, // exec_types - {mSize, nSize, kSize}, // dim_sizes - {1, 0, mSize}, // strides_in0 - {0, kSize, 1}, // strides_in1 - {1, mSize, 0}, // strides_out - mini_jit::TensorConfig::dtype_t::fp32, // dtype_t - }; - - mini_jit::TensorOperation::error_t error = op.setup(config); - mlc::ErrorType errorType = internal::convertTensorOperationError(error); - if (errorType != mlc::ErrorType::None) - { - return {errorType, "Could not generate the kernels for the gemm operation."}; - } - - op.execute(input0.data, input1.data, output.data); - return {ErrorType::None, "Success"}; -} - -mlc::Error mlc::unary_zero(Tensor &input) -{ - int64_t stride = 1; - std::vector dimSizes(input.dim_sizes.size()); - std::vector strides(input.dim_sizes.size()); - - for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) - { - strides[i] = stride; - dimSizes[i] = static_cast(input.dim_sizes[i]); - stride *= input.dim_sizes[i]; - } - - mini_jit::TensorOperation op; - mini_jit::TensorConfig config{ - mini_jit::TensorConfig::prim_t::none, // first_touch - mini_jit::TensorConfig::prim_t::zero, // main - mini_jit::TensorConfig::prim_t::none, // last touch - std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types - std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types - dimSizes, // dim_sizes - strides, // strides_in0 - std::vector(input.dim_sizes.size(), 0), // 
strides_in1 - strides, // strides_out - mini_jit::TensorConfig::dtype_t::fp32, // dtype_t - }; - - mini_jit::TensorOperation::error_t error = op.setup(config); - mlc::ErrorType errorType = internal::convertTensorOperationError(error); - if (errorType != mlc::ErrorType::None) - { - return {errorType, "Could not generate the kernels for the gemm operation."}; - } - - op.execute(input.data, nullptr, input.data); - return {ErrorType::None, "Success"}; -} - -mlc::Error mlc::unary_relu(const Tensor &input, Tensor &output) -{ - if (output.dim_sizes.size() != input.dim_sizes.size()) - { - return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; - } - - for (size_t i = 0; i < input.dim_sizes.size(); i++) - { - if (output.dim_sizes[i] != input.dim_sizes[i]) - { - return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; - } - } - - int64_t stride = 1; - std::vector dimSizes(input.dim_sizes.size()); - std::vector strides(input.dim_sizes.size()); - - for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) - { - strides[i] = stride; - dimSizes[i] = static_cast(input.dim_sizes[i]); - stride *= input.dim_sizes[i]; - } - - mini_jit::TensorOperation op; - mini_jit::TensorConfig config{ - mini_jit::TensorConfig::prim_t::none, // first_touch - mini_jit::TensorConfig::prim_t::relu, // main - mini_jit::TensorConfig::prim_t::none, // last touch - std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types - std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types - dimSizes, // dim_sizes - strides, // strides_in0 - std::vector(input.dim_sizes.size(), 0), // strides_in1 - strides, // strides_out - mini_jit::TensorConfig::dtype_t::fp32, // dtype_t - }; - - mini_jit::TensorOperation::error_t error = op.setup(config); - mlc::ErrorType errorType = internal::convertTensorOperationError(error); - if 
(errorType != mlc::ErrorType::None) - { - return {errorType, "Could not generate the kernels for the gemm operation."}; - } - - op.execute(input.data, nullptr, output.data); - return {ErrorType::None, "Success"}; -} - -mlc::Error mlc::unary_identity(const Tensor &input, Tensor &output) -{ - if (output.dim_sizes.size() != input.dim_sizes.size()) - { - return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; - } - - for (size_t i = 0; i < input.dim_sizes.size(); i++) - { - if (output.dim_sizes[i] != input.dim_sizes[i]) - { - return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; - } - } - - int64_t stride = 1; - std::vector dimSizes(input.dim_sizes.size()); - std::vector strides(input.dim_sizes.size()); - - for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) - { - strides[i] = stride; - dimSizes[i] = static_cast(input.dim_sizes[i]); - stride *= input.dim_sizes[i]; - } - - mini_jit::TensorOperation op; - mini_jit::TensorConfig config{ - mini_jit::TensorConfig::prim_t::none, // first_touch - mini_jit::TensorConfig::prim_t::copy, // main - mini_jit::TensorConfig::prim_t::none, // last touch - std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types - std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types - dimSizes, // dim_sizes - strides, // strides_in0 - std::vector(input.dim_sizes.size(), 0), // strides_in1 - strides, // strides_out - mini_jit::TensorConfig::dtype_t::fp32, // dtype_t - }; - - mini_jit::TensorOperation::error_t error = op.setup(config); - mlc::ErrorType errorType = internal::convertTensorOperationError(error); - if (errorType != mlc::ErrorType::None) - { - return {errorType, "Could not generate the kernels for the gemm operation."}; - } - - op.execute(input.data, nullptr, output.data); - return {ErrorType::None, "Success"}; -} diff --git 
a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index 972054c..96920d2 100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -2,6 +2,7 @@ #define MLC_TENSORUTILS_H #include "../../include/MachineLearningCompiler/Tensor.h" #include "../main/EinsumTree.h" +#include "../main/release_assert.h" #include #include #include @@ -16,11 +17,12 @@ namespace mlc * @brief Function definition for converting a generic type to a pointer to a mlc::Tensor. * * @param T The type to convert. - * @return mlc::Tensor + * @return mlc::Tensor nullptr as it should not be possible to get here. */ template constexpr const mlc::Tensor *getTensor(const T &) { static_assert(false, "No generic conversion of tensor possible."); + release_assert(false, "No generic conversion of tensor possible."); return nullptr; } @@ -35,7 +37,23 @@ namespace mlc return tensor; } - // TODO: doc + /** + * @brief Gets the pointer to the mlc::Tensor. + * + * @param tensor The tensor to get the pointer from. + * @return Pointer to the mlc::Tensor. + */ + template <> constexpr const mlc::Tensor *getTensor(const mlc::Tensor *const &tensor) + { + return tensor; + } + + /** + * @brief Gets the pointer to the mlc::Tensor. + * + * @param tensor The tensor to get the pointer from. + * @return Pointer to the mlc::Tensor. + */ template <> constexpr const mlc::Tensor * getTensor>(const std::reference_wrapper &tensor) @@ -43,7 +61,14 @@ namespace mlc return &(tensor.get()); } - // TODO: doc + /** + * @brief Get the dim sizes of the input tensors in increased order of their dimension ids. + * + * @tparam T The type of the input tensors, either mlc::Tensor* or std::reference_wrapper. + * @param root The root of the EinsumNode tree. + * @param inputs The input tensors. + * @param sorted_dim_sizes The vector to store the sorted dimension sizes. 
+ */ template constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, std::vector &sorted_dim_sizes) @@ -85,7 +110,13 @@ namespace mlc } } - // TODO: doc + /** + * @brief Get the dim sizes of the input tensors in increased order of their dimension ids. + * + * @param root The root of the EinsumNode tree. + * @param inputs The input tensors. + * @param sorted_dim_sizes The vector to store the sorted dimension sizes. + */ constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector> &inputs, std::vector &sorted_dim_sizes) @@ -93,14 +124,25 @@ namespace mlc get_sorted_dimensions_sizes>(root, inputs, sorted_dim_sizes); } - // TODO: doc + /** + * @brief Get the dim sizes of the input tensors in increased order of their dimension ids. + * + * @param root The root of the EinsumNode tree. + * @param inputs The input tensors. + * @param sorted_dim_sizes The vector to store the sorted dimension sizes. + */ constexpr void get_sorted_dimensions_sizes(const mini_jit::EinsumTree::EinsumNode *root, const std::vector &inputs, std::vector &sorted_dim_sizes) { get_sorted_dimensions_sizes(root, inputs, sorted_dim_sizes); } - // TODO: doc + /** + * @brief Helper function to convert the parse error of the EinsumTree to the corresponding mlc::ErrorType. + * + * @param error The parse error of type mini_jit::EinsumTree::ErrorParse. + * @return constexpr mlc::ErrorType The error code or ErrorType::None on success. + */ constexpr mlc::ErrorType convertParseError(mini_jit::EinsumTree::ErrorParse error) { switch (error) @@ -126,7 +168,12 @@ namespace mlc } } - // TODO: doc + /** + * @brief Converts the error of the EinsumTree execution to the corresponding mlc::ErrorType. + * + * @param error The error of type mini_jit::EinsumTree::ErrorExecute. + * @return constexpr mlc::ErrorType The error code or ErrorType::None on success. 
+ */ constexpr mlc::ErrorType convertErrorExecute(mini_jit::EinsumTree::ErrorExecute error) { if (static_cast(error) > 100) @@ -151,6 +198,12 @@ namespace mlc } } + /** + * @brief Converts the error of the TensorOperation to the corresponding mlc::ErrorType. + * + * @param error The error of type mini_jit::TensorOperation::error_t. + * @return constexpr mlc::ErrorType The converted error of the interface. + */ constexpr mlc::ErrorType convertTensorOperationError(mini_jit::TensorOperation::error_t error) { switch (error) @@ -192,40 +245,12 @@ namespace mlc } } - // TODO: doc - template mlc::Error einsum(const std::vector &inputs, mlc::Tensor &output, const std::string &tree) - { - mini_jit::EinsumTree einsumTree(tree); - mini_jit::EinsumTree::ErrorParse errorParse = einsumTree.parse_tree(); - if (errorParse != mini_jit::EinsumTree::ErrorParse::None) - { - mlc::ErrorType type = convertParseError(errorParse); - return {type, "Failed during parsing the given einsum tree."}; - } - - std::vector sorted_dim_sizes; - get_sorted_dimensions_sizes(einsumTree.get_root(), inputs, sorted_dim_sizes); - einsumTree.set_sorted_dim_sizes(sorted_dim_sizes); - - std::vector tensors(inputs.size() + 1); - for (size_t i = 0; i < inputs.size(); i++) - { - tensors[i] = getTensor(inputs[i])->data; - assert(tensors[i] != nullptr); - } - tensors[inputs.size()] = output.data; - - mini_jit::EinsumTree::ErrorExecute errorExecute = einsumTree.execute(tensors); - if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) - { - mlc::ErrorType type = convertErrorExecute(errorExecute); - return {type, "Failed during calculation of the einsum tree."}; - } - - return {mlc::ErrorType::None, "Success"}; - } - - // TODO: doc + /** + * @brief Get the size of the given tensor. + * + * @param tensor The tensor to calculate the size from. + * @return constexpr uint64_t The size of the tensor. 
+ */ constexpr uint64_t getTensorSize(const mlc::Tensor *tensor) { uint64_t size = 1; @@ -236,7 +261,12 @@ namespace mlc return size; } - // TODO: doc + /** + * @brief Converts a primitive type from the interface unary to a corresponding primitive of the tensor config. + * + * @param type The unary type to convert. + * @return constexpr mini_jit::TensorConfig::prim_t The converted primitive. + */ constexpr mini_jit::TensorConfig::prim_t convertPrimitiveType(mlc::UnaryType type) { switch (type) diff --git a/src/interface/Unary.cpp b/src/interface/Unary.cpp new file mode 100644 index 0000000..c8a07b3 --- /dev/null +++ b/src/interface/Unary.cpp @@ -0,0 +1,143 @@ +#include "../../include/MachineLearningCompiler/Tensor.h" +#include "../main/TensorOperation.h" +#include "TensorUtils.h" + +mlc::Error mlc::unary_zero(Tensor &input) +{ + int64_t stride = 1; + std::vector dimSizes(input.dim_sizes.size()); + std::vector strides(input.dim_sizes.size()); + + for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) + { + strides[i] = stride; + dimSizes[i] = static_cast(input.dim_sizes[i]); + stride *= input.dim_sizes[i]; + } + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config{ + mini_jit::TensorConfig::prim_t::none, // first_touch + mini_jit::TensorConfig::prim_t::zero, // main + mini_jit::TensorConfig::prim_t::none, // last touch + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types + dimSizes, // dim_sizes + strides, // strides_in0 + std::vector(input.dim_sizes.size(), 0), // strides_in1 + strides, // strides_out + mini_jit::TensorConfig::dtype_t::fp32, // dtype_t + }; + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + 
op.execute(input.data, nullptr, input.data); + return {ErrorType::None, "Success"}; +} + +mlc::Error mlc::unary_relu(const Tensor &input, Tensor &output) +{ + if (output.dim_sizes.size() != input.dim_sizes.size()) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; + } + + for (size_t i = 0; i < input.dim_sizes.size(); i++) + { + if (output.dim_sizes[i] != input.dim_sizes[i]) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; + } + } + + int64_t stride = 1; + std::vector dimSizes(input.dim_sizes.size()); + std::vector strides(input.dim_sizes.size()); + + for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) + { + strides[i] = stride; + dimSizes[i] = static_cast(input.dim_sizes[i]); + stride *= input.dim_sizes[i]; + } + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config{ + mini_jit::TensorConfig::prim_t::none, // first_touch + mini_jit::TensorConfig::prim_t::relu, // main + mini_jit::TensorConfig::prim_t::none, // last touch + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types + dimSizes, // dim_sizes + strides, // strides_in0 + std::vector(input.dim_sizes.size(), 0), // strides_in1 + strides, // strides_out + mini_jit::TensorConfig::dtype_t::fp32, // dtype_t + }; + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input.data, nullptr, output.data); + return {ErrorType::None, "Success"}; +} + +mlc::Error mlc::unary_identity(const Tensor &input, Tensor &output) +{ + if (output.dim_sizes.size() != input.dim_sizes.size()) + { + return 
{ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; + } + + for (size_t i = 0; i < input.dim_sizes.size(); i++) + { + if (output.dim_sizes[i] != input.dim_sizes[i]) + { + return {ErrorType::ExecuteWrongDimension, "Expected the output tensor to have the same number of dimension as the input."}; + } + } + + int64_t stride = 1; + std::vector dimSizes(input.dim_sizes.size()); + std::vector strides(input.dim_sizes.size()); + + for (int64_t i = input.dim_sizes.size() - 1; i >= 0; i--) + { + strides[i] = stride; + dimSizes[i] = static_cast(input.dim_sizes[i]); + stride *= input.dim_sizes[i]; + } + + mini_jit::TensorOperation op; + mini_jit::TensorConfig config{ + mini_jit::TensorConfig::prim_t::none, // first_touch + mini_jit::TensorConfig::prim_t::copy, // main + mini_jit::TensorConfig::prim_t::none, // last touch + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::dim_t::c), // dim_types + std::vector(input.dim_sizes.size(), mini_jit::TensorConfig::exec_t::seq), // exec_types + dimSizes, // dim_sizes + strides, // strides_in0 + std::vector(input.dim_sizes.size(), 0), // strides_in1 + strides, // strides_out + mini_jit::TensorConfig::dtype_t::fp32, // dtype_t + }; + + mini_jit::TensorOperation::error_t error = op.setup(config); + mlc::ErrorType errorType = internal::convertTensorOperationError(error); + if (errorType != mlc::ErrorType::None) + { + return {errorType, "Could not generate the kernels for the gemm operation."}; + } + + op.execute(input.data, nullptr, output.data); + return {ErrorType::None, "Success"}; +} \ No newline at end of file diff --git a/src/test/interface/Setup.test.cpp b/src/test/interface/TensorOperation.test.cpp similarity index 58% rename from src/test/interface/Setup.test.cpp rename to src/test/interface/TensorOperation.test.cpp index fd6f051..9b257e9 100644 --- a/src/test/interface/Setup.test.cpp +++ b/src/test/interface/TensorOperation.test.cpp @@ -1,4 +1,4 @@ 
-#include "../../../include/MachineLearningCompiler/Setup.h" +#include "../../../include/MachineLearningCompiler/Tensor.h" #include #include #include @@ -11,14 +11,13 @@ TEST_CASE("Test interface tensor einsum setup", "[setup][correctness]") std::vector shape2 = {4, 5}; std::vector shape3 = {3, 5}; - size_t total_size1 = shape1[0] * shape1[1]; - size_t total_size2 = shape2[0] * shape2[1]; - size_t total_size3 = shape3[0] * shape3[1]; - mlc::Tensor tensor1(shape1); mlc::Tensor tensor2(shape2); mlc::Tensor tensor3(shape3); - mlc::Setup &setup = mlc::einsum_setup({tensor1, tensor2}, tensor3, "[0,1],[1,2]->[0,2]"); - setup.execute({tensor1, tensor2}, tensor3); + mlc::TensorOperation *setup = mlc::einsum_operation({shape1, shape2}, shape3, "[0,1],[1,2]->[0,2]"); + setup->execute({tensor1, tensor2}, tensor3); + setup->execute({tensor1, tensor2}, tensor3); + setup->execute({tensor1, tensor2}, tensor3); + delete setup; } \ No newline at end of file From 7f10362afa18d1a429e346bfca472108ab8eaf96 Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Wed, 2 Jul 2025 18:01:56 +0000 Subject: [PATCH 14/17] finished lib + docu Co-authored-by: Fabian Hofer --- README.md | 25 ++ cmake-library/example-project/CMakeLists.txt | 58 ++++ cmake-library/example-project/Example.cpp | 331 +++++++++++++++++++ cmake-library/user-guide.md | 222 +++++++++++++ include/MachineLearningCompiler/Tensor.h | 59 +++- include/MachineLearningCompiler/UnaryType.h | 2 +- src/interface/Tensor.cpp | 97 ++++++ src/interface/TensorUtils.h | 13 +- src/test/interface/Tensor.test.cpp | 128 +++++++ 9 files changed, 931 insertions(+), 4 deletions(-) create mode 100644 README.md create mode 100644 cmake-library/example-project/CMakeLists.txt create mode 100644 cmake-library/example-project/Example.cpp create mode 100644 cmake-library/user-guide.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..787c50f --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# Machine 
Learning Compilers + +This repository was created as part of the **Machine Learning Compilers** lecture and lab at Friedrich Schiller University Jena during the summer term 2025. While the lecture focused on theoretical concepts, the lab had a practical orientation, with the goal of implementing a domain-specific compiler for tensor expressions. + +The main objective of the lab was to build a Just-In-Time (JIT) compiler from scratch that supports a variety of tensor operations. Tensor compilers automate the transformation of tensor expressions into executable code, aiming for high throughput, low latency, short compile times, flexibility and portability. + +The lab involved weekly tasks that guided the development of this compiler. The corresponding code and implementations are part of this repository. + +## Overview + +This repository includes: + +- Implementations of all lab tasks +- Source code of a functional JIT compiler for tensor operations +- Modular code structured for reuse and extensibility + +The weekly tasks from the lab can be found here: [scalable-analyses](https://github.com/scalable-analyses/pbtc/tree/main/lab) + +## Technical Documentation + +A detailed technical documentation of our implementation including the design decisions and solutions to the lab tasks, and explanations of the source code is available on our [project website](https://integer-ctrl.github.io/machine-learning-compilers/). + +## CMake Library + +To make the compiler easy to integrate into other projects, we structured it as a CMake library. This allows users to include and build upon our functionality directly in their own CMake-based projects. More details about the library and how to use it can be found in the [user-guide.md](https://github.com/Integer-Ctrl/machine-learning-compilers/cmake-library/user-guide.md). 
diff --git a/cmake-library/example-project/CMakeLists.txt b/cmake-library/example-project/CMakeLists.txt new file mode 100644 index 0000000..be563d4 --- /dev/null +++ b/cmake-library/example-project/CMakeLists.txt @@ -0,0 +1,58 @@ +cmake_minimum_required(VERSION 3.28.0) +project(ExampleProject VERSION 0.1.0 LANGUAGES C CXX ASM) + +# The MachineLearningCompiler library is only supported on Linux on arm. +if(NOT (UNIX AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm)")) + message(FATAL_ERROR "Only arm on Linux is supported.") +endif() + + +# Set default build type to Release if not specified +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() + +get_property(IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(IS_MULTI_CONFIG) + message(NOTICE "Using multi-config generator. Compile with: cmake --build . --config [Debug|Release] --target ") +else() + message(NOTICE "Using single-config generator. Generate with: cmake .. -DCMAKE_BUILD_TYPE=[Debug|Release]") + if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") + message(WARNING "No Build type is set. Using Release!") + endif() +endif() + +message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") + + +# =========================================== +# Include the MachineLearningCompiler Library +# =========================================== + +# Option 1: Including the MachineLearningCompiler Library + +# Optional: Toggles whether included libraries are built as shared or static libraries. Default is ON. +set(BUILD_SHARED_LIBS ON) + +# Optional: Toggles if OpenMP should be used by the library. Default is ON. +set(MLC_USE_OPENMP ON) + +Include(FetchContent) +FetchContent_Declare( + MachineLearningCompiler + GIT_REPOSITORY https://github.com/Integer-Ctrl/machine-learning-compilers + GIT_TAG individual-phase + EXCLUDE_FROM_ALL +) +FetchContent_MakeAvailable(MachineLearningCompiler) + +# Option 2: Include it from the current machine if installed. 
+# find_library(mlc::MachineLearningCompiler) + +# =========================================== + +add_executable(example + Example.cpp +) +target_link_libraries(example mlc::MachineLearningCompiler) \ No newline at end of file diff --git a/cmake-library/example-project/Example.cpp b/cmake-library/example-project/Example.cpp new file mode 100644 index 0000000..4cfabd3 --- /dev/null +++ b/cmake-library/example-project/Example.cpp @@ -0,0 +1,331 @@ +#include +#include + +/** + * Tensor object examples. + */ +void example_tensor() +{ + // Define tensors with different dimensions. The memory is allocated automatically based on the given dimensions and filled with zeros. + mlc::Tensor tensor1D({5}); // 1D tensor with 5 elements + mlc::Tensor tensor2D({3, 4}); // 2D tensor with 3 rows and 4 columns + mlc::Tensor tensor3D({2, 3, 4}); // 3D tensor with 2 layers, 3 rows and 4 columns + + // Define a tensor with data + float data1[] = {1, 2, 3, 4, 5}; + float data2[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + mlc::Tensor tensorWithData1(data1, {2, 2}); // 2x2 tensor with specific data + mlc::Tensor tensorWIthData2(data2, {3, 2, 2}); // 3D tensor with specific data + + // Print dimensions and sizes of the tensors + std::cout << "Tensor 1D dim sizes: "; + for (const auto &dim : tensor1D.dim_sizes) + { + std::cout << dim << " "; + } + std::cout << std::endl; + std::cout << "Tensor 2D dim sizes: "; + for (const auto &dim : tensor2D.dim_sizes) + { + std::cout << dim << " "; + } + std::cout << std::endl; + + // Print the sizes of the tensors + std::cout << "Tensor 1D Size: " << tensor1D.size() << std::endl; + std::cout << "Tensor 2D Size: " << tensor2D.size() << std::endl; + std::cout << "Tensor 3D Size: " << tensor3D.size() << std::endl; + std::cout << "Tensor with Data 1 Size: " << tensorWithData1.size() << std::endl; + std::cout << "Tensor with Data 2 Size: " << tensorWIthData2.size() << std::endl; + + // Print the strides of the tensors + std::cout << "Tensor 1D Strides: "; + 
for (const auto &stride : tensor1D.strides) + { + std::cout << stride << " "; + } + std::cout << std::endl; + std::cout << "Tensor 2D Strides: "; + for (const auto &stride : tensor2D.strides) + { + std::cout << stride << " "; + } + std::cout << std::endl; + + // Print the tensors to the console + std::cout << tensor1D.to_string("Tensor 1D") << std::endl; + std::cout << tensor2D.to_string("Tensor 2D") << std::endl; + std::cout << tensor3D.to_string("Tensor 3D") << std::endl; + std::cout << tensorWithData1.to_string("Tensor with Data 1") << std::endl; + std::cout << tensorWIthData2.to_string("Tensor with Data 2") << std::endl; +} + +/** + * Methods that can be used to fill a tensor. + */ +void example_fill() +{ + // Fill the memory of the tensors with random values + mlc::Tensor tensorRandom({3, 3}); + mlc::fill_random(tensorRandom); + std::cout << tensorRandom.to_string("Random") << std::endl; + + // Fill the memory of the tensors with all 1s. + mlc::Tensor tensorSingleNumber({3, 3}); + mlc::fill_number(tensorSingleNumber, 1.43); + std::cout << tensorSingleNumber.to_string("Ones") << std::endl; + + // Fill the memory of the tensors with counting upwards data starting from 0. + mlc::Tensor tensorCountingUp({3, 3}); + mlc::fill_counting_up(tensorCountingUp, 0, 1.0); + std::cout << tensorCountingUp.to_string("Counting Up") << std::endl; + + // Fill the memory of the tensors with counting downwards data starting from 5. + mlc::Tensor tensorCountingDown({3, 3}); + mlc::fill_counting_down(tensorCountingDown, 5, 0.1); + std::cout << tensorCountingDown.to_string("Counting Down") << std::endl; + + // Fill the memory of the tensor based on a user defined expression. The tensor itself and current index of the data that is currently + // filled are given as additional parameter. 
+ // Here the tensor is filled with 1 2 3, 1 2 3, 1 2 3 + mlc::Tensor tensorLambda({3, 3}); + mlc::fill_lambda(tensorLambda, [](const mlc::Tensor &self, size_t index) { return index % self.strides[self.strides.size() - 1]; }); + std::cout << tensorLambda.to_string("Lambda 1 2 3") << std::endl; + + // We can also fill the tensor using outside defined variable. + size_t size = tensorLambda.size(); + mlc::fill_lambda(tensorLambda, [&size](const mlc::Tensor &self, size_t index) { return size; }); + std::cout << tensorLambda.to_string("Lambda Outside") << std::endl; +} + +/** + * A GEneral Matrix Matrix multiplication requires the tensors to be in a matrix shape i.e. exactly 2 dimensions. + */ +void example_gemm() +{ + mlc::Tensor in0({5, 3}); // IDs: 0,1 + mlc::Tensor in1({2, 5}); // IDs: 2,0 + mlc::Tensor out({2, 3}); // IDs: 2,1 + + // Fill the memory of the tensors with random values + mlc::fill_counting_up(in0, 0, 1); + mlc::fill_counting_up(in1, 0, 1); + + mlc::Error error = mlc::gemm(in0, in1, out); + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + return; + } + + std::cout << in0.to_string("in0") << std::endl; + std::cout << in1.to_string("in1") << std::endl; + std::cout << out.to_string("out") << std::endl; +} + +/** + * A unary operation zero, identity and ReLU can be performed on a Tensor. 
+ */ +void example_unary() +{ + // Performs a zero unary + mlc::Tensor tensorZero({3, 3}); + mlc::fill_random(tensorZero); + mlc::Error error = mlc::unary_zero(tensorZero); + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + return; + } + std::cout << tensorZero.to_string("Unary Zero") << std::endl; + + // Performs a identity unary + mlc::Tensor tensorIdentityIn({3, 3}); + mlc::Tensor tensorIdentityOut({3, 3}); + mlc::fill_random(tensorIdentityIn); + mlc::fill_number(tensorIdentityIn, 0); + error = mlc::unary_identity(tensorIdentityIn, tensorIdentityOut); // identity = copy from input to output + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + return; + } + std::cout << tensorIdentityOut.to_string("Unary Identity Input") << std::endl; + std::cout << tensorIdentityOut.to_string("Unary Identity Output") << std::endl; + + // Performs a ReLU unary + mlc::Tensor tensorReluIn({3, 3}); + mlc::Tensor tensorReluOut({3, 3}); + // Fills even indices with positive and odd indices with negative numbers + mlc::fill_lambda(tensorReluIn, [](const mlc::Tensor &, size_t index) { return index * (2 * (index % 2) - 1); }); + mlc::fill_number(tensorReluOut, 0); + error = mlc::unary_relu(tensorReluIn, tensorReluOut); // ReLU = max(x, 0) + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + return; + } + std::cout << tensorReluIn.to_string("Unary ReLU Input") << std::endl; + std::cout << tensorReluOut.to_string("Unary ReLU Output") << std::endl; +} + +/** + * A contraction of two tensors and add the result to the output. 
+ */ +void example_contraction() +{ + mlc::Tensor in0({5, 4, 3}); // IDs: 0,1,2 + mlc::Tensor in1({5, 2, 4}); // IDs: 3,4,1 + mlc::Tensor out({5, 5, 2, 3}); // IDs: 0,3,4,2 + + mlc::fill_counting_up(in0, 0, 1); + mlc::fill_counting_down(in1, 0, 1); + mlc::fill_number(in0, 1'000'000); + + mlc::Error error = mlc::contraction(in0, in1, out, "[0,1,2],[3,4,1]->[0,3,4,2]"); + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + return; + } + + std::cout << in0.to_string("in0") << std::endl; + std::cout << in1.to_string("in1") << std::endl; + std::cout << out.to_string("out") << std::endl; +} + +/** + * A contraction of two tensors with unarys that are executed before (first touch) or after (last touch) the contraction on the output + * tensor. + */ +void example_contraction_first_last_touch() +{ + mlc::Tensor in0({5, 4, 3}); // IDs: 0,1,2 + mlc::Tensor in1({5, 2, 4}); // IDs: 3,4,1 + mlc::Tensor out({5, 5, 2, 3}); // IDs: 0,3,4,2 + + mlc::fill_counting_up(in0, 0, 1); + mlc::fill_counting_down(in1, 0, 1); + // The out is default initialized with zeros. + + mlc::Error error = mlc::contraction(in0, in1, out, "[0,1,2],[3,4,1]->[0,3,4,2]", mlc::UnaryType::None, mlc::UnaryType::ReLU); + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + return; + } + + std::cout << in0.to_string("in0") << std::endl; + std::cout << in1.to_string("in1") << std::endl; + std::cout << out.to_string("out") << std::endl; +} + +/** + * A simple einsum operation on three input tensors. The result is added to the output. + */ +void example_einsum() +{ + mlc::Tensor in0({5, 3}); // IDs: 0,1 + mlc::Tensor in1({2, 5}); // IDs: 2,0 + mlc::Tensor in2({3, 7}); // IDs: 1,3 + mlc::Tensor out({2, 7}); // IDs: 2,3 + + mlc::fill_counting_up(in0, 0, 1); + mlc::fill_number(in1, 1); + mlc::fill_counting_down(in2, 0, 1); + mlc::fill_number(out, 1'000); + + // Execute the defined einsum tree on the tensors. 
+ mlc::Error error = mlc::einsum({in0, in1, in2}, out, "[[0,1],[2,0]->[2,1]],[1,3]->[2,3]"); + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + return; + } + + std::cout << in0.to_string("in0") << std::endl; + std::cout << in1.to_string("in1") << std::endl; + std::cout << in2.to_string("in2") << std::endl; + std::cout << out.to_string("out") << std::endl; +} + +/** + * A einsum expression that is first defined by the shapes of the input and ouput tensors and can be multiple time called on any input and + * output tensors that matches the same shape. This can be used to save the costs to setup and optimize the given einsum tree. The result is + * added to the output. + */ +void example_einsum_operation() +{ + mlc::Tensor in0({5, 3}); // IDs: 0,1 + mlc::Tensor in1({2, 5}); // IDs: 2,0 + mlc::Tensor in2({3, 7}); // IDs: 1,3 + mlc::Tensor out({2, 7}); // IDs: 2,3 + + mlc::fill_counting_down(in0, 0, 1); + mlc::fill_number(in1, 1); + mlc::fill_counting_down(in2, 0, 0.5); + mlc::fill_number(out, 1'000); + + // Generates a tensor operation with fixed input and ouput tensor shapes. + mlc::TensorOperation *op = + mlc::einsum_operation({in0.dim_sizes, in1.dim_sizes, in2.dim_sizes}, out.dim_sizes, "[[0,1],[2,0]->[2,1]],[1,3]->[2,3]"); + + // Process any error that may occurs during the setup of the operation. + mlc::Error error = op->getSetupError(); + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + delete op; + return; + } + + // Execute the operation and check for any error that can happen during execution. 
+ error = op->execute({in0, in1, in2}, out); + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + delete op; + return; + } + + std::cout << in0.to_string("in0") << std::endl; + std::cout << in1.to_string("in1") << std::endl; + std::cout << in2.to_string("in2") << std::endl; + std::cout << out.to_string("out") << std::endl; + + // Create new tensors of the same shape. + mlc::Tensor in0_2(in0.dim_sizes); // IDs: 0,1 + mlc::Tensor in1_2(in1.dim_sizes); // IDs: 2,0 + mlc::Tensor in2_2(in2.dim_sizes); // IDs: 1,3 + mlc::Tensor out_2(out.dim_sizes); // IDs: 2,3 + + mlc::fill_random(in0_2); + mlc::fill_random(in1_2); + mlc::fill_random(in2_2); + mlc::fill_random(out_2); + + // Execute the operation again but on different tensors of the same size. + error = op->execute({in0_2, in1_2, in2_2}, out_2); + if (error.type != mlc::ErrorType::None) + { + std::cout << error.message << std::endl; + delete op; + return; + } + + delete op; +} + +int main(int argc, const char **argv) +{ + example_tensor(); + example_fill(); + example_gemm(); + example_unary(); + example_contraction(); + example_contraction_first_last_touch(); + example_einsum(); + example_einsum_operation(); + + return 0; +} \ No newline at end of file diff --git a/cmake-library/user-guide.md b/cmake-library/user-guide.md new file mode 100644 index 0000000..9da6443 --- /dev/null +++ b/cmake-library/user-guide.md @@ -0,0 +1,222 @@ +# CMake Library + +In this user guide, we will cover our CMake library we made from the machine learning compiler project. This library was designed to simplify the usage of our machine learning compiler and to provide an easy to use interface for users. + +## Overview + +We will guide you through the process of integrating our CMake library into your project, highlight its features, and provide an example project to demonstrate its usage. 
+
+- [Library Usage](#library-usage)
+  - [Integration into CMakeLists](#integration-into-cmakelists)
+  - [Installing the Library](#installing-the-library)
+- [Library Features](#library-features)
+  - [Tensor Object](#tensor-object)
+  - [Tensor Expressions](#tensor-expressions)
+    - [GEMM](#gemm)
+    - [Unary Operations](#unary-operations)
+    - [Contraction](#contraction)
+    - [Einsum](#einsum)
+- [Example Project](#example-project)
+
+# Library Usage
+
+### Integration into CMakeLists
+
+To integrate our CMake library into your project you can choose between two methods:
+
+1. Directly fetch the content of this library from GitHub and build it with your CMake:
+
+   ```cmake
+   # Optional: Toggles if the included libraries are built as shared or static libraries. Default is ON.
+   set(BUILD_SHARED_LIBS OFF)
+
+   # Optional: Toggles if OpenMP should be used by the library. Default is ON.
+   set(MLC_USE_OPENMP ON)
+
+   Include(FetchContent)
+   FetchContent_Declare(
+     MachineLearningCompiler
+     GIT_REPOSITORY https://github.com/Integer-Ctrl/machine-learning-compilers
+     GIT_TAG individual-phase
+     EXCLUDE_FROM_ALL
+   )
+   FetchContent_MakeAvailable(MachineLearningCompiler)
+   ```
+
+   If needed, you can specify two CMake options:
+
+   1. `BUILD_SHARED_LIBS`: This option toggles if the included libraries are built as shared or static libraries. The default is `ON`, meaning shared libraries will be built.
+   2. `MLC_USE_OPENMP`: This option toggles if OpenMP should be used by the library. The default is `ON`, meaning OpenMP will be used for parallelization if available.
+
+2. Include it from the current machine if it is installed on the system:
+
+   ```cmake
+   find_package(MachineLearningCompiler REQUIRED)
+   ```
+
+   If you want to install the library on your system, you can do this by following [Installing the Library](#installing-the-library).
+
+### Installing the Library
+
+  1. Clone the repository `git clone https://github.com/Integer-Ctrl/machine-learning-compilers.git`
+  2. Navigate to the directory `cd machine-learning-compilers`
+  3. Create a build directory `mkdir build && cd build`
+  4. Run CMake to configure the build `cmake ..` \
+     Optionally, you can specify the install directory with `cmake .. -DCMAKE_INSTALL_PREFIX=<install-path>` (see [CMAKE_INSTALL_PREFIX](https://cmake.org/cmake/help/latest/variable/CMAKE_INSTALL_PREFIX.html))
+  5. Install the library `cmake --build . --target install`
+
+  Now you can use the library in your CMake project by using the `find_package` command as shown in [Integration into CMakeLists](#integration-into-cmakelists).
+
+## Library Features
+
+In this section, we will cover the features of our CMake library. The library provides a simple interface to work with tensors and tensor expressions. It supports various tensor operations such as GEMM, unary operations, contraction, and einsum.
+
+### Tensor Object
+
+The library provides a `Tensor` class that represents a multi-dimensional array of data. This class is used as the input type for all tensor operations. Since the tensor compiler only supports unit-stride tensors, meaning elements must be stored contiguously in memory without gaps, strides cannot be explicitly defined. Instead, they are automatically computed based on the tensor's dimensions.
+
+There are two ways to create a tensor. The first is to create a tensor with data and the suitable dimension sizes. The second is to create a tensor only by specifying the dimension sizes, which will allocate the data internally and fill it with zeros.
+
+```cpp
+#include <MachineLearningCompiler/Tensor.h>
+
+float data[] = {1, 2, 3, 4};
+
+mlc::Tensor tensor({2, 3, 4}); // 3D tensor with 2 layers, 3 rows and 4 columns initialized with zeros
+mlc::Tensor tensorWithData1(data, {2, 2}); // 2D tensor with specific data
+
+std::cout << "Tensor dimensions: " << tensor.dim_sizes << std::endl; // Dimensions of the tensor
+std::cout << "Tensor strides: " << tensor.strides << std::endl; // Strides of the tensor
+std::cout << tensor.to_string("Tensor") << std::endl; // String representation of the tensor
+```
+
+To fill a tensor with data, a variety of functions is provided. Below are all available functions to fill a tensor with data:
+
+```cpp
+#include <MachineLearningCompiler/Tensor.h>
+
+mlc::Tensor tensor({2, 3, 4}); // 3D tensor with 2 layers, 3 rows and 4 columns initialized with zeros
+size_t size = tensor.size();
+
+mlc::fill_random(tensor); // Fill the tensor with random values
+mlc::fill_number(tensor, 3.2); // Fill the tensor with a single number, in this case 3.2
+mlc::fill_counting_up(tensor, 0.1, 0.1); // Fill the tensor with counting up values starting from 0.1 and increasing by 0.1
+mlc::fill_counting_down(tensor, 5, 1); // Fill the tensor with counting down values starting from 5 reducing by 1
+mlc::fill_lambda(tensor, [&size](const mlc::Tensor &self, size_t index) { return size; }); // Fill the tensor with a user defined function, in this case the size of the tensor
+```
The input tensors must have compatible dimensions for matrix multiplication, and the output tensor must have the correct dimensions to store the result.
+
+```cpp
+#include <MachineLearningCompiler/Tensor.h>
+
+mlc::Tensor in0({5, 3}); // IDs: 0,1
+mlc::Tensor in1({2, 5}); // IDs: 2,0
+mlc::Tensor out({2, 3}); // IDs: 2,1
+
+mlc::Error error = mlc::gemm(in0, in1, out);
+```
+
+#### Unary Operations
+
+Our library supports three unary operations: **zero**, **identity** and **ReLU** (Rectified Linear Unit). **zero** receives a single tensor that is overwritten with zeros, while **ReLU** and **identity** receive one input tensor and one output tensor which will be filled with the same data as the input tensor but with the ReLU or identity operation applied.
+
+```cpp
+#include <MachineLearningCompiler/Tensor.h>
+
+mlc::Tensor in({2, 2});
+mlc::Tensor out({2, 2});
+
+mlc::Error error = mlc::unary_zero(in);
+error = mlc::unary_identity(in, out);
+error = mlc::unary_relu(in, out);
+```
+
+#### Contraction
+
+To get more advanced, let's look at the contraction operation. This operation allows you to perform a contraction of two tensors based on a user defined expression. The expression defines which dimensions of the input tensors are contracted (reduced dimensions) and which dimensions are retained (output dimensions) in the output tensor.
+
+```cpp
+#include <MachineLearningCompiler/Tensor.h>
+
+mlc::Tensor in0({5, 4, 3}); // IDs: 0,1,2
+mlc::Tensor in1({5, 2, 4}); // IDs: 3,4,1
+mlc::Tensor out({5, 5, 2, 3}); // IDs: 0,3,4,2
+
+mlc::Error error = mlc::contraction(in0, in1, out, "[0,1,2],[3,4,1]->[0,3,4,2]");
+```
+
+In the example above, the contraction operation takes two input tensors `in0` and `in1`, and produces an output tensor `out`. The expression `"[0,1,2],[3,4,1]->[0,3,4,2]"` defines that the dimensions with IDs `0`, `2`, `3` and `4` are retained in the output tensor, while the dimension with ID `1` is contracted. The output tensor will have the dimensions `[5, 5, 2, 3]`.
+
+To further advance the contraction operation, a first touch primitive and a last touch primitive can be specified. The first touch primitive is applied to the output tensor before the contraction operation, while the last touch primitive is applied to the output tensor after the contraction operation. The supported primitives are `mlc::UnaryType::None`, `mlc::UnaryType::Zero`, `mlc::UnaryType::Identity` and `mlc::UnaryType::ReLU`.
+
+```cpp
+#include <MachineLearningCompiler/Tensor.h>
+
+mlc::Tensor in0({5, 4, 3}); // IDs: 0,1,2
+mlc::Tensor in1({5, 2, 4}); // IDs: 3,4,1
+mlc::Tensor out({5, 5, 2, 3}); // IDs: 0,3,4,2
+
+mlc::Error error = mlc::contraction(in0, in1, out, "[0,1,2],[3,4,1]->[0,3,4,2]", mlc::UnaryType::None, mlc::UnaryType::ReLU);
+```
+
+In the example above, the first touch primitive is set to `mlc::UnaryType::None`, meaning that no operation is applied to the output tensor before the contraction operation. The last touch primitive is set to `mlc::UnaryType::ReLU`, meaning that the **ReLU** operation is applied to the output tensor after the contraction operation.
+
+#### Einsum
+
+The last operation we will cover is the einsum operation, better known as **Einsum Tree**. This operation allows you to perform a contraction of multiple tensors based on a user defined expression. The expression defines which dimensions of the input tensors are contracted (reduced dimensions) and which dimensions are retained (output dimensions) in the output tensor. The expression is similar to the one used in the contraction operation, but it can handle multiple input tensors and a single output tensor. This allows you to perform multiple contractions in a single operation.
+
+```cpp
+#include <MachineLearningCompiler/Tensor.h>
+
+mlc::Tensor in0({5, 3}); // IDs: 0,1
+mlc::Tensor in1({2, 5}); // IDs: 2,0
+mlc::Tensor in2({3, 7}); // IDs: 1,3
+mlc::Tensor out({2, 7}); // IDs: 2,3
+
+mlc::Error error = mlc::einsum({in0, in1, in2}, out, "[[0,1],[2,0]->[2,1]],[1,3]->[2,3]");
+```
+
+The example above shows an einsum tree with three input tensors (leaves), one output tensor (root) and two contraction operations. The first contraction operation is defined by the expression `[[0,1],[2,0]->[2,1]]`, using the first two input tensors `in0` and `in1`. The second contraction operation uses the intermediate output of the first contraction and the third input tensor `in2`, defined by the expression `[2,1],[1,3]->[2,3]`.
+
+Einsum trees can increase in complexity very quickly, so jitting the expression every time can create an overhead. To avoid this it is possible to create an einsum tree once and reuse it. For this purpose, the library provides the function `mlc::einsum_operation` which receives the shapes of the input tensors and the output tensor, as well as the expression. This function returns a pointer to an `mlc::TensorOperation` object which can be used to execute the einsum tree multiple times with different input tensors.
+
+```cpp
+#include <MachineLearningCompiler/Tensor.h>
+
+mlc::Tensor in0({5, 3}); // IDs: 0,1
+mlc::Tensor in1({2, 5}); // IDs: 2,0
+mlc::Tensor in2({3, 7}); // IDs: 1,3
+mlc::Tensor out({2, 7}); // IDs: 2,3
+
+mlc::Tensor in0_2(in0.dim_sizes); // IDs: 0,1
+mlc::Tensor in1_2(in1.dim_sizes); // IDs: 2,0
+mlc::Tensor in2_2(in2.dim_sizes); // IDs: 1,3
+mlc::Tensor out_2(out.dim_sizes); // IDs: 2,3
+
+// Generates a tensor operation with fixed input and output tensor shapes.
+mlc::TensorOperation *op = mlc::einsum_operation({in0.dim_sizes, in1.dim_sizes, in2.dim_sizes}, out.dim_sizes, "[[0,1],[2,0]->[2,1]],[1,3]->[2,3]");
+
+// Process any error that may occur during the setup of the operation.
+mlc::Error error = op->getSetupError();
+
+// Execute the operation.
+error = op->execute({in0, in1, in2}, out); + +// Execute the operation again but on different tensors of the same size. +error = op->execute({in0_2, in1_2, in2_2}, out_2); + +delete op; // Don't forget to delete the operation object after you are done with it. +``` + +**Important**: Don't forget to delete the `mlc::TensorOperation` object after you are done with it to avoid memory leaks. + +## Example Project + +To demonstrate the usage of our CMake library, we have created an example project. This project showcases the features which we introduced in the previous section. You can find the example project in the `cmake-library/example-project` directory. There you can have a look at the `CMakeLists.txt` file and the `Example.cpp` file which contains the example code. diff --git a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index 2030cbe..64cf7ce 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -14,6 +14,7 @@ namespace mlc bool ownsData = false; float *data = nullptr; std::vector dim_sizes; + std::vector strides; // deletes the default constructor Tensor() = delete; @@ -27,7 +28,18 @@ namespace mlc * @param data The pointer to the data array. * @param dim_sizes The dimension sizes sorted by stride in descending order. */ - Tensor(float *data, const std::vector &dim_sizes) : data(data), dim_sizes(dim_sizes) {}; + Tensor(float *data, const std::vector &dim_sizes) : data(data), dim_sizes(dim_sizes) + { + strides.resize(dim_sizes.size()); + if (!dim_sizes.empty()) + { + strides[dim_sizes.size() - 1] = 1; + for (size_t i = dim_sizes.size() - 1; i > 0; --i) + { + strides[i - 1] = strides[i] * dim_sizes[i]; + } + } + }; /** * @brief Construct a new Tensor with the dimension sizes sorted by stride in descending order. 
@@ -41,8 +53,18 @@ namespace mlc { size *= dim; } - data = new float[size]; + data = new float[size]{0}; ownsData = true; + + strides.resize(dim_sizes.size()); + if (!dim_sizes.empty()) + { + strides[dim_sizes.size() - 1] = 1; + for (size_t i = dim_sizes.size() - 1; i > 0; --i) + { + strides[i - 1] = strides[i] * dim_sizes[i]; + } + } }; /** @@ -56,6 +78,21 @@ namespace mlc data = nullptr; } } + + /** + * @brief Converts the tensor into its string representation. + * + * @param name Name of the tensor that is printed + * @return std::string The string representation of the tensor. + */ + std::string to_string(std::string name = "tensor"); + + /** + * @brief Returns the number of elements the tensor has. + * + * @return uint64_t The number of elements in the tensor. + */ + uint64_t size(); }; class TensorOperation @@ -106,6 +143,24 @@ namespace mlc */ void fill_number(Tensor &tensor, float number); + /** + * @brief Fills the tensor with counting upwards numbers. + * + * @param tensor The tensor to fill. + * @param start The number to start counting from. + * @param step The amount to increase everytime. + */ + void fill_counting_up(Tensor &tensor, float start, float step); + + /** + * @brief Fills the tensor with counting downwards numbers. + * + * @param tensor The tensor to fill. + * @param start The number to start counting from. + * @param step The amount to decrease everytime. + */ + void fill_counting_down(Tensor &tensor, float start, float step); + /** * @brief Fills the tensor based on the given function. 
* diff --git a/include/MachineLearningCompiler/UnaryType.h b/include/MachineLearningCompiler/UnaryType.h index 9aefbaa..63f798c 100644 --- a/include/MachineLearningCompiler/UnaryType.h +++ b/include/MachineLearningCompiler/UnaryType.h @@ -8,7 +8,7 @@ namespace mlc { None = 0, Zero = 1, - ReLu = 2, + ReLU = 2, Identity = 3, }; } // namespace mlc diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index e7a3349..39cce2d 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -51,6 +51,42 @@ void mlc::fill_number(Tensor &tensor, float number) } } +void mlc::fill_counting_up(Tensor &tensor, float start, float step) +{ + if (tensor.dim_sizes.size() == 0) + { + return; + } + + int64_t size = internal::getTensorSize(&tensor); + +#ifdef MLC_USE_OPENMP +#pragma omp parallel for +#endif + for (int64_t i = 0; i < size; i++) + { + tensor.data[i] = start + i * step; + } +} + +void mlc::fill_counting_down(Tensor &tensor, float start, float step) +{ + if (tensor.dim_sizes.size() == 0) + { + return; + } + + int64_t size = internal::getTensorSize(&tensor); + +#ifdef MLC_USE_OPENMP +#pragma omp parallel for +#endif + for (int64_t i = 0; i < size; i++) + { + tensor.data[i] = start - i * step; + } +} + void mlc::fill_lambda(Tensor &tensor, std::function function) { if (tensor.dim_sizes.size() == 0) @@ -68,3 +104,64 @@ void mlc::fill_lambda(Tensor &tensor, std::functiondim_sizes.size() - 1) + { + str += "["; + for (size_t i = 0; i < tensor->dim_sizes[dim]; ++i) + { + if (i > 0) + { + str += ", "; + } + if (tensor->data == nullptr) + { + str += "-"; + } + else + { + str += std::to_string(tensor->data[offset + i]); + } + } + str += "]"; + } + else + { + str += "["; + indent += " "; + + for (size_t i = 0; i < tensor->dim_sizes[dim]; ++i) + { + if (i > 0) + { + str += ",\n" + indent; + } + + tensor_dim_to_string(tensor, str, dim + 1, offset + i * tensor->strides[dim], indent); + } + str += "]"; + } +} + +std::string mlc::Tensor::to_string(std::string name) +{ + 
std::string str; + str += name + "("; + if (dim_sizes.empty()) + { + str += "[]"; + } + else + { + internal::tensor_dim_to_string(this, str, 0, 0, ""); + } + str += ")"; + return str; +} + +uint64_t mlc::Tensor::size() +{ + return internal::getTensorSize(this); +} diff --git a/src/interface/TensorUtils.h b/src/interface/TensorUtils.h index 96920d2..18a2029 100644 --- a/src/interface/TensorUtils.h +++ b/src/interface/TensorUtils.h @@ -277,12 +277,23 @@ namespace mlc return mini_jit::TensorConfig::prim_t::copy; case mlc::UnaryType::Zero: return mini_jit::TensorConfig::prim_t::zero; - case mlc::UnaryType::ReLu: + case mlc::UnaryType::ReLU: return mini_jit::TensorConfig::prim_t::relu; default: return mini_jit::TensorConfig::prim_t::none; } } + + /** + * @brief Recursively converts the given tensor into a string format. + * + * @param tensor The tensor to convert. + * @param str The string to write to. + * @param dim The current processed dimension. + * @param offset The offset from the data to be processed. + * @param indent The indentation of the current dimension. 
+ */ + void tensor_dim_to_string(mlc::Tensor *tensor, std::string &str, size_t dim, size_t offset, std::string indent); } // namespace internal } // namespace mlc #endif // MLC_TENSORUTILS_H \ No newline at end of file diff --git a/src/test/interface/Tensor.test.cpp b/src/test/interface/Tensor.test.cpp index f91700b..c70a609 100644 --- a/src/test/interface/Tensor.test.cpp +++ b/src/test/interface/Tensor.test.cpp @@ -50,6 +50,50 @@ TEST_CASE("Test interface tensor fill_number", "[tensor][correctness]") } } +TEST_CASE("Test interface tensor fill_counting_up", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4}; + + size_t total_size1 = shape1[0] * shape1[1]; + float *data1 = new float[total_size1]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = std::nanf("1"); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::fill_counting_up(tensor1, 5, 0.5); + + for (size_t i = 0; i < total_size1; i++) + { + CAPTURE(i); + REQUIRE(tensor1.data[i] == (0.5f * i + 5)); + } +} + +TEST_CASE("Test interface tensor fill_counting_down", "[tensor][correctness]") +{ + std::vector shape1 = {3, 4}; + + size_t total_size1 = shape1[0] * shape1[1]; + float *data1 = new float[total_size1]; + + for (size_t i = 0; i < total_size1; ++i) + { + data1[i] = std::nanf("1"); + } + + mlc::Tensor tensor1(data1, shape1); + mlc::fill_counting_down(tensor1, 5, 1.0); + + for (int64_t i = 0; i < static_cast(total_size1); i++) + { + CAPTURE(i); + REQUIRE(tensor1.data[i] == (-i + 5)); + } +} + TEST_CASE("Test interface tensor fill_lambda", "[tensor][correctness]") { std::vector shape1 = {3, 4}; @@ -103,6 +147,16 @@ TEST_CASE("Test interface tensor einsum reference", "[tensor][correctness]") mlc::Tensor tensor2(data2, shape2); mlc::Tensor tensor3(data2, shape3); + REQUIRE(tensor1.strides.size() == 2); + REQUIRE(tensor1.strides[0] == 4); + REQUIRE(tensor1.strides[1] == 1); + REQUIRE(tensor2.strides.size() == 2); + REQUIRE(tensor2.strides[0] == 5); + REQUIRE(tensor2.strides[1] == 1); + 
REQUIRE(tensor3.strides.size() == 2); + REQUIRE(tensor3.strides[0] == 5); + REQUIRE(tensor3.strides[1] == 1); + mlc::Error err = mlc::einsum({tensor1, tensor2}, tensor3, "[0,1],[1,2]->[0,2]"); REQUIRE(err.type == mlc::ErrorType::None); @@ -143,6 +197,16 @@ TEST_CASE("Test interface tensor einsum pointer", "[tensor][correctness]") mlc::Tensor tensor3(data2, shape3); std::vector inputs{&tensor1, &tensor2}; + REQUIRE(tensor1.strides.size() == 2); + REQUIRE(tensor1.strides[0] == 4); + REQUIRE(tensor1.strides[1] == 1); + REQUIRE(tensor2.strides.size() == 2); + REQUIRE(tensor2.strides[0] == 5); + REQUIRE(tensor2.strides[1] == 1); + REQUIRE(tensor3.strides.size() == 2); + REQUIRE(tensor3.strides[0] == 5); + REQUIRE(tensor3.strides[1] == 1); + CAPTURE(inputs); mlc::Error err = mlc::einsum(inputs, tensor3, "[0,1],[1,2]->[0,2]"); REQUIRE(err.type == mlc::ErrorType::None); @@ -183,6 +247,16 @@ TEST_CASE("Test interface tensor contraction", "[tensor][correctness]") mlc::Tensor tensor2(data2, shape2); mlc::Tensor tensor3(data2, shape3); + REQUIRE(tensor1.strides.size() == 2); + REQUIRE(tensor1.strides[0] == 4); + REQUIRE(tensor1.strides[1] == 1); + REQUIRE(tensor2.strides.size() == 2); + REQUIRE(tensor2.strides[0] == 5); + REQUIRE(tensor2.strides[1] == 1); + REQUIRE(tensor3.strides.size() == 2); + REQUIRE(tensor3.strides[0] == 5); + REQUIRE(tensor3.strides[1] == 1); + mlc::Error err = mlc::contraction(tensor1, tensor2, tensor3, "[0,1],[1,2]->[0,2]"); REQUIRE(err.type == mlc::ErrorType::None); @@ -222,6 +296,16 @@ TEST_CASE("Test interface tensor gemm", "[tensor][correctness]") mlc::Tensor tensor2(data2, shape2); mlc::Tensor tensor3(data2, shape3); + REQUIRE(tensor1.strides.size() == 2); + REQUIRE(tensor1.strides[0] == 3); + REQUIRE(tensor1.strides[1] == 1); + REQUIRE(tensor2.strides.size() == 2); + REQUIRE(tensor2.strides[0] == 4); + REQUIRE(tensor2.strides[1] == 1); + REQUIRE(tensor3.strides.size() == 2); + REQUIRE(tensor3.strides[0] == 3); + REQUIRE(tensor3.strides[1] == 1); 
+ mlc::Error err = mlc::gemm(tensor1, tensor2, tensor3); REQUIRE(err.type == mlc::ErrorType::None); @@ -261,6 +345,17 @@ TEST_CASE("Test interface tensor gemm failure", "[tensor][correctness]") mlc::Tensor tensor2(data2, shape2); mlc::Tensor tensor3(data2, shape3); + REQUIRE(tensor1.strides.size() == 3); + REQUIRE(tensor1.strides[0] == 20); + REQUIRE(tensor1.strides[1] == 5); + REQUIRE(tensor1.strides[2] == 1); + REQUIRE(tensor2.strides.size() == 2); + REQUIRE(tensor2.strides[0] == 5); + REQUIRE(tensor2.strides[1] == 1); + REQUIRE(tensor3.strides.size() == 2); + REQUIRE(tensor3.strides[0] == 5); + REQUIRE(tensor3.strides[1] == 1); + mlc::Error err = mlc::gemm(tensor1, tensor2, tensor3); REQUIRE(err.type == mlc::ErrorType::TensorExpected2DTensor); @@ -284,6 +379,11 @@ TEST_CASE("Test interface tensor unary zero", "[tensor][correctness]") mlc::Tensor tensor1(data1, shape1); + REQUIRE(tensor1.strides.size() == 3); + REQUIRE(tensor1.strides[0] == 20); + REQUIRE(tensor1.strides[1] == 5); + REQUIRE(tensor1.strides[2] == 1); + mlc::Error err = mlc::unary_zero(tensor1); REQUIRE(err.type == mlc::ErrorType::None); @@ -319,6 +419,15 @@ TEST_CASE("Test interface tensor unary relu", "[tensor][correctness]") mlc::Tensor tensor1(data1, shape1); mlc::Tensor tensor2(data2, shape2); + REQUIRE(tensor1.strides.size() == 3); + REQUIRE(tensor1.strides[0] == 20); + REQUIRE(tensor1.strides[1] == 5); + REQUIRE(tensor1.strides[2] == 1); + REQUIRE(tensor2.strides.size() == 3); + REQUIRE(tensor2.strides[0] == 20); + REQUIRE(tensor2.strides[1] == 5); + REQUIRE(tensor2.strides[2] == 1); + mlc::Error err = mlc::unary_relu(tensor1, tensor2); REQUIRE(err.type == mlc::ErrorType::None); @@ -355,6 +464,15 @@ TEST_CASE("Test interface tensor unary identity", "[tensor][correctness]") mlc::Tensor tensor1(data1, shape1); mlc::Tensor tensor2(data2, shape2); + REQUIRE(tensor1.strides.size() == 3); + REQUIRE(tensor1.strides[0] == 20); + REQUIRE(tensor1.strides[1] == 5); + REQUIRE(tensor1.strides[2] == 1); + 
REQUIRE(tensor2.strides.size() == 3); + REQUIRE(tensor2.strides[0] == 20); + REQUIRE(tensor2.strides[1] == 5); + REQUIRE(tensor2.strides[2] == 1); + mlc::Error err = mlc::unary_identity(tensor1, tensor2); REQUIRE(err.type == mlc::ErrorType::None); @@ -399,6 +517,16 @@ TEST_CASE("Test interface tensor contraction first+last", "[tensor][correctness] mlc::Tensor tensor2(data2, shape2); mlc::Tensor tensor3(data2, shape3); + REQUIRE(tensor1.strides.size() == 2); + REQUIRE(tensor1.strides[0] == 4); + REQUIRE(tensor1.strides[1] == 1); + REQUIRE(tensor2.strides.size() == 2); + REQUIRE(tensor2.strides[0] == 5); + REQUIRE(tensor2.strides[1] == 1); + REQUIRE(tensor3.strides.size() == 2); + REQUIRE(tensor3.strides[0] == 5); + REQUIRE(tensor3.strides[1] == 1); + mlc::Error err = mlc::contraction(tensor1, tensor2, tensor3, "[0,1],[1,2]->[0,2]", mlc::UnaryType::None, mlc::UnaryType::None); REQUIRE(err.type == mlc::ErrorType::None); From 0f407c2740da4536f25f612b3308424545095b5a Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Wed, 2 Jul 2025 18:55:23 +0000 Subject: [PATCH 15/17] fix: einsum operation --- CMakeLists.txt | 1 - cmake-library/example-project/Example.cpp | 45 +++++++++++++++------ include/MachineLearningCompiler/Tensor.h | 3 -- src/interface/Einsum.cpp | 8 +++- src/interface/Tensor.cpp | 2 +- src/test/interface/Tensor.test.cpp | 26 ++++++++++++ src/test/interface/TensorOperation.test.cpp | 23 ----------- 7 files changed, 65 insertions(+), 43 deletions(-) delete mode 100644 src/test/interface/TensorOperation.test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index dc5d883..1d7b05d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -286,7 +286,6 @@ set(SRC_INTERFACE_FILES set(TEST_INTERFACE_FILES TensorUtils.test.cpp Tensor.test.cpp - TensorOperation.test.cpp ) foreach(file ${SRC_MAIN_FILES}) diff --git a/cmake-library/example-project/Example.cpp b/cmake-library/example-project/Example.cpp index 4cfabd3..81d8d5f 100644 --- 
a/cmake-library/example-project/Example.cpp +++ b/cmake-library/example-project/Example.cpp @@ -89,7 +89,8 @@ void example_fill() // filled are given as additional parameter. // Here the tensor is filled with 1 2 3, 1 2 3, 1 2 3 mlc::Tensor tensorLambda({3, 3}); - mlc::fill_lambda(tensorLambda, [](const mlc::Tensor &self, size_t index) { return index % self.strides[self.strides.size() - 1]; }); + mlc::fill_lambda(tensorLambda, + [](const mlc::Tensor &self, size_t index) { return index % self.strides[0] + 1; }); std::cout << tensorLambda.to_string("Lambda 1 2 3") << std::endl; // We can also fill the tensor using outside defined variable. @@ -143,7 +144,7 @@ void example_unary() mlc::Tensor tensorIdentityIn({3, 3}); mlc::Tensor tensorIdentityOut({3, 3}); mlc::fill_random(tensorIdentityIn); - mlc::fill_number(tensorIdentityIn, 0); + mlc::fill_number(tensorIdentityOut, 0); error = mlc::unary_identity(tensorIdentityIn, tensorIdentityOut); // identity = copy from input to output if (error.type != mlc::ErrorType::None) { @@ -157,7 +158,8 @@ void example_unary() mlc::Tensor tensorReluIn({3, 3}); mlc::Tensor tensorReluOut({3, 3}); // Fills even indices with positive and odd indices with negative numbers - mlc::fill_lambda(tensorReluIn, [](const mlc::Tensor &, size_t index) { return index * (2 * (index % 2) - 1); }); + mlc::fill_lambda(tensorReluIn, + [](const mlc::Tensor &, int64_t index) { return index * (2 * (index % 2) - 1); }); mlc::fill_number(tensorReluOut, 0); error = mlc::unary_relu(tensorReluIn, tensorReluOut); // ReLU = max(x, 0) if (error.type != mlc::ErrorType::None) @@ -180,7 +182,7 @@ void example_contraction() mlc::fill_counting_up(in0, 0, 1); mlc::fill_counting_down(in1, 0, 1); - mlc::fill_number(in0, 1'000'000); + mlc::fill_number(out, 1'000'000); mlc::Error error = mlc::contraction(in0, in1, out, "[0,1,2],[3,4,1]->[0,3,4,2]"); if (error.type != mlc::ErrorType::None) @@ -205,7 +207,7 @@ void example_contraction_first_last_touch() mlc::Tensor out({5, 5, 2, 
3}); // IDs: 0,3,4,2 mlc::fill_counting_up(in0, 0, 1); - mlc::fill_counting_down(in1, 0, 1); + mlc::fill_counting_down(in1, 20, 1); // The out is default initialized with zeros. mlc::Error error = mlc::contraction(in0, in1, out, "[0,1,2],[3,4,1]->[0,3,4,2]", mlc::UnaryType::None, mlc::UnaryType::ReLU); @@ -318,14 +320,31 @@ void example_einsum_operation() int main(int argc, const char **argv) { - example_tensor(); - example_fill(); - example_gemm(); - example_unary(); - example_contraction(); - example_contraction_first_last_touch(); - example_einsum(); - example_einsum_operation(); + size_t sep = 20; + + // std::cout << std::string(sep, '=') << std::endl << "Tensors" << std::endl << std::string(sep, '=') << std::endl; + // example_tensor(); + + // std::cout << std::endl << std::string(sep, '=') << std::endl << "Fill Tensors" << std::endl << std::string(sep, '=') << std::endl; + // example_fill(); + + // std::cout << std::endl << std::string(sep, '=') << std::endl << "GEMM Operation" << std::endl << std::string(sep, '=') << std::endl; + // example_gemm(); + + // std::cout << std::endl << std::string(sep, '=') << std::endl << "Unary Operation" << std::endl << std::string(sep, '=') << std::endl; + // example_unary(); + + // std::cout << std::endl << std::string(sep, '=') << std::endl << "Contraction" << std::endl << std::string(sep, '=') << std::endl; + // example_contraction(); + + // std::cout << std::endl << std::string(sep, '=') << std::endl << "Contraction First & Last Touch" << std::endl << std::string(sep, '=') << std::endl; + // example_contraction_first_last_touch(); + + // std::cout << std::endl << std::string(sep, '=') << std::endl << "Einsum" << std::endl << std::string(sep, '=') << std::endl; + // example_einsum(); + + // std::cout << std::endl << std::string(sep, '=') << std::endl << "Einsum Operation" << std::endl << std::string(sep, '=') << std::endl; + // example_einsum_operation(); return 0; } \ No newline at end of file diff --git 
a/include/MachineLearningCompiler/Tensor.h b/include/MachineLearningCompiler/Tensor.h index 64cf7ce..ca1c9a4 100644 --- a/include/MachineLearningCompiler/Tensor.h +++ b/include/MachineLearningCompiler/Tensor.h @@ -19,9 +19,6 @@ namespace mlc // deletes the default constructor Tensor() = delete; - // deletes the copy constructor - Tensor(const Tensor &) = delete; - /** * @brief Construct a new Tensor with with a pointer to memory and the dimension sizes sorted in by stride in descending order. * diff --git a/src/interface/Einsum.cpp b/src/interface/Einsum.cpp index 9e8418d..f7e964f 100644 --- a/src/interface/Einsum.cpp +++ b/src/interface/Einsum.cpp @@ -1,6 +1,7 @@ #include "Einsum.h" #include "../../include/MachineLearningCompiler/Tensor.h" #include "../main/EinsumTree.h" +#include "utility" mlc::Error mlc::einsum(const std::vector> &inputs, Tensor &output, const std::string &tree) { @@ -69,12 +70,15 @@ mlc::Error mlc::EinsumOperation::execute(const std::vector &inpu mlc::TensorOperation *mlc::einsum_operation(const std::vector> &inputs, const std::vector &output, const std::string &tree) { + std::vector rawTensor; std::vector> inputTensors; + rawTensor.reserve(inputs.size()); + inputTensors.reserve(inputs.size()); for (const auto &shape : inputs) { // Create a dummy tensor with the given shape - Tensor tensor(nullptr, shape); - inputTensors.push_back(tensor); + rawTensor.emplace_back(nullptr, shape); + inputTensors.push_back(rawTensor.back()); } Tensor outputTensor(output); diff --git a/src/interface/Tensor.cpp b/src/interface/Tensor.cpp index 39cce2d..0de161b 100644 --- a/src/interface/Tensor.cpp +++ b/src/interface/Tensor.cpp @@ -148,7 +148,7 @@ void mlc::internal::tensor_dim_to_string(mlc::Tensor *tensor, std::string &str, std::string mlc::Tensor::to_string(std::string name) { std::string str; - str += name + "("; + str += name + "(\n"; if (dim_sizes.empty()) { str += "[]"; diff --git a/src/test/interface/Tensor.test.cpp b/src/test/interface/Tensor.test.cpp 
index c70a609..5d26805 100644 --- a/src/test/interface/Tensor.test.cpp +++ b/src/test/interface/Tensor.test.cpp @@ -533,4 +533,30 @@ TEST_CASE("Test interface tensor contraction first+last", "[tensor][correctness] delete[] data1; delete[] data2; delete[] data3; +} + +TEST_CASE("Test interface tensor einsum operation", "[setup][correctness]") +{ + std::vector shape1 = {3, 4}; + std::vector shape2 = {4, 5}; + std::vector shape3 = {3, 5}; + + mlc::Tensor tensor1(shape1); + mlc::Tensor tensor2(shape2); + mlc::Tensor tensor3(shape3); + + mlc::TensorOperation *setup = mlc::einsum_operation({shape1, shape2}, shape3, "[0,1],[1,2]->[0,2]"); + + mlc::Error error = setup->execute({tensor1, tensor2}, tensor3); + INFO(error.message); + REQUIRE(error.type == mlc::ErrorType::None); + + error = setup->execute({tensor1, tensor2}, tensor3); + INFO(error.message); + REQUIRE(error.type == mlc::ErrorType::None); + + error = setup->execute({tensor1, tensor2}, tensor3); + INFO(error.message); + REQUIRE(error.type == mlc::ErrorType::None); + delete setup; } \ No newline at end of file diff --git a/src/test/interface/TensorOperation.test.cpp b/src/test/interface/TensorOperation.test.cpp deleted file mode 100644 index 9b257e9..0000000 --- a/src/test/interface/TensorOperation.test.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "../../../include/MachineLearningCompiler/Tensor.h" -#include -#include -#include -#include -#include - -TEST_CASE("Test interface tensor einsum setup", "[setup][correctness]") -{ - std::vector shape1 = {3, 4}; - std::vector shape2 = {4, 5}; - std::vector shape3 = {3, 5}; - - mlc::Tensor tensor1(shape1); - mlc::Tensor tensor2(shape2); - mlc::Tensor tensor3(shape3); - - mlc::TensorOperation *setup = mlc::einsum_operation({shape1, shape2}, shape3, "[0,1],[1,2]->[0,2]"); - setup->execute({tensor1, tensor2}, tensor3); - setup->execute({tensor1, tensor2}, tensor3); - setup->execute({tensor1, tensor2}, tensor3); - delete setup; -} \ No newline at end of file From 
48ca989ac15260a98b8964537447e8c854e2593c Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Wed, 2 Jul 2025 19:01:36 +0000 Subject: [PATCH 16/17] fix: einsum operation --- cmake-library/example-project/Example.cpp | 47 +++++++++++++---------- src/interface/Einsum.h | 2 +- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/cmake-library/example-project/Example.cpp b/cmake-library/example-project/Example.cpp index 81d8d5f..7a801ce 100644 --- a/cmake-library/example-project/Example.cpp +++ b/cmake-library/example-project/Example.cpp @@ -301,10 +301,10 @@ void example_einsum_operation() mlc::Tensor in2_2(in2.dim_sizes); // IDs: 1,3 mlc::Tensor out_2(out.dim_sizes); // IDs: 2,3 - mlc::fill_random(in0_2); - mlc::fill_random(in1_2); - mlc::fill_random(in2_2); - mlc::fill_random(out_2); + mlc::fill_counting_up(in0_2, 10.5f, 33); + mlc::fill_number(in1_2, 13); + mlc::fill_counting_down(in2_2, 5, 2); + mlc::fill_number(out_2, -111); // Execute the operation again but on different tensors of the same size. 
error = op->execute({in0_2, in1_2, in2_2}, out_2); @@ -315,36 +315,41 @@ void example_einsum_operation() return; } + std::cout << in0_2.to_string("in0_2") << std::endl; + std::cout << in1_2.to_string("in1_2") << std::endl; + std::cout << in2_2.to_string("in2_2") << std::endl; + std::cout << out_2.to_string("out_2") << std::endl; + delete op; } int main(int argc, const char **argv) { - size_t sep = 20; + size_t sep = 50; - // std::cout << std::string(sep, '=') << std::endl << "Tensors" << std::endl << std::string(sep, '=') << std::endl; - // example_tensor(); + std::cout << std::string(sep, '=') << std::endl << "Tensors" << std::endl << std::string(sep, '=') << std::endl; + example_tensor(); - // std::cout << std::endl << std::string(sep, '=') << std::endl << "Fill Tensors" << std::endl << std::string(sep, '=') << std::endl; - // example_fill(); + std::cout << std::endl << std::string(sep, '=') << std::endl << "Fill Tensors" << std::endl << std::string(sep, '=') << std::endl; + example_fill(); - // std::cout << std::endl << std::string(sep, '=') << std::endl << "GEMM Operation" << std::endl << std::string(sep, '=') << std::endl; - // example_gemm(); + std::cout << std::endl << std::string(sep, '=') << std::endl << "GEMM Operation" << std::endl << std::string(sep, '=') << std::endl; + example_gemm(); - // std::cout << std::endl << std::string(sep, '=') << std::endl << "Unary Operation" << std::endl << std::string(sep, '=') << std::endl; - // example_unary(); + std::cout << std::endl << std::string(sep, '=') << std::endl << "Unary Operation" << std::endl << std::string(sep, '=') << std::endl; + example_unary(); - // std::cout << std::endl << std::string(sep, '=') << std::endl << "Contraction" << std::endl << std::string(sep, '=') << std::endl; - // example_contraction(); + std::cout << std::endl << std::string(sep, '=') << std::endl << "Contraction" << std::endl << std::string(sep, '=') << std::endl; + example_contraction(); - // std::cout << std::endl << 
std::string(sep, '=') << std::endl << "Contraction First & Last Touch" << std::endl << std::string(sep, '=') << std::endl; - // example_contraction_first_last_touch(); + std::cout << std::endl << std::string(sep, '=') << std::endl << "Contraction First & Last Touch" << std::endl << std::string(sep, '=') << std::endl; + example_contraction_first_last_touch(); - // std::cout << std::endl << std::string(sep, '=') << std::endl << "Einsum" << std::endl << std::string(sep, '=') << std::endl; - // example_einsum(); + std::cout << std::endl << std::string(sep, '=') << std::endl << "Einsum" << std::endl << std::string(sep, '=') << std::endl; + example_einsum(); - // std::cout << std::endl << std::string(sep, '=') << std::endl << "Einsum Operation" << std::endl << std::string(sep, '=') << std::endl; - // example_einsum_operation(); + std::cout << std::endl << std::string(sep, '=') << std::endl << "Einsum Operation" << std::endl << std::string(sep, '=') << std::endl; + example_einsum_operation(); return 0; } \ No newline at end of file diff --git a/src/interface/Einsum.h b/src/interface/Einsum.h index 83368d2..a10a7d2 100644 --- a/src/interface/Einsum.h +++ b/src/interface/Einsum.h @@ -86,7 +86,7 @@ namespace mlc if (errorExecute != mini_jit::EinsumTree::ErrorExecute::None) { mlc::ErrorType type = internal::convertErrorExecute(errorExecute); - return {type, ""}; // TODO add error message + return {type, "Failed to execute the einsum operation."}; } return {mlc::ErrorType::None, "Success"}; From f1f4eb5f0883bde610b8a173dcd21552e4fdc8cf Mon Sep 17 00:00:00 2001 From: RivinHD <58261670+RivinHD@users.noreply.github.com> Date: Wed, 2 Jul 2025 19:11:22 +0000 Subject: [PATCH 17/17] added todo --- cmake-library/example-project/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake-library/example-project/CMakeLists.txt b/cmake-library/example-project/CMakeLists.txt index be563d4..0b850c1 100644 --- a/cmake-library/example-project/CMakeLists.txt +++ 
b/cmake-library/example-project/CMakeLists.txt @@ -42,7 +42,7 @@ Include(FetchContent) FetchContent_Declare( MachineLearningCompiler GIT_REPOSITORY https://github.com/Integer-Ctrl/machine-learning-compilers - GIT_TAG individual-phase + GIT_TAG individual-phase #TODO change EXCLUDE_FROM_ALL ) FetchContent_MakeAvailable(MachineLearningCompiler)