From ded08aa0076402151ccc1ae7ced674b7005ec092 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 30 Mar 2025 15:29:57 +0200
Subject: [PATCH 01/31] Add b_instrset_detect to namespace

---
 include/instrset_detect.cc | 7 +++++--
 include/instrset_detect.hh | 6 +++---
 2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/include/instrset_detect.cc b/include/instrset_detect.cc
index a023773..49618c3 100644
--- a/include/instrset_detect.cc
+++ b/include/instrset_detect.cc
@@ -1,6 +1,9 @@
 #include "instrset_detect.hh"
 
+namespace bparser {
+
+	int b_instrset_detect(void) {
+		return instrset_detect();
+	}
 
-int b_instrset_detect(void) {
-	return instrset_detect();
 }
\ No newline at end of file
diff --git a/include/instrset_detect.hh b/include/instrset_detect.hh
index 10430a6..b215c1e 100644
--- a/include/instrset_detect.hh
+++ b/include/instrset_detect.hh
@@ -15,7 +15,7 @@
 
 #include "config.hh"
 #include "instrset.h"
-
-EXPORT int b_instrset_detect(void);
-
+namespace bparser {
+	EXPORT int b_instrset_detect(void);
+}
 #endif
\ No newline at end of file

From d17d873ef977f4d5104f6f970f38fcec84d74e40 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 30 Mar 2025 20:21:19 +0200
Subject: [PATCH 02/31] arena_resource

---
 CMakeLists.txt             |   5 +-
 include/arena_alloc.hh     |  45 ++++++---
 include/arena_resource.hh  | 181 +++++++++++++++++++++++++++++++++++++
 include/config.hh          |   8 +-
 include/instrset_detect.hh |  10 +-
 5 files changed, 227 insertions(+), 22 deletions(-)
 create mode 100644 include/arena_resource.hh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 627b7e3..5eb10f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -160,7 +160,7 @@ if (NOT Boost_FOUND)
   if (NOT EXTERNAL_PROJECT_DIR)
    unset(BOOST_ROOT)
   endif()
-  find_package( Boost 1.58.0 REQUIRED)
+  find_package( Boost 1.58.0 REQUIRED) #since Boost 1.70.0 it should be find_package( Boost CONFIG REQUIRED)
 endif()
 
 message(STATUS "-------------------------------------------------------")
@@ -198,6 +198,7 @@ add_library(bparser SHARED
     ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_AVX512.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_double.cc
 )
+set_target_properties(bparser PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -DBPARSER_DLL")
 
 
 
@@ -259,6 +260,6 @@ endmacro()
 define_test(test_parser bparser)
 define_test(test_array)
 define_test(test_grammar bparser)
-define_test(test_processor)
+define_test(test_processor bparser)
 define_test(test_speed bparser)  
 define_test(test_simd)
diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh
index 31196ab..bb2efc4 100644
--- a/include/arena_alloc.hh
+++ b/include/arena_alloc.hh
@@ -12,6 +12,7 @@
 #include <utility>
 #include <malloc.h>
 #include "aligned_alloc.hh"
+#include "arena_resource.hh"
 
 namespace bparser {
 
@@ -20,56 +21,72 @@ inline size_t align_size(size_t al, size_t size) {
 	return (size + al -1) / al * al;
 }
 
-struct ArenaAlloc {
+struct ArenaAlloc : public PatchArena {
+	
+#define SIZE (align_size(alignment,size))
+
+
 	ArenaAlloc(std::size_t alignment, std::size_t size)
-	: alignment_(alignment),
-	  size_(0)
+	//: alignment_(alignment),
+	//  size_(0)
+	: PatchArena(align_alloc(alignment, SIZE), SIZE, alignment)
 	{
-		size_ = align_size(alignment_, size);
+		/*size_ = align_size(alignment_, size);
 		base_ = (char*)align_alloc(alignment_, size_);
 		BP_ASSERT(base_ != nullptr);
 		ptr_ = base_;
 		//std::cout << "arena begin: " << (void *)base_ << " end: " << end() << std::endl;
+		*/
+		size_ = buffer_size_;
 	}
+#undef SIZE
 	
 	~ArenaAlloc() {
         destroy();
     }
 
 	void destroy() {
-		align_free(base_);
+		//align_free(base_);
+		align_free(PatchArena::buffer_);
 	}
 
-	void *end() {
+	/*void* end() {
 		return base_ + size_;
-	}
+	}*/
 
-	void * allocate(std::size_t size) {
+	/*void* allocate(std::size_t size) { //defined in PatchArena, std::pmr::memory_resource
+		
 		size = align_size(alignment_, size);
 		void * ptr = ptr_;
 		ptr_ += size;
 		BP_ASSERT(ptr_ <= end());
 		//std::cout << "allocated: " << ptr << " end: " << (void *)ptr_ << " aend: " << end() << "\n";
 		return ptr;
-	}
+		
+	}*/
 
 	template <class T, typename... Args>
-	T * create(Args&&... args) {
+	T* create(Args&&... args) {
+		
 		void * ptr = allocate(sizeof(T));
 		return new (ptr) T(std::forward<Args>(args)...);
+		
 	}
 
 	template <class T>
-	T * create_array(uint n_items) {
+	T* create_array(uint n_items) {
+		/*
 		void * ptr = allocate(sizeof(T) * n_items);
 		return new (ptr) T[n_items];
+		*/
+		return PatchArena::allocate_simd<T>(n_items);
 	}
 
 
-	std::size_t alignment_;
+	//std::size_t alignment_;
 	std::size_t size_;
-	char * base_;
-	char * ptr_;
+	//char * base_;
+	//char * ptr_;
 };
 
 } // namespace bparser
diff --git a/include/arena_resource.hh b/include/arena_resource.hh
new file mode 100644
index 0000000..5fd054a
--- /dev/null
+++ b/include/arena_resource.hh
@@ -0,0 +1,181 @@
+/*!
+ *
+﻿ * Copyright (C) 2015 Technical University of Liberec.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 3 as published by the
+ * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+ *
+ *
+ * @file    arena_resource.hh
+ */
+
+#ifndef ARENA_RESOURCE_HH_
+#define ARENA_RESOURCE_HH_
+
+#include <memory_resource>
+#include <vector>
+#include <iostream>
+#include <new>
+#include <stdexcept>   // !! Use Flow exception mechanism
+
+//#include "system/asserts.hh"
+#include "assert.hh"
+
+
+// Final proposal of Arena
+// TODO shared_ptr out of class, pass pointer to data, describe how to use
+template <class Resource>
+class PatchArenaResource : public std::pmr::memory_resource {
+protected:
+    /// Returns different upstream resource in debug / release mode
+	static inline std::pmr::memory_resource* upstream_resource() {
+#ifdef DEBUG
+        return std::pmr::null_memory_resource();
+#else
+        return std::pmr::get_default_resource();
+#endif
+    }
+
+public:
+    //DECLARE_EXCEPTION( ExcArenaAllocation,
+    //        << "Allocation of ArenaResource failed. Please check if correct type of upstream is used.");
+#define EXC_ARENA_ALLOCATION "Allocation of ArenaResource failed. Please check if correct type of upstream is used."
+
+    /// Same as previous but doesn't construct buffer implicitly.
+	PatchArenaResource(void *buffer, size_t buffer_size, size_t simd_alignment, std::pmr::memory_resource* upstream = PatchArenaResource<Resource>::upstream_resource())
+    : upstream_( upstream ),
+      buffer_(buffer),
+      buffer_size_(buffer_size),
+      resource_(buffer_, buffer_size, upstream_),
+      simd_alignment_(simd_alignment),
+      full_data_(false)
+    {
+        //ASSERT_PERMANENT_EQ( (buffer_size%simd_alignment), 0 );
+        BP_ASSERT( (buffer_size % simd_alignment) == 0 );
+    }
+
+
+    ~PatchArenaResource() = default; // virtual, call destructor buffer_ = default_resource, (resource_)
+
+    /// Compute and print free space and used space of arena buffer. Development method
+    inline void print_space() {
+        void *p = this->raw_allocate(1, simd_alignment_);
+        size_t used_size = (char *)p - (char *)buffer_;
+        size_t free_space = buffer_size_ - used_size;
+        std::cout << "Allocated space of arena is " << used_size << " B, free space is " << free_space << " B." << std::endl;
+    }
+
+
+    /// Getter for resource
+    Resource &resource() {
+    	return resource_;
+    }
+
+    /// Allocate and return data pointer of n_item array of type T (alignment to length 8 bytes)
+    template <class T>
+    T* allocate_8(size_t n_items) {
+        size_t bytes = sizeof(T) * n_items;
+        return (T*)this->raw_allocate(bytes, 8);
+    }
+
+    /// Allocate and return data pointer of n_item array of type T (alignment to length given by simd_alignment constructor argument)
+    template <class T>
+    T* allocate_simd(size_t n_items) {
+        size_t bytes = sizeof(T) * n_items;
+        return (T*)this->raw_allocate(bytes, simd_alignment_);
+    }
+
+    // Reset allocated data
+    void reset() {
+        resource_.release();
+        full_data_ = false;
+#ifdef DEBUG
+    	char *c_buffer = (char *)buffer_;
+    	for (size_t i=0; i<buffer_size_; ++i)
+    	    c_buffer[i] = 0;
+#endif
+    }
+
+protected:
+    void* raw_allocate(size_t bytes, size_t alignment) {
+        //ASSERT(!full_data_).error("Allocation of new data is not possible because child arena was created.");
+        BP_ASSERT( !full_data_ );
+        //ASSERT_EQ(buffer_size_%alignment, 0);
+        BP_ASSERT( (buffer_size_ % alignment) == 0 );
+
+    	try {
+            void* p = resource_.allocate(bytes, alignment);
+            return p;
+    	} catch ( std::bad_alloc& ) {
+            //THROW( ExcArenaAllocation() );
+            using bparser::Exception;
+            Throw() << EXC_ARENA_ALLOCATION;
+            
+    	}
+        return nullptr;
+    }
+
+    /// Override do_allocate to handle allocation logic
+    void* do_allocate(size_t bytes, size_t alignment) override {
+        return raw_allocate(bytes, alignment);
+    }
+
+    /// Override do_deallocate (no-op for monotonic buffer)
+    void do_deallocate(void* p, size_t bytes, size_t alignment) override {
+        upstream_->deallocate(p, bytes, alignment);
+    }
+
+    /// Override do_is_equal for memory resource comparison
+    bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
+        return this == &other;
+    }
+
+    std::pmr::memory_resource* upstream_;   ///< Pointer to upstream
+    void* buffer_;                          ///< Pointer to buffer
+    size_t buffer_size_;                    ///< Size of buffer
+    Resource resource_;                     ///< Resource of arena
+    size_t simd_alignment_;                 ///< Size of SIMD alignment
+    bool full_data_;                        ///< Flag signs full data (child arena is created)
+};
+
+
+template <class Resource>
+class AssemblyArenaResource : public PatchArenaResource<Resource> {
+public:
+    /// Constructor. Creates assembly arena
+	AssemblyArenaResource(size_t buffer_size, size_t simd_alignment, std::pmr::memory_resource* upstream = PatchArenaResource<Resource>::upstream_resource())
+    : PatchArenaResource<Resource>( std::pmr::get_default_resource()->allocate(buffer_size, simd_alignment), buffer_size, simd_alignment, upstream ) {}
+
+	virtual ~AssemblyArenaResource() {
+	    this->do_deallocate(this->buffer_, this->buffer_size_, this->simd_alignment_);
+	}
+
+    /**
+     * Create and return child arena.
+     *
+     * Child arena is created in free space of actual arena.
+     * Actual arena is marked as full (flag full_data_) and cannot allocate new data.
+     */
+	PatchArenaResource<Resource> *get_child_arena() {
+        void *p = this->raw_allocate(1, this->simd_alignment_);
+        size_t used_size = (char *)p - (char *)this->buffer_;
+        size_t free_space = this->buffer_size_ - used_size;
+        this->full_data_ = true;
+        return new PatchArenaResource<Resource>(p, free_space, this->simd_alignment_);
+    }
+
+
+};
+
+
+
+using AssemblyArena = AssemblyArenaResource<std::pmr::monotonic_buffer_resource>;
+using PatchArena = PatchArenaResource<std::pmr::monotonic_buffer_resource>;
+
+
+#endif /* ARENA_RESOURCE_HH_ */
diff --git a/include/config.hh b/include/config.hh
index d1db252..91cf3fe 100644
--- a/include/config.hh
+++ b/include/config.hh
@@ -33,9 +33,13 @@ typedef unsigned int uint;
 #endif
 
 #if defined(_WIN32)
-# define EXPORT __declspec(dllexport)
+# if defined(BPARSER_DLL)
+#  define EXPORT __declspec(dllexport)
+# else
+#  define EXPORT __declspec(dllimport)
+# endif
 #else
-#define EXPORT
+# define EXPORT
 #endif
 
 #if defined(_WIN32)
diff --git a/include/instrset_detect.hh b/include/instrset_detect.hh
index b215c1e..5cd0b54 100644
--- a/include/instrset_detect.hh
+++ b/include/instrset_detect.hh
@@ -10,12 +10,14 @@
  * Wraps the third party library function for DLL export reasons.
  */
 
-#ifndef INCLUDE_INSTRSET_DETECT_HH
-#define INCLUDE_INSTRSET_DETECT_HH
+#ifndef INCLUDE_INSTRSET_DETECT_HH_
+#define INCLUDE_INSTRSET_DETECT_HH_
 
 #include "config.hh"
 #include "instrset.h"
-namespace bparser {
+namespace bparser{
+
 	EXPORT int b_instrset_detect(void);
+
 }
-#endif
\ No newline at end of file
+#endif //!INCLUDE_INSTRSET_DETECT_HH_
\ No newline at end of file

From 21d8a3964f6af23af5bd6bcf162ac147655475b5 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 30 Mar 2025 20:30:24 +0200
Subject: [PATCH 03/31] note

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5eb10f7..00a9a14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -260,6 +260,6 @@ endmacro()
 define_test(test_parser bparser)
 define_test(test_array)
 define_test(test_grammar bparser)
-define_test(test_processor bparser)
+define_test(test_processor bparser) #is it broken? -LV
 define_test(test_speed bparser)  
 define_test(test_simd)

From 975d254f289934c5cac97a086294bdefb9c27879 Mon Sep 17 00:00:00 2001
From: Jan Brezina <jan.brezina@tul.cz>
Date: Mon, 31 Mar 2025 17:20:07 +0200
Subject: [PATCH 04/31] Install Eigen in github action

---
 .github/workflows/ccpp.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index fcb5beb..b8ade54 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -265,6 +265,10 @@ jobs:
           # Bparser dependency
           sudo apt-get install -y libboost-all-dev
 
+          # install eigen
+          wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz &&\
+          tar -xzf eigen-3.4.0.tar.gz &&\
+          mv eigen-3.4.0 /usr/local
           
           
 

From e9ee1f4c44f477baaece70563e19178d9ce10161 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 6 Apr 2025 15:30:09 +0200
Subject: [PATCH 05/31] ArenaAlloc is now a PatchArena wrapper

---
 include/arena_alloc.hh      | 43 +++++++++++++++++++++++++------------
 include/create_processor.hh |  2 +-
 include/grammar.impl.hh     |  3 ++-
 include/parser.hh           |  2 +-
 include/processor.hh        | 14 +++++++++---
 5 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh
index bb2efc4..5084f16 100644
--- a/include/arena_alloc.hh
+++ b/include/arena_alloc.hh
@@ -21,49 +21,57 @@ inline size_t align_size(size_t al, size_t size) {
 	return (size + al -1) / al * al;
 }
 
-struct ArenaAlloc : public PatchArena {
+struct ArenaAlloc {
 	
-#define SIZE (align_size(alignment,size))
-
 
+	//Creates a wrapper of PatchArena for backwards compatibility with BParser
+	ArenaAlloc(PatchArena& existing_arena) : arena(&existing_arena),buffer(nullptr) {
+		;
+	}
+	//Creates a wrapper with a new PatchArena with the specified memory alignment and size
+	//However AssemblyArena might be the correct class to create 
 	ArenaAlloc(std::size_t alignment, std::size_t size)
 	//: alignment_(alignment),
 	//  size_(0)
-	: PatchArena(align_alloc(alignment, SIZE), SIZE, alignment)
+	: size_(align_size(alignment, size)) //We cannot access this->arena->buffer_size_. However it *should* not change and is currently only used by one assert. Maybe create getter? -LV
 	{
+		buffer = align_alloc(alignment, size_);
+		arena = new PatchArena(buffer, size_, alignment);
 		/*size_ = align_size(alignment_, size);
 		base_ = (char*)align_alloc(alignment_, size_);
 		BP_ASSERT(base_ != nullptr);
 		ptr_ = base_;
 		//std::cout << "arena begin: " << (void *)base_ << " end: " << end() << std::endl;
 		*/
-		size_ = buffer_size_;
 	}
-#undef SIZE
 	
 	~ArenaAlloc() {
         destroy();
     }
 
-	void destroy() {
+	inline void destroy() {
 		//align_free(base_);
-		align_free(PatchArena::buffer_);
+		if (buffer != nullptr) {
+			align_free(buffer);
+		}
 	}
 
 	/*void* end() {
 		return base_ + size_;
 	}*/
 
-	/*void* allocate(std::size_t size) { //defined in PatchArena, std::pmr::memory_resource
-		
+	inline void* allocate(std::size_t size) {
+		/*
 		size = align_size(alignment_, size);
 		void * ptr = ptr_;
 		ptr_ += size;
 		BP_ASSERT(ptr_ <= end());
 		//std::cout << "allocated: " << ptr << " end: " << (void *)ptr_ << " aend: " << end() << "\n";
 		return ptr;
+		*/
+		return arena->allocate(size);
 		
-	}*/
+	}
 
 	template <class T, typename... Args>
 	T* create(Args&&... args) {
@@ -79,14 +87,21 @@ struct ArenaAlloc : public PatchArena {
 		void * ptr = allocate(sizeof(T) * n_items);
 		return new (ptr) T[n_items];
 		*/
-		return PatchArena::allocate_simd<T>(n_items);
+		return arena->allocate_simd<T>(n_items);
 	}
 
-
+	inline std::size_t get_size() {
+		return size_; //arena->buffer_size would be more appropriate
+	}
+	
 	//std::size_t alignment_;
-	std::size_t size_;
+	//std::size_t size_;
 	//char * base_;
 	//char * ptr_;
+protected:
+	PatchArena* arena;
+	void* buffer;
+	std::size_t size_;
 };
 
 } // namespace bparser
diff --git a/include/create_processor.hh b/include/create_processor.hh
index a4f10e4..fecb051 100644
--- a/include/create_processor.hh
+++ b/include/create_processor.hh
@@ -25,7 +25,7 @@ namespace bparser{
         }
     }
 
-    ProcessorBase * ProcessorBase::create_processor(ExpressionDAG &se, uint vector_size, uint simd_size, ArenaAllocPtr arena) {
+    ProcessorBase * ProcessorBase::create_processor(ExpressionDAG &se, uint vector_size, uint simd_size, PatchArenaPtr arena) {
         if (simd_size == 0) {
             simd_size = get_simd_size();
         }
diff --git a/include/grammar.impl.hh b/include/grammar.impl.hh
index 037c8ff..a356ccd 100644
--- a/include/grammar.impl.hh
+++ b/include/grammar.impl.hh
@@ -17,7 +17,8 @@
 #include <string>
 
 #include <boost/math/constants/constants.hpp>
-#include <boost/spirit/include/phoenix.hpp>
+//#include <boost/spirit/include/phoenix.hpp>
+#include <boost/phoenix.hpp>
 
 
 //#define BOOST_SPIRIT_NO_PREDEFINED_TERMINALS
diff --git a/include/parser.hh b/include/parser.hh
index e233927..ca8bde1 100644
--- a/include/parser.hh
+++ b/include/parser.hh
@@ -169,7 +169,7 @@ public:
     ///
     /// All variable names have to be set before this call.
     /// TODO: set result variable
-    void compile(std::shared_ptr<ArenaAlloc> arena = nullptr) {
+    void compile(std::shared_ptr<PatchArena> arena = nullptr) {
     	destroy_processor();
 
         ParserResult res_array = boost::apply_visitor(ast::make_array(symbols_), ast);
diff --git a/include/processor.hh b/include/processor.hh
index 6f9452e..b16e018 100644
--- a/include/processor.hh
+++ b/include/processor.hh
@@ -127,6 +127,8 @@ using namespace details;
 
 
 typedef std::shared_ptr<ArenaAlloc> ArenaAllocPtr;
+typedef std::shared_ptr<PatchArena> PatchArenaPtr;
+
 
 
 #define CODE(OP_NAME) \
@@ -158,7 +160,7 @@ struct ProcessorBase {
 		return arena_;
 	}
 	
-	inline static ProcessorBase *create_processor(ExpressionDAG &se, uint vec_n_blocks, uint simd_size = 0, ArenaAllocPtr arena = nullptr);
+	inline static ProcessorBase *create_processor(ExpressionDAG &se, uint vec_n_blocks, uint simd_size = 0, PatchArenaPtr arena = nullptr);
 
 	ArenaAllocPtr arena_;
 };
@@ -477,7 +479,13 @@ struct Processor : public ProcessorBase {
 	Operation * program_;
 	std::vector< std::shared_ptr<ValueCopyNode> > val_copy_nodes_;
 };
-
+template <class VCLVec>
+ProcessorBase* create_processor_(ExpressionDAG& se, uint vector_size, uint simd_size, PatchArenaPtr arena) {
+	if (arena == nullptr) {
+		return create_processor_<VCLVec>(se, vector_size, simd_size, (ArenaAllocPtr)std::shared_ptr<ArenaAlloc>(nullptr)); //will create new ArenaAlloc in the other method
+	}
+	return create_processor_<VCLVec>(se, vector_size, simd_size, std::make_shared<ArenaAlloc>(*arena));
+}
 
 template <class VCLVec> 
 ProcessorBase * create_processor_(ExpressionDAG &se, uint vector_size,  uint simd_size, ArenaAllocPtr arena)
@@ -503,7 +511,7 @@ ProcessorBase * create_processor_(ExpressionDAG &se, uint vector_size,  uint sim
     if (arena == nullptr)
         arena = std::make_shared<ArenaAlloc>(simd_bytes, est);
     else
-        BP_ASSERT(arena->size_ >= est);
+        BP_ASSERT(arena->get_size() >= est);
     return arena->create<Processor<Vec<VCLVec>>>(arena, se, vec_n_blocks);
 }
 

From 82f21d694d9ae4833bfe8b085b86c18163be9613 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 6 Apr 2025 21:41:44 +0200
Subject: [PATCH 06/31] Eigen

---
 CMakeLists.txt            |  18 ++++-
 include/arena_alloc.hh    |   2 +-
 include/array.hh          | 142 +++++++++++++++++++++++++++++++++++++-
 include/scalar_wrapper.hh |  65 +++++++++++++++++
 4 files changed, 223 insertions(+), 4 deletions(-)
 create mode 100644 include/scalar_wrapper.hh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 00a9a14..aa6181a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -160,7 +160,7 @@ if (NOT Boost_FOUND)
   if (NOT EXTERNAL_PROJECT_DIR)
    unset(BOOST_ROOT)
   endif()
-  find_package( Boost 1.58.0 REQUIRED) #since Boost 1.70.0 it should be find_package( Boost CONFIG REQUIRED)
+  find_package( Boost 1.70.0 REQUIRED) #since Boost 1.70.0 it should be find_package( Boost CONFIG REQUIRED)
 endif()
 
 message(STATUS "-------------------------------------------------------")
@@ -169,11 +169,24 @@ message(STATUS "BOOST_ROOT = ${BOOST_ROOT}")
 message(STATUS "Boost_LIBRARIES = ${Boost_LIBRARIES}")
 message(STATUS "Boost_LIBRARY_DIRS = ${Boost_LIBRARY_DIRS}")
 message(STATUS "Boost_INCLUDE_DIR = ${Boost_INCLUDE_DIR}")
+message(STATUS "=======================================================\n")
+
+#Eigen
+message(STATUS "=======================================================")
+message(STATUS "====== EIGEN ==========================================")
+message(STATUS "=======================================================")
+
+find_package(Eigen3 CONFIG REQUIRED PATHS "/usr/local") #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL
+
+message(STATUS "-------------------------------------------------------")
+message(STATUS "EIGEN_ROOT = ${EIGEN_ROOT}")
+message(STATUS "Eigen3_DIR = ${Eigen3_DIR}")
+message(STATUS "EIGEN3_INCLUDE_DIR = ${EIGEN3_INCLUDE_DIR}")
 message(STATUS "=======================================================\n\n")
 
 message(STATUS "VCL2_INCLUDE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2")
 
-set(BPARSER_INCLUDES   ${CMAKE_CURRENT_SOURCE_DIR}/include     ${Boost_INCLUDE_DIR}     ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2)
+set(BPARSER_INCLUDES   ${CMAKE_CURRENT_SOURCE_DIR}/include     ${Boost_INCLUDE_DIR}     ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2      ${EIGEN3_INCLUDE_DIR})
 if(NOT PROJECT_IS_TOP_LEVEL)
     set(BPARSER_INCLUDES ${BPARSER_INCLUDES} PARENT_SCOPE)
 endif()
@@ -198,6 +211,7 @@ add_library(bparser SHARED
     ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_AVX512.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_double.cc
 )
+target_link_libraries(bparser Eigen3::Eigen) #Interface library, includes the header files
 set_target_properties(bparser PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -DBPARSER_DLL")
 
 
diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh
index 5084f16..0ec025e 100644
--- a/include/arena_alloc.hh
+++ b/include/arena_alloc.hh
@@ -90,7 +90,7 @@ struct ArenaAlloc {
 		return arena->allocate_simd<T>(n_items);
 	}
 
-	inline std::size_t get_size() {
+	inline std::size_t get_size() const {
 		return size_; //arena->buffer_size would be more appropriate
 	}
 	
diff --git a/include/array.hh b/include/array.hh
index 490118f..6ac2598 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -16,6 +16,7 @@
 #include <boost/math/constants/constants.hpp>
 #include "config.hh"
 #include "scalar_node.hh"
+#include "scalar_wrapper.hh"
 //#include "test_tools.hh"
 
 namespace bparser {
@@ -859,6 +860,133 @@ public:
 		return result;
 	}
 
+	/**
+	 * Numpy.matmul:
+	 *
+	 * a 		has shape (..., i,j, k,l)
+	 * b 		has shape (..., i,j, l,m)
+	 * result 	has shape (..., i,j, k,m)
+	 */
+	static Array mat_mult_old(const Array& a, const Array& b) {
+		//std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n";
+
+		if (a.shape().size() == 0)
+			Throw() << "Matmult can not multiply by scalar a." << "\n";
+		if (b.shape().size() == 0)
+			Throw() << "Matmult can not multiply by scalar b." << "\n";
+
+		auto a_broadcast = MultiIdxRange(a.shape()).full();
+
+		if (a.shape().size() == 1) {
+			a_broadcast.insert_axis(0, 0, 1);
+		}
+		auto b_broadcast = MultiIdxRange(b.shape()).full();
+		if (b.shape().size() == 1) {
+			b_broadcast.insert_axis(1, 1, 1);
+		}
+		Shape a_shape = a_broadcast.source_shape_;
+		//uint a_only_dim = *(a_shape.end() - 2);
+		uint a_common_dim = *(a_shape.end() - 1);
+		a_shape.insert(a_shape.end(), 1);
+		// a_shape : (...,i,j,k,l,1)
+
+		Shape b_shape = b_broadcast.source_shape_;
+		//uint b_only_dim = *(b_shape.end() - 1);
+		uint b_common_dim = *(b_shape.end() - 2);
+		b_shape.insert(b_shape.end() - 2, 1);
+		// b_shape : (...,i,j,1,l,m)
+		if (a_common_dim != b_common_dim)
+			Throw() << "Matmult summing dimension mismatch: " << a_common_dim << " != " << b_common_dim << "\n";
+		Shape common_shape = MultiIdxRange::broadcast_common_shape(a_shape, b_shape);
+		// common_shape : (...,i,j,k,l,m)
+		Shape result_shape(common_shape);
+		*(result_shape.end() - 2) = 1;
+		// result_shape : (...,i,j,k,1,m)
+
+		MultiIdxRange a_range = MultiIdxRange(a_shape).full().broadcast(common_shape);
+		a_range.target_transpose(-2, -1);
+		MultiIdxRange b_range = MultiIdxRange(b_shape).full().broadcast(common_shape);
+		b_range.target_transpose(-2, -1);
+		MultiIdxRange result_range = MultiIdxRange(result_shape).full();
+		result_range.target_transpose(-2, -1);
+		// transpose a_range and b_range in  order to have common_dim the last one:
+		// a_shape : (....i,j, k,l,1)
+		// b_shape : (...,i,j,1,m,l)
+		*(b_range.target_transpose_.end() - 1) = b_range.target_transpose_.size() - 2;
+		*(b_range.target_transpose_.end() - 2) = b_range.target_transpose_.size() - 1;
+
+
+		MultiIdx a_idx(a_range);
+		MultiIdx b_idx(b_range); // allocated
+		MultiIdx result_idx(result_range);
+		/*
+				std::cout << "a_idx, shp: " << print_vector(a_idx.range_.full_shape_) << "\n";
+				std::cout << "b_idx, shp: " << print_vector(b_idx.range_.full_shape_) << "\n";
+				std::cout << "r_idx, shp: " << print_vector(result_idx.range_.full_shape_) << "\n";
+		*/
+
+		ScalarNodePtr sum;
+		Array result(result_shape);
+		for (; result_idx.valid();) {
+			sum = nullptr;
+			a_idx.reset_indices(result_idx);
+			b_idx.reset_indices(result_idx);
+			for (; a_idx.valid();) {
+				/*
+								std::cout << "a_idx: " << print_vector(a_idx.indices()) << " didx: "
+										<< a_idx.src_idx() << "\n";
+								std::cout << "b_idx: " << print_vector(b_idx.indices()) << " didx: "
+										<< b_idx.src_idx() << "\n";
+				*/
+				ScalarNodePtr mult = details::ScalarNode::create<details::_mul_>(
+					a.elements_[a_idx.idx_src()],
+					b.elements_[b_idx.idx_src()]);
+				if (sum == nullptr) {
+					sum = mult;
+				}
+				else {
+					// TODO: how to use inplace operations correctly ??
+					sum = details::ScalarNode::create<details::_add_>(sum, mult);
+				}
+				//std::cout << "aidx ";
+				a_idx.inc_trg(-1, 1, false);
+				//std::cout << "bidx ";
+				b_idx.inc_trg(-1, 1, false);
+				BP_ASSERT(a_idx.valid() == b_idx.valid());
+			}
+			/*
+						std::cout << "r_idx: " << print_vector(result_idx.indices()) << " didx: "
+												<< result_idx.src_idx() << "\n";
+			*/
+
+			result.elements_[result_idx.idx_src()] = sum;
+
+			result_idx.inc_trg(-2);
+
+		}
+
+
+		auto final_range = MultiIdxRange(result.shape()).full();
+
+		//std::cout << "  raw res: "<< print_vector(result_shape);
+		if (b.shape().size() == 1 && *(result_shape.end() - 1) == 1) {
+			// cut -1 axis
+			//std::cout << "  b cut: "<< result_shape.size()-1 << "\n";
+			final_range.remove_target_axis(result_shape.size() - 1);
+		}
+		BP_ASSERT(*(result_shape.end() - 2) == 1);
+		//std::cout << "  r cut: "<< result_shape.size()-2 << "\n";
+		final_range.remove_target_axis(result_shape.size() - 2);
+		// cut -2 axis always
+		if (a.shape().size() == 1 && *(result_shape.end() - 3) == 1) {
+			// cut -3 axis
+			// std::cout << "  a cut: "<< result_shape.size()-3 << "\n";
+			final_range.remove_target_axis(result_shape.size() - 3);
+		}
+		// std::cout << "  final res: " << print_vector(final_range.sub_shape()) << "\n";
+		return Array(result, final_range);
+
+	}
 
 	/**
 	 * Numpy.matmul:
@@ -869,6 +997,18 @@ public:
 	 */
 	static Array mat_mult(const Array &a,  const Array &b) {
 		//std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n";
+		Eigen::Matrix2d m1;
+		m1(0, 0) = 1;
+		m1(0, 1) = 2;
+		m1(1, 0) = 3;
+		m1(1, 1) = 4;
+
+		Eigen::Vector2d v1;
+		v1(0) = 5;
+		v1(1) = 6;
+
+		std::cout << (m1 * v1) << std::endl;
+		std::cout << (v1.transpose() * m1) << std::endl;
 
 		if (a.shape().size() == 0)
 			Throw() << "Matmult can not multiply by scalar a." << "\n";
@@ -945,7 +1085,7 @@ public:
 					sum = mult;
 				} else {
 					// TODO: how to use inplace operations correctly ??
-					sum = details::ScalarNode::create<details::_add_>(sum, mult);
+					sum = (details::ScalarWrapper(sum) + details::ScalarWrapper(mult)).get();
 				}
 				//std::cout << "aidx ";
 				a_idx.inc_trg(-1,1, false);
diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh
new file mode 100644
index 0000000..ef5c655
--- /dev/null
+++ b/include/scalar_wrapper.hh
@@ -0,0 +1,65 @@
+/*
+ * scalar_wrapper.hh
+ *
+ *  Created on: Apr 6, 2025
+ *      Author: LV
+ */
+
+//https://eigen.tuxfamily.org/dox/TopicCustomizing_CustomScalar.html
+
+#ifndef INCLUDE_SCALAR_WRAPPER_HH_
+#define INCLUDE_SCALAR_WRAPPER_HH_
+
+#include "scalar_node.hh"
+#include <Eigen/Core>
+
+namespace bparser {
+	namespace details {
+		struct ScalarWrapper {
+
+			ScalarWrapper(ScalarNodePtr existing_ptr) : node(existing_ptr) { ; }
+
+
+			inline ScalarWrapper operator+(const ScalarWrapper& b) const {
+				return ScalarWrapper(ScalarNode::create<_add_>(node, b.node));
+			}
+
+			inline ScalarNodePtr operator*() const {
+				return get();
+			}
+
+			inline ScalarNodePtr get() const {
+				return node;
+			}
+
+
+
+		protected:
+			ScalarNodePtr node;
+		};
+
+
+	}
+}
+//https://eigen.tuxfamily.org/dox/structEigen_1_1NumTraits.html
+namespace Eigen {
+	template<> struct NumTraits<bparser::details::ScalarWrapper>
+	: NumTraits<double>
+	{
+		typedef bparser::details::ScalarWrapper Real;
+		typedef bparser::details::ScalarWrapper NonInteger;
+		typedef bparser::details::ScalarWrapper Nested;
+
+		enum {
+			IsComplex = 0,
+			IsInteger = 0,
+			IsSigned = 1,
+			RequireInitialization = 0,
+			ReadCost = HugeCost,
+			AddCost = HugeCost,
+			MulCost = HugeCost
+		};
+	};
+}
+
+#endif //!INCLUDE_SCALAR_WRAPPER_HH_
\ No newline at end of file

From 11fbb54cb4464f34829586114c8874eba89d5347 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 13 Apr 2025 19:31:48 +0200
Subject: [PATCH 07/31] does not work, topic for consultation

---
 include/array.hh          | 143 +++++++++++++++++---------------------
 include/scalar_wrapper.hh |  83 ++++++++++++++++++++--
 2 files changed, 142 insertions(+), 84 deletions(-)

diff --git a/include/array.hh b/include/array.hh
index 6ac2598..f77bd7b 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -12,7 +12,6 @@
 #include <vector>
 #include <algorithm>
 #include <cmath>
-#include <memory>
 #include <boost/math/constants/constants.hpp>
 #include "config.hh"
 #include "scalar_node.hh"
@@ -860,6 +859,46 @@ public:
 		return result;
 	}
 
+	static Eigen::MatrixX<details::ScalarWrapper> wrap_array(const bparser::Array& a) {
+		BP_ASSERT(a.shape().size() <= 2); // "Eigen does not support more than 2 dimensions"
+
+		using namespace details;
+		if (a.shape().size() == 0) {
+			return Eigen::MatrixX<ScalarWrapper>();
+		}
+		if (a.shape().size() == 1) {
+			Eigen::VectorX<ScalarWrapper> v(a.shape()[0]); //Rows x cols, which is which
+			for (uint i = 0; i < a.shape()[0]; i++) {
+				v(i) = ScalarWrapper(a.elements()[i]);
+			}
+			return v;
+		}
+		else {// (a.shape().size() == 2) {
+			MultiIdx index(a.range());
+			Eigen::MatrixX<ScalarWrapper> m(a.shape()[0], a.shape()[1]);
+			for (uint row = 0; row < a.shape()[0]; row++) {
+				for (uint col = 0; col < a.shape()[1]; col++) {
+					m(row, col) = ScalarWrapper(a[index]);
+					index.inc_src();
+				}
+			}
+			return m;
+		}
+	}
+
+	static bparser::Array unwrap_array(const Eigen::MatrixX<details::ScalarWrapper>& m) {
+		using namespace details;
+		Array a({ (uint)m.rows(), (uint)m.cols() });
+		MultiIdx index(a.range());
+		for (uint row = 0; row < a.shape()[0]; row++) {
+			for (uint col = 0; col < a.shape()[1]; col++) {
+				a.elements_[index.idx_src()] = m(row, col).get();
+				index.inc_src();
+			}
+		}
+		return a;
+	}
+
 	/**
 	 * Numpy.matmul:
 	 *
@@ -997,18 +1036,23 @@ public:
 	 */
 	static Array mat_mult(const Array &a,  const Array &b) {
 		//std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n";
-		Eigen::Matrix2d m1;
-		m1(0, 0) = 1;
-		m1(0, 1) = 2;
-		m1(1, 0) = 3;
-		m1(1, 1) = 4;
-
-		Eigen::Vector2d v1;
-		v1(0) = 5;
-		v1(1) = 6;
-
-		std::cout << (m1 * v1) << std::endl;
-		std::cout << (v1.transpose() * m1) << std::endl;
+		Eigen::Matrix<details::ScalarWrapper,2,2> m1;
+		m1(0, 0) = details::ScalarWrapper(details::ScalarNode::create_const(1));
+		m1(0, 1) = details::ScalarWrapper(details::ScalarNode::create_const(2));
+		m1(1, 0) = details::ScalarWrapper(details::ScalarNode::create_const(3));
+		m1(1, 1) = details::ScalarWrapper(details::ScalarNode::create_const(4));
+
+		Eigen::Vector<details::ScalarWrapper,2> v1;
+
+		v1(0) = details::ScalarWrapper(details::ScalarNode::create_const(5));
+		v1(1) = details::ScalarWrapper(details::ScalarNode::create_const(6));
+		//17,39
+		auto r = (m1 * v1);
+		//std::cout << r << std::endl;
+		//std::cout << (v1.transpose() * m1) << std::endl;
+		std::cout << "Shape: ---------" << std::endl;
+		std::cout << print_shape(a.shape()) << std::endl;
+		std::cout << print_shape(b.shape()) << std::endl;
 
 		if (a.shape().size() == 0)
 			Throw() << "Matmult can not multiply by scalar a." << "\n";
@@ -1016,14 +1060,15 @@ public:
 			Throw() << "Matmult can not multiply by scalar b." << "\n";
 
 		auto a_broadcast = MultiIdxRange(a.shape()).full();
-
 		if (a.shape().size() == 1) {
 			a_broadcast.insert_axis(0, 0, 1);
 		}
+
 		auto b_broadcast = MultiIdxRange(b.shape()).full();
 		if (b.shape().size() == 1) {
 			b_broadcast.insert_axis(1, 1, 1);
 		}
+
 		Shape a_shape = a_broadcast.source_shape_;
 		//uint a_only_dim = *(a_shape.end() - 2);
 		uint a_common_dim = *(a_shape.end() - 1);
@@ -1035,74 +1080,14 @@ public:
 		uint b_common_dim = *(b_shape.end() - 2);
 		b_shape.insert(b_shape.end() - 2, 1);
 		// b_shape : (...,i,j,1,l,m)
+
 		if (a_common_dim != b_common_dim)
 			Throw() << "Matmult summing dimension mismatch: " << a_common_dim << " != " << b_common_dim << "\n";
-		Shape common_shape = MultiIdxRange::broadcast_common_shape(a_shape, b_shape);
+		//Shape common_shape = MultiIdxRange::broadcast_common_shape(a_shape, b_shape);
 		// common_shape : (...,i,j,k,l,m)
-		Shape result_shape(common_shape);
-		*(result_shape.end() - 2) = 1;
-		// result_shape : (...,i,j,k,1,m)
-
-		MultiIdxRange a_range = MultiIdxRange(a_shape).full().broadcast(common_shape);
-		a_range.target_transpose(-2, -1);
-		MultiIdxRange b_range = MultiIdxRange(b_shape).full().broadcast(common_shape);
-		b_range.target_transpose(-2, -1);
-		MultiIdxRange result_range = MultiIdxRange(result_shape).full();
-		result_range.target_transpose(-2, -1);
-		// transpose a_range and b_range in  order to have common_dim the last one:
-		// a_shape : (....i,j, k,l,1)
-		// b_shape : (...,i,j,1,m,l)
-		*(b_range.target_transpose_.end() - 1) = b_range.target_transpose_.size() - 2;
-		*(b_range.target_transpose_.end() - 2) = b_range.target_transpose_.size() - 1;
-
-
-		MultiIdx a_idx(a_range);
-		MultiIdx b_idx(b_range); // allocated
-		MultiIdx result_idx(result_range);
-/*
-		std::cout << "a_idx, shp: " << print_vector(a_idx.range_.full_shape_) << "\n";
-		std::cout << "b_idx, shp: " << print_vector(b_idx.range_.full_shape_) << "\n";
-		std::cout << "r_idx, shp: " << print_vector(result_idx.range_.full_shape_) << "\n";
-*/
-
-		ScalarNodePtr sum;
-		Array result(result_shape);
-		for(;result_idx.valid();) {
-			sum = nullptr;
-			a_idx.reset_indices(result_idx);
-			b_idx.reset_indices(result_idx);
-			for(;a_idx.valid();) {
-/*
-				std::cout << "a_idx: " << print_vector(a_idx.indices()) << " didx: "
-						<< a_idx.src_idx() << "\n";
-				std::cout << "b_idx: " << print_vector(b_idx.indices()) << " didx: "
-						<< b_idx.src_idx() << "\n";
-*/
-				ScalarNodePtr mult = details::ScalarNode::create<details::_mul_>(
-						a.elements_[a_idx.idx_src()],
-						b.elements_[b_idx.idx_src()]);
-				if (sum == nullptr) {
-					sum = mult;
-				} else {
-					// TODO: how to use inplace operations correctly ??
-					sum = (details::ScalarWrapper(sum) + details::ScalarWrapper(mult)).get();
-				}
-				//std::cout << "aidx ";
-				a_idx.inc_trg(-1,1, false);
-				//std::cout << "bidx ";
-				b_idx.inc_trg(-1,1, false);
-				BP_ASSERT(a_idx.valid() == b_idx.valid());
-			}
-/*
-			std::cout << "r_idx: " << print_vector(result_idx.indices()) << " didx: "
-									<< result_idx.src_idx() << "\n";
-*/
-
-			result.elements_[result_idx.idx_src()] = sum;
-
-			result_idx.inc_trg(-2);
-
-		}
+		
+		Array result = unwrap_array(wrap_array(a) * wrap_array(b));
+		Shape result_shape = result.shape();
 
 
 		auto final_range = MultiIdxRange(result.shape()).full();
diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh
index ef5c655..d4d997b 100644
--- a/include/scalar_wrapper.hh
+++ b/include/scalar_wrapper.hh
@@ -15,16 +15,46 @@
 
 namespace bparser {
 	namespace details {
+		// Eigen compatible wrapper for ScalarNode
 		struct ScalarWrapper {
 
+			ScalarWrapper() : node(ScalarNode::create_zero()) { ; }
+			ScalarWrapper(int i) : node(ScalarNode::create_const(i)) { ; }
+			ScalarWrapper(double d) : node(ScalarNode::create_const(d)) { ; }
 			ScalarWrapper(ScalarNodePtr existing_ptr) : node(existing_ptr) { ; }
 
+			inline ScalarWrapper operator-() const {
+				return un_op<_minus_>(*this);
+			}
+
+			inline ScalarWrapper& operator+=(const ScalarWrapper& b) {
+				node = bin_op<_add_>(*this, b).get();
+				return *this;
+			}
 
 			inline ScalarWrapper operator+(const ScalarWrapper& b) const {
-				return ScalarWrapper(ScalarNode::create<_add_>(node, b.node));
+				return bin_op<_add_>(*this, b);
+			}
+
+			inline ScalarWrapper operator-(const ScalarWrapper& b) const {
+				return bin_op<_sub_>(*this, b);
+			}
+
+			inline ScalarWrapper operator*(const ScalarWrapper& b) const {
+				return bin_op<_mul_>(*this, b);
 			}
 
-			inline ScalarNodePtr operator*() const {
+			inline ScalarWrapper operator/(const ScalarWrapper& b) const {
+				return bin_op<_div_>(*this, b);
+			}
+
+			inline bool operator==(const ScalarWrapper& b) const {
+				b.get();
+				return false;// return bin_op<_eq_>(*this, b); //How??????????
+			}
+
+
+			inline ScalarNodePtr operator*() const { //dereference
 				return get();
 			}
 
@@ -32,15 +62,58 @@ namespace bparser {
 				return node;
 			}
 
+			template<class T>
+			static ScalarWrapper bin_op(const ScalarWrapper& a, const ScalarWrapper& b) {
+				return ScalarWrapper(ScalarNode::create<T>(a.get(), b.get()));
+			}
+
+			template<class T>
+			static ScalarWrapper un_op(const ScalarWrapper& a) {
+				return ScalarWrapper(ScalarNode::create<T>(a.get()));
+			}
 
 
 		protected:
 			ScalarNodePtr node;
+
+
+		}; //ScalarWrapper
+
+		//inline std::ostream& operator<<(std::ostream& out, const ScalarWrapper& s) {
+		//
+		//}
+
+#define UN_OP(OP)											\
+		inline ScalarWrapper OP(const ScalarWrapper& s) {	\
+			return ScalarWrapper::un_op<_##OP##_>(s);		\
+		}													\
+		using std::OP;
+
+		inline ScalarWrapper abs(const ScalarWrapper& s) {
+			return ScalarWrapper::un_op<_abs_>(s); 
+		}
+		using std::abs;
+
+		//https://eigen.tuxfamily.org/dox/namespaceEigen.html#a54cc34b64b4935307efc06d56cd531df
+		inline ScalarWrapper abs2(const ScalarWrapper& s) {
+			return s*s;
 		};
 
+		inline ScalarWrapper sqrt(const ScalarWrapper& s) {
+			return ScalarWrapper::un_op<_sqrt_>(s);
+		}
+		using std::sqrt;
+
+		UN_OP(log)
+		UN_OP(log10)
+
+
+
+
+
+	} //details
+} //bparser
 
-	}
-}
 //https://eigen.tuxfamily.org/dox/structEigen_1_1NumTraits.html
 namespace Eigen {
 	template<> struct NumTraits<bparser::details::ScalarWrapper>
@@ -54,7 +127,7 @@ namespace Eigen {
 			IsComplex = 0,
 			IsInteger = 0,
 			IsSigned = 1,
-			RequireInitialization = 0,
+			RequireInitialization = 1,
 			ReadCost = HugeCost,
 			AddCost = HugeCost,
 			MulCost = HugeCost

From 8bf99072152b9595c7989e4ce49073decaa876ca Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 13 Apr 2025 20:24:24 +0200
Subject: [PATCH 08/31] Found the issue, but not fixed yet

---
 include/array.hh    | 9 +++++----
 test/test_parser.cc | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/array.hh b/include/array.hh
index f77bd7b..5f68d64 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -867,7 +867,7 @@ public:
 			return Eigen::MatrixX<ScalarWrapper>();
 		}
 		if (a.shape().size() == 1) {
-			Eigen::VectorX<ScalarWrapper> v(a.shape()[0]); //Rows x cols, which is which
+			Eigen::VectorX<ScalarWrapper> v(a.shape()[0]); //mat_mult_old is altering the vector size. Undocumented convenience?
 			for (uint i = 0; i < a.shape()[0]; i++) {
 				v(i) = ScalarWrapper(a.elements()[i]);
 			}
@@ -1087,10 +1087,10 @@ public:
 		// common_shape : (...,i,j,k,l,m)
 		
 		Array result = unwrap_array(wrap_array(a) * wrap_array(b));
-		Shape result_shape = result.shape();
+		//Shape result_shape = result.shape();
 
-
-		auto final_range = MultiIdxRange(result.shape()).full();
+		return result;
+		/*auto final_range = MultiIdxRange(result.shape()).full();
 
 		//std::cout << "  raw res: "<< print_vector(result_shape);
 		if (b.shape().size() == 1 && *(result_shape.end() - 1) == 1 ) {
@@ -1109,6 +1109,7 @@ public:
 		}
 		// std::cout << "  final res: " << print_vector(final_range.sub_shape()) << "\n";
 		return Array(result, final_range);
+		*/
 
 	}
 
diff --git a/test/test_parser.cc b/test/test_parser.cc
index 8f905df..efb16e7 100644
--- a/test/test_parser.cc
+++ b/test/test_parser.cc
@@ -227,6 +227,7 @@ void test_expression() {
 	BP_ASSERT(test_expr("25 % cs3", {1}));
 	BP_ASSERT(test_expr("25 % cv4", {1, 0, 1}));
 
+	BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2,1 })); //Eigen test
 	BP_ASSERT(test_expr("[3, 4] @ [[1], [2]]", {11}, {1}));
 	BP_ASSERT(test_expr("[3, 4, 1] @ [[1], [2], [3]]", {14}, {1}));
 	ASSERT_THROW(test_expr("[[1], [2], [3]] @ [3, 4, 1]", {14}, {1}), "Matmult summing dimension mismatch");

From 8ccd10e11c01074bda828b4ab4f0458a989999b2 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sat, 26 Apr 2025 18:55:08 +0200
Subject: [PATCH 09/31] Added getter

---
 include/arena_alloc.hh    | 5 ++---
 include/arena_resource.hh | 3 +++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh
index 0ec025e..77b1824 100644
--- a/include/arena_alloc.hh
+++ b/include/arena_alloc.hh
@@ -33,8 +33,8 @@ struct ArenaAlloc {
 	ArenaAlloc(std::size_t alignment, std::size_t size)
 	//: alignment_(alignment),
 	//  size_(0)
-	: size_(align_size(alignment, size)) //We cannot access this->arena->buffer_size_. However it *should* not change and is currently only used by one assert. Maybe create getter? -LV
 	{
+		size_t size_ = align_size(alignment, size);
 		buffer = align_alloc(alignment, size_);
 		arena = new PatchArena(buffer, size_, alignment);
 		/*size_ = align_size(alignment_, size);
@@ -91,7 +91,7 @@ struct ArenaAlloc {
 	}
 
 	inline std::size_t get_size() const {
-		return size_; //arena->buffer_size would be more appropriate
+		return arena->get_size();
 	}
 	
 	//std::size_t alignment_;
@@ -101,7 +101,6 @@ struct ArenaAlloc {
 protected:
 	PatchArena* arena;
 	void* buffer;
-	std::size_t size_;
 };
 
 } // namespace bparser
diff --git a/include/arena_resource.hh b/include/arena_resource.hh
index 5fd054a..6baa1cf 100644
--- a/include/arena_resource.hh
+++ b/include/arena_resource.hh
@@ -101,6 +101,9 @@ public:
 #endif
     }
 
+    inline size_t get_size() {
+        return buffer_size_;
+    }
 protected:
     void* raw_allocate(size_t bytes, size_t alignment) {
         //ASSERT(!full_data_).error("Allocation of new data is not possible because child arena was created.");

From c48a10f8c80bc40ad21868856efa93cf6ec9c4eb Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sat, 26 Apr 2025 18:55:52 +0200
Subject: [PATCH 10/31] Expanded wrap_array wiht MultiIdx. Added Numpy diag
 func

---
 include/array.hh          | 147 +++++++++++++++++++++-----------------
 include/grammar.impl.hh   |   1 +
 include/scalar_wrapper.hh |  37 ++++++----
 test/test_parser.cc       |   6 +-
 4 files changed, 112 insertions(+), 79 deletions(-)

diff --git a/include/array.hh b/include/array.hh
index 5f68d64..4a56471 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -859,44 +859,82 @@ public:
 		return result;
 	}
 
+
+
+	//Wraps the ScalarNodes of an Array into an Eigen Matrix of ScalarWrappers.
+	//Vectors will be column vectors. Eigen does not support vectors without orientation.
+	//Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor
 	static Eigen::MatrixX<details::ScalarWrapper> wrap_array(const bparser::Array& a) {
-		BP_ASSERT(a.shape().size() <= 2); // "Eigen does not support more than 2 dimensions"
+		MultiIdx idx(a.range());
+		return wrap_array(a, idx);
+	}
+
+	//Wraps the ScalarNodes of an Array accessed via MultiIdx.idx_trg() created from supplied MultiIdxRange into an Eigen Matrix of ScalarWrapper
+	//Vectors will be column vectors. Eigen does not support vectors without orientation.
+	//Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor
+	static Eigen::MatrixX<details::ScalarWrapper> wrap_array(const bparser::Array& a, MultiIdxRange& range) {
+		MultiIdx idx (range);
+		return wrap_array(a, idx);
+	}
+
+	//Wraps the ScalarNodes of an Array accessed via MultiIdx.idx_trg() into an Eigen Matrix of ScalarWrapper
+	//Vectors will be column vectors. Eigen does not support vectors without orientation.
+	//Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor
+	static Eigen::MatrixX<details::ScalarWrapper> wrap_array(const bparser::Array& a, MultiIdx& index) {
 
 		using namespace details;
-		if (a.shape().size() == 0) {
-			return Eigen::MatrixX<ScalarWrapper>();
+		Shape trg_shape = index.range_.target_shape();
+		if (trg_shape.size() == 0) {
+			Throw() << "Attempted to wrap scalar into Eigen Matrix";
 		}
-		if (a.shape().size() == 1) {
-			Eigen::VectorX<ScalarWrapper> v(a.shape()[0]); //mat_mult_old is altering the vector size. Undocumented convenience?
-			for (uint i = 0; i < a.shape()[0]; i++) {
-				v(i) = ScalarWrapper(a.elements()[i]);
+		if (trg_shape.size() == 1) {
+			uint len = trg_shape[0];
+			Eigen::VectorX<ScalarWrapper> v(len);
+			for (uint i = 0; i < len && index.valid(); i++, index.inc_trg()) {
+				v(i) = ScalarWrapper(a.elements_[index.idx_trg()]);
 			}
 			return v;
 		}
-		else {// (a.shape().size() == 2) {
-			MultiIdx index(a.range());
-			Eigen::MatrixX<ScalarWrapper> m(a.shape()[0], a.shape()[1]);
-			for (uint row = 0; row < a.shape()[0]; row++) {
-				for (uint col = 0; col < a.shape()[1]; col++) {
-					m(row, col) = ScalarWrapper(a[index]);
-					index.inc_src();
+		else {// (a.shape().size() > 2) {
+			
+
+			
+			uint rows = *(trg_shape.end() - 2);
+			uint cols = *(trg_shape.end() - 1);
+
+			Eigen::MatrixX<ScalarWrapper> m(rows, cols);
+			for (uint row = 0; row < rows; row++) {
+				for (uint col = 0; col < cols && index.valid(); col++, index.inc_trg()) {
+					m(row, col) = ScalarWrapper(a.elements_[index.idx_trg()]);
 				}
 			}
 			return m;
 		}
 	}
 
-	static bparser::Array unwrap_array(const Eigen::MatrixX<details::ScalarWrapper>& m) {
+	//Creates an Array of ScalarNodes from an Eigen Matrix of ScalarWrappers
+	// make_vector - Will reduce the Array shape if Matrix is actually a Vector. Shape:(x,1) -> (x); (1,y) -> (y)
+	static bparser::Array unwrap_array(const Eigen::MatrixX<details::ScalarWrapper>& m, const bool make_vector = false) {
 		using namespace details;
-		Array a({ (uint)m.rows(), (uint)m.cols() });
-		MultiIdx index(a.range());
-		for (uint row = 0; row < a.shape()[0]; row++) {
-			for (uint col = 0; col < a.shape()[1]; col++) {
-				a.elements_[index.idx_src()] = m(row, col).get();
-				index.inc_src();
+
+		if (make_vector && (m.rows() == 1 || m.cols() == 1)) {
+			Array a({ (uint)std::max(m.rows(),m.cols()) });
+			MultiIdx index(a.range());
+			for (uint i = 0; i < a.shape()[0]; i++, index.inc_src()) {
+				a.elements_[index.idx_src()] = m(i).get();
 			}
+			return a;
+		}
+		else {
+			Array a({ (uint)m.rows(), (uint)m.cols() });
+			MultiIdx index(a.range());
+			for (uint row = 0; row < a.shape()[0]; row++) {
+				for (uint col = 0; col < a.shape()[1]; col++, index.inc_src()) {
+					a.elements_[index.idx_src()] = m(row, col).get();
+				}
+			}
+			return a;
 		}
-		return a;
 	}
 
 	/**
@@ -1036,60 +1074,29 @@ public:
 	 */
 	static Array mat_mult(const Array &a,  const Array &b) {
 		//std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n";
-		Eigen::Matrix<details::ScalarWrapper,2,2> m1;
-		m1(0, 0) = details::ScalarWrapper(details::ScalarNode::create_const(1));
-		m1(0, 1) = details::ScalarWrapper(details::ScalarNode::create_const(2));
-		m1(1, 0) = details::ScalarWrapper(details::ScalarNode::create_const(3));
-		m1(1, 1) = details::ScalarWrapper(details::ScalarNode::create_const(4));
-
-		Eigen::Vector<details::ScalarWrapper,2> v1;
-
-		v1(0) = details::ScalarWrapper(details::ScalarNode::create_const(5));
-		v1(1) = details::ScalarWrapper(details::ScalarNode::create_const(6));
-		//17,39
-		auto r = (m1 * v1);
-		//std::cout << r << std::endl;
-		//std::cout << (v1.transpose() * m1) << std::endl;
-		std::cout << "Shape: ---------" << std::endl;
-		std::cout << print_shape(a.shape()) << std::endl;
-		std::cout << print_shape(b.shape()) << std::endl;
+
+		//std::cout << "Shape: ---------" << std::endl;
+		//std::cout << print_shape(a.shape()) << std::endl;
+		//std::cout << print_shape(b.shape()) << std::endl;
 
 		if (a.shape().size() == 0)
 			Throw() << "Matmult can not multiply by scalar a." << "\n";
 		if (b.shape().size() == 0)
 			Throw() << "Matmult can not multiply by scalar b." << "\n";
 
-		auto a_broadcast = MultiIdxRange(a.shape()).full();
-		if (a.shape().size() == 1) {
-			a_broadcast.insert_axis(0, 0, 1);
-		}
+		auto m_a = wrap_array(a);
+		auto m_b = wrap_array(b);
 
-		auto b_broadcast = MultiIdxRange(b.shape()).full();
-		if (b.shape().size() == 1) {
-			b_broadcast.insert_axis(1, 1, 1);
+		if (a.shape().size() == 1) { //is vector
+			m_a = m_a.transpose(); //colvec -> rowvec
 		}
 
-		Shape a_shape = a_broadcast.source_shape_;
-		//uint a_only_dim = *(a_shape.end() - 2);
-		uint a_common_dim = *(a_shape.end() - 1);
-		a_shape.insert(a_shape.end(), 1);
-		// a_shape : (...,i,j,k,l,1)
-
-		Shape b_shape = b_broadcast.source_shape_;
-		//uint b_only_dim = *(b_shape.end() - 1);
-		uint b_common_dim = *(b_shape.end() - 2);
-		b_shape.insert(b_shape.end() - 2, 1);
-		// b_shape : (...,i,j,1,l,m)
+		if (m_a.cols() != m_b.rows())
+			Throw() << "Matmult summing dimension mismatch: " << m_a.cols() << " != " << m_b.rows() << "\n";
 
-		if (a_common_dim != b_common_dim)
-			Throw() << "Matmult summing dimension mismatch: " << a_common_dim << " != " << b_common_dim << "\n";
-		//Shape common_shape = MultiIdxRange::broadcast_common_shape(a_shape, b_shape);
-		// common_shape : (...,i,j,k,l,m)
-		
-		Array result = unwrap_array(wrap_array(a) * wrap_array(b));
+		return unwrap_array(m_a * m_b, (a.shape().size() == 1 || b.shape().size() == 1));
 		//Shape result_shape = result.shape();
 
-		return result;
 		/*auto final_range = MultiIdxRange(result.shape()).full();
 
 		//std::cout << "  raw res: "<< print_vector(result_shape);
@@ -1113,6 +1120,18 @@ public:
 
 	}
 
+	static Array diag(const Array& a) {
+		if (a.shape().size() == 0)
+			return a;
+
+		if (a.shape().size() == 1) { // diag -> matrix
+			return unwrap_array(wrap_array(a).asDiagonal());
+		}
+		// matrix -> diag
+		return unwrap_array(wrap_array(a).diagonal(),true);
+
+	}
+
 	static Array flatten(const Array &tensor) {
 		uint n_elements = shape_size(tensor.shape());
 		Shape res_shape(1, n_elements);
diff --git a/include/grammar.impl.hh b/include/grammar.impl.hh
index a356ccd..e4ec1c7 100644
--- a/include/grammar.impl.hh
+++ b/include/grammar.impl.hh
@@ -179,6 +179,7 @@ struct grammar : qi::grammar<Iterator, ast::operand(), ascii::space_type> {
             FN("power"  , binary_array<_pow_>())
 			FN("minimum", binary_array<_min_>())
 			FN("maximum", binary_array<_max_>())
+            FN("diag"   , &Array::diag)
             ;
 
         unary_op.add
diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh
index d4d997b..54dda38 100644
--- a/include/scalar_wrapper.hh
+++ b/include/scalar_wrapper.hh
@@ -49,8 +49,10 @@ namespace bparser {
 			}
 
 			inline bool operator==(const ScalarWrapper& b) const {
-				b.get();
-				return false;// return bin_op<_eq_>(*this, b); //How??????????
+				if (((***this).result_storage == constant      && (**b).result_storage == constant     ) || 
+					((***this).result_storage == constant_bool && (**b).result_storage == constant_bool) )
+					return *(***this).values_ == *(**b).values_;
+				return false;
 			}
 
 
@@ -89,23 +91,30 @@ namespace bparser {
 		}													\
 		using std::OP;
 
-		inline ScalarWrapper abs(const ScalarWrapper& s) {
-			return ScalarWrapper::un_op<_abs_>(s); 
-		}
-		using std::abs;
+		/*
+		UN_OP(abs)
 
 		//https://eigen.tuxfamily.org/dox/namespaceEigen.html#a54cc34b64b4935307efc06d56cd531df
 		inline ScalarWrapper abs2(const ScalarWrapper& s) {
 			return s*s;
 		};
-
-		inline ScalarWrapper sqrt(const ScalarWrapper& s) {
-			return ScalarWrapper::un_op<_sqrt_>(s);
-		}
-		using std::sqrt;
-
-		UN_OP(log)
-		UN_OP(log10)
+		*/
+
+		//UN_OP(sqrt)
+		//UN_OP(exp)
+		//UN_OP(log)
+		//UN_OP(log10)
+		//UN_OP(sin)
+		//UN_OP(sinh)
+		//UN_OP(asin)
+		//UN_OP(cos)
+		//UN_OP(cosh)
+		//UN_OP(acos)
+		//UN_OP(tan)
+		//UN_OP(tanh)
+		//UN_OP(atan)
+		//UN_OP(ceil)
+		//UN_OP(floor)
 
 
 
diff --git a/test/test_parser.cc b/test/test_parser.cc
index efb16e7..ab1fdd0 100644
--- a/test/test_parser.cc
+++ b/test/test_parser.cc
@@ -227,7 +227,7 @@ void test_expression() {
 	BP_ASSERT(test_expr("25 % cs3", {1}));
 	BP_ASSERT(test_expr("25 % cv4", {1, 0, 1}));
 
-	BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2,1 })); //Eigen test
+	BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2 }));
 	BP_ASSERT(test_expr("[3, 4] @ [[1], [2]]", {11}, {1}));
 	BP_ASSERT(test_expr("[3, 4, 1] @ [[1], [2], [3]]", {14}, {1}));
 	ASSERT_THROW(test_expr("[[1], [2], [3]] @ [3, 4, 1]", {14}, {1}), "Matmult summing dimension mismatch");
@@ -237,6 +237,10 @@ void test_expression() {
 	BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3}));
 	BP_ASSERT(test_expr("a=[1,2,3]; a[:, None] @ a[None,:]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3}));
 
+	BP_ASSERT(test_expr("diag([1,2,3])", {1, 0, 0,  0, 2, 0,  0, 0, 3}, {3,3}));
+	BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, {2}));
+	BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, {3}));
+
 
 	BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2}));
 	BP_ASSERT(test_expr("floor(-3.5)", {-4}, {}));

From d248826e8952c4fcc297e7255ba83558d2db6902 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sat, 26 Apr 2025 19:40:39 +0200
Subject: [PATCH 11/31] workflow change

---
 .github/workflows/ccpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index b8ade54..56deb1e 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -268,7 +268,7 @@ jobs:
           # install eigen
           wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz &&\
           tar -xzf eigen-3.4.0.tar.gz &&\
-          mv eigen-3.4.0 /usr/local
+          sudo mv eigen-3.4.0 /usr/local
           
           
 

From 783cbdcd7901ec7213de87bee05415d88040cf01 Mon Sep 17 00:00:00 2001
From: Jan Brezina <jan.brezina@tul.cz>
Date: Sun, 4 May 2025 20:14:00 +0200
Subject: [PATCH 12/31] Update eigen  install in GH action

---
 .github/workflows/ccpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index b8ade54..56deb1e 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -268,7 +268,7 @@ jobs:
           # install eigen
           wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz &&\
           tar -xzf eigen-3.4.0.tar.gz &&\
-          mv eigen-3.4.0 /usr/local
+          sudo mv eigen-3.4.0 /usr/local
           
           
 

From 740f6910184070753f5b26b64bc2788a35e151fb Mon Sep 17 00:00:00 2001
From: Jan Brezina <jan.brezina@tul.cz>
Date: Sun, 4 May 2025 21:34:35 +0200
Subject: [PATCH 13/31] Add more complex matrix multiplication tests.

---
 test/test_parser.cc | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/test/test_parser.cc b/test/test_parser.cc
index 8f905df..eaa09a4 100644
--- a/test/test_parser.cc
+++ b/test/test_parser.cc
@@ -234,8 +234,26 @@ void test_expression() {
 	BP_ASSERT(test_expr("[[1, 2], [2, 3], [3, 4]] @ [[1], [2]]", {5, 8, 11}, {3,1}));
 	BP_ASSERT(test_expr("[[1, 2], [2, 3], [3, 4]] @ [1, 2]", {5, 8, 11}, {3}));
 	BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3}));
+	BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3}));
 	BP_ASSERT(test_expr("a=[1,2,3]; a[:, None] @ a[None,:]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3}));
 
+	// 2×2 @ 2×2 → 2×2
+	BP_ASSERT(test_expr(
+		"[[1, 2], [3, 4]] @ [[5, 6], [7, 8]]",
+		{19, 22, 43, 50},    // 1*5+2*7, 1*6+2*8, 3*5+4*7, 3*6+4*8
+		{2, 2}
+	));
+
+	// 3×1×2 @ 2×3 → 3×1×3 (batched matmul)
+	BP_ASSERT(test_expr(
+		"[[[1,2]], [[3,4]], [[5,6]]] @ [[7,8,9], [10,11,12]]",
+		{
+		27, 30, 33,   // batch 0: [1,2]×[[7,8,9],[10,11,12]]
+		61, 68, 75,   // batch 1: [3,4]×...
+		95,106,117    // batch 2: [5,6]×...
+		},
+		{3, 1, 3}
+	));
 
 	BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2}));
 	BP_ASSERT(test_expr("floor(-3.5)", {-4}, {}));

From 7e162d6e4c670a0b6f42e1e678cd356f2cefcec1 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Mon, 5 May 2025 23:37:46 +0200
Subject: [PATCH 14/31] const keyword

---
 include/arena_resource.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/arena_resource.hh b/include/arena_resource.hh
index 6baa1cf..a01c847 100644
--- a/include/arena_resource.hh
+++ b/include/arena_resource.hh
@@ -101,7 +101,7 @@ public:
 #endif
     }
 
-    inline size_t get_size() {
+    inline size_t get_size() const{
         return buffer_size_;
     }
 protected:

From a384ac8402eedbcf71f53a98f4dfc46efb82be0a Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Mon, 5 May 2025 23:38:08 +0200
Subject: [PATCH 15/31] reworked mat_mult, doesn't work for tensors...

---
 include/array.hh | 62 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/include/array.hh b/include/array.hh
index 4a56471..6b8d877 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -884,6 +884,7 @@ public:
 
 		using namespace details;
 		Shape trg_shape = index.range_.target_shape();
+		std::cout << "Wrapping: " << print_shape(trg_shape) << std::endl;
 		if (trg_shape.size() == 0) {
 			Throw() << "Attempted to wrap scalar into Eigen Matrix";
 		}
@@ -896,9 +897,6 @@ public:
 			return v;
 		}
 		else {// (a.shape().size() > 2) {
-			
-
-			
 			uint rows = *(trg_shape.end() - 2);
 			uint cols = *(trg_shape.end() - 1);
 
@@ -1075,15 +1073,65 @@ public:
 	static Array mat_mult(const Array &a,  const Array &b) {
 		//std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n";
 
-		//std::cout << "Shape: ---------" << std::endl;
-		//std::cout << print_shape(a.shape()) << std::endl;
-		//std::cout << print_shape(b.shape()) << std::endl;
+		std::cout << "Shape: ---------" << std::endl;
+		std::cout << print_shape(a.shape()) << std::endl;
+		std::cout << print_shape(b.shape()) << std::endl;
 
 		if (a.shape().size() == 0)
 			Throw() << "Matmult can not multiply by scalar a." << "\n";
 		if (b.shape().size() == 0)
 			Throw() << "Matmult can not multiply by scalar b." << "\n";
 
+		Shape a_shape = a.shape();
+		if (a_shape.size() == 1) {
+			a_shape.insert(a_shape.begin(), 1);
+		}
+
+
+		Shape b_shape = b.shape();
+		if (b_shape.size() == 1) {
+			b_shape.push_back(1);
+		}
+
+
+		uint a_cols = *(a_shape.end() - 1), b_rows = *(b_shape.end() - 2);
+
+		if (a_cols != b_rows) {
+			Throw() << "Matmult summing dimension mismatch: " << a_cols << " != " << b_rows << "\n";
+		}
+
+		a_shape.insert(a_shape.end(), 1);
+		// a_shape : (...,i,j,k,l,1)
+		b_shape.insert(b_shape.end() - 2, 1);
+		// b_shape : (...,i,j,1,l,m)
+
+		
+		Shape result_shape(MultiIdxRange::broadcast_common_shape(a_shape, b_shape));// shape (..., i,j,k,l,m)
+		result_shape.erase(result_shape.end() - 2);
+
+		Array result(result_shape);
+		//std::cout << print_shape(result_shape) << std::endl;
+
+		bool should_transpose = a.shape().size() == 1;
+
+		for (MultiIdx	
+			result_idx(result.range()),
+			a_idx(a.range()),
+			b_idx(b.range());	result_idx.valid();) {
+
+			Eigen::MatrixX<details::ScalarWrapper> m_a = wrap_array(a, a_idx);
+			Eigen::MatrixX<details::ScalarWrapper> m_b = wrap_array(b, b_idx);
+			if (should_transpose) m_a = m_a.transpose();
+
+			Array matmult = unwrap_array(m_a * m_b);
+
+			for (MultiIdx mult_idx(matmult.range()); mult_idx.valid(); mult_idx.inc_src(), result_idx.inc_src()) {
+				result.elements_[result_idx.idx_src()] = matmult[mult_idx]; //TODO
+				//std::cout << result_idx.idx_src() << std::endl;
+			}
+		}
+		return result;
+		/*
 		auto m_a = wrap_array(a);
 		auto m_b = wrap_array(b);
 
@@ -1094,7 +1142,7 @@ public:
 		if (m_a.cols() != m_b.rows())
 			Throw() << "Matmult summing dimension mismatch: " << m_a.cols() << " != " << m_b.rows() << "\n";
 
-		return unwrap_array(m_a * m_b, (a.shape().size() == 1 || b.shape().size() == 1));
+		return unwrap_array(m_a * m_b, (a.shape().size() == 1 || b.shape().size() == 1));*/
 		//Shape result_shape = result.shape();
 
 		/*auto final_range = MultiIdxRange(result.shape()).full();

From f096b04f702f6b99476ed5f1aa9eb6baed26c6f5 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 20:27:15 +0200
Subject: [PATCH 16/31] Matmult for tensors+ Moved diag tests below matmult
 tests

---
 include/array.hh    | 54 ++++++++++++++++++++++++++++++++-------------
 test/test_parser.cc | 12 +++++-----
 2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/include/array.hh b/include/array.hh
index 6b8d877..ce120f6 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -892,7 +892,7 @@ public:
 			uint len = trg_shape[0];
 			Eigen::VectorX<ScalarWrapper> v(len);
 			for (uint i = 0; i < len && index.valid(); i++, index.inc_trg()) {
-				v(i) = ScalarWrapper(a.elements_[index.idx_trg()]);
+				v(i) = ScalarWrapper(a[index]);
 			}
 			return v;
 		}
@@ -903,7 +903,7 @@ public:
 			Eigen::MatrixX<ScalarWrapper> m(rows, cols);
 			for (uint row = 0; row < rows; row++) {
 				for (uint col = 0; col < cols && index.valid(); col++, index.inc_trg()) {
-					m(row, col) = ScalarWrapper(a.elements_[index.idx_trg()]);
+					m(row, col) = ScalarWrapper(a[index]);
 				}
 			}
 			return m;
@@ -1073,9 +1073,9 @@ public:
 	static Array mat_mult(const Array &a,  const Array &b) {
 		//std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n";
 
-		std::cout << "Shape: ---------" << std::endl;
-		std::cout << print_shape(a.shape()) << std::endl;
-		std::cout << print_shape(b.shape()) << std::endl;
+		//std::cout << "Shape: ---------" << std::endl;
+		//std::cout << print_shape(a.shape()) << std::endl;
+		//std::cout << print_shape(b.shape()) << std::endl;
 
 		if (a.shape().size() == 0)
 			Throw() << "Matmult can not multiply by scalar a." << "\n";
@@ -1085,52 +1085,76 @@ public:
 		Shape a_shape = a.shape();
 		if (a_shape.size() == 1) {
 			a_shape.insert(a_shape.begin(), 1);
+			// shape (l) -> (1,l)
 		}
 
 
 		Shape b_shape = b.shape();
 		if (b_shape.size() == 1) {
 			b_shape.push_back(1);
+			// shape (l) -> (l,1)
 		}
 
 
 		uint a_cols = *(a_shape.end() - 1), b_rows = *(b_shape.end() - 2);
 
-		if (a_cols != b_rows) {
+		if (a_cols != b_rows) { // l != l
 			Throw() << "Matmult summing dimension mismatch: " << a_cols << " != " << b_rows << "\n";
 		}
 
+		//Add for common shape
 		a_shape.insert(a_shape.end(), 1);
 		// a_shape : (...,i,j,k,l,1)
 		b_shape.insert(b_shape.end() - 2, 1);
 		// b_shape : (...,i,j,1,l,m)
 
 		
-		Shape result_shape(MultiIdxRange::broadcast_common_shape(a_shape, b_shape));// shape (..., i,j,k,l,m)
-		result_shape.erase(result_shape.end() - 2);
+		Shape result_shape(MultiIdxRange::broadcast_common_shape(a_shape, b_shape)); 
+		// r_shape (..., i,j,k,l,m)
+		MultiIdxRange a_range(MultiIdxRange(a_shape).full().broadcast(result_shape));
+		// a_shape (..., 1,1,k,l,1) -> (...,i,j,k,l,1)
+		MultiIdxRange b_range(MultiIdxRange(b_shape).full().broadcast(result_shape));
+		// b_shape (..., 1,1,1,l,m) -> (...,i,j,1,l,m)
+
+		//Remove for computation
+		a_range.target_transpose_.erase(a_range.target_transpose_.end() - 1); 
+		// a_shape (..., i,j,k,l, )
+		b_range.target_transpose_.erase(b_range.target_transpose_.end() - 3); 
+		// b_shape (..., i,j, ,l,m)
+		result_shape.erase(result_shape.end() - 2); 
+		// r_shape (..., i,j,k, ,m)
 
-		Array result(result_shape);
 		//std::cout << print_shape(result_shape) << std::endl;
 
+		Array result(result_shape);
 		bool should_transpose = a.shape().size() == 1;
 
 		for (MultiIdx	
 			result_idx(result.range()),
-			a_idx(a.range()),
-			b_idx(b.range());	result_idx.valid();) {
+			a_idx(a_range),
+			b_idx(b_range);	result_idx.valid(); ) {
 
 			Eigen::MatrixX<details::ScalarWrapper> m_a = wrap_array(a, a_idx);
 			Eigen::MatrixX<details::ScalarWrapper> m_b = wrap_array(b, b_idx);
-			if (should_transpose) m_a = m_a.transpose();
 
 			Array matmult = unwrap_array(m_a * m_b);
 
 			for (MultiIdx mult_idx(matmult.range()); mult_idx.valid(); mult_idx.inc_src(), result_idx.inc_src()) {
-				result.elements_[result_idx.idx_src()] = matmult[mult_idx]; //TODO
-				//std::cout << result_idx.idx_src() << std::endl;
+				result.elements_[result_idx.idx_src()] = matmult[mult_idx]; 
 			}
 		}
-		return result;
+
+		MultiIdxRange final_range(result.range());
+		if (b.shape().size() == 1) {
+			final_range.remove_target_axis(absolute_idx(-1, result_shape.size()));
+			// shape (..., i,j,k,1) -> ...,j,k)
+		}
+		if (a.shape().size() == 1) {
+			final_range.remove_target_axis(absolute_idx(-2, result_shape.size()));
+			// shape (..., i,j,1,m) -> ...,j,m)
+		}
+		
+		return Array(result,final_range);
 		/*
 		auto m_a = wrap_array(a);
 		auto m_b = wrap_array(b);
diff --git a/test/test_parser.cc b/test/test_parser.cc
index 49b3b66..4d20bf1 100644
--- a/test/test_parser.cc
+++ b/test/test_parser.cc
@@ -226,7 +226,7 @@ void test_expression() {
 
 	BP_ASSERT(test_expr("25 % cs3", {1}));
 	BP_ASSERT(test_expr("25 % cv4", {1, 0, 1}));
-
+	
 	BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2 }));
 	BP_ASSERT(test_expr("[3, 4] @ [[1], [2]]", {11}, {1}));
 	BP_ASSERT(test_expr("[3, 4, 1] @ [[1], [2], [3]]", {14}, {1}));
@@ -237,17 +237,13 @@ void test_expression() {
 	BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3}));
 	BP_ASSERT(test_expr("a=[1,2,3]; a[:, None] @ a[None,:]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3}));
 
-	BP_ASSERT(test_expr("diag([1,2,3])", {1, 0, 0,  0, 2, 0,  0, 0, 3}, {3,3}));
-	BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, {2}));
-	BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, {3}));
-
 	// 2×2 @ 2×2 → 2×2
 	BP_ASSERT(test_expr(
 		"[[1, 2], [3, 4]] @ [[5, 6], [7, 8]]",
 		{19, 22, 43, 50},    // 1*5+2*7, 1*6+2*8, 3*5+4*7, 3*6+4*8
 		{2, 2}
 	));
-
+	
 	// 3×1×2 @ 2×3 → 3×1×3 (batched matmul)
 	BP_ASSERT(test_expr(
 		"[[[1,2]], [[3,4]], [[5,6]]] @ [[7,8,9], [10,11,12]]",
@@ -259,6 +255,10 @@ void test_expression() {
 		{3, 1, 3}
 	));
 
+	BP_ASSERT(test_expr("diag([1,2,3])", { 1, 0, 0,  0, 2, 0,  0, 0, 3 }, { 3,3 }));
+	BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, { 2 }));
+	BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, { 3 }));
+
 	BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2}));
 	BP_ASSERT(test_expr("floor(-3.5)", {-4}, {}));
 	BP_ASSERT(test_expr("ceil(-3.5)", {-3}, {}));

From ada89c0f86c55ebc149d3d355736f3dcd8d17773 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 20:50:34 +0200
Subject: [PATCH 17/31] FetchContent_Declare

---
 CMakeLists.txt | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index aa6181a..9740d55 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,6 +5,8 @@ option(SANITIZER_ON "Whether to use AddressSanitizer (asan) in the DEBUG configu
 
 message(STATUS "CMakeLists.txt - BParser")
 
+include(FetchContent)
+
 # CLANG
 
 #set(CMAKE_CXX_FLAGS     "-std=c++14 -finline-hint-functions -pedantic-errors -Werror=pedantic -Wall -Wextra -Werror -Wno-long-long -Wno-strict-aliasing -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION")
@@ -176,7 +178,14 @@ message(STATUS "=======================================================")
 message(STATUS "====== EIGEN ==========================================")
 message(STATUS "=======================================================")
 
-find_package(Eigen3 CONFIG REQUIRED PATHS "/usr/local") #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL
+#find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL
+FetchContent_Declare(
+    Eigen
+    DOWNLOAD_EXTRACT_TIMESTAMP
+    URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
+    FIND_PACKAGE_ARGS NAMES Eigen3 #find_package(Eigen NAMES Eigen3)
+)
+FetchContent_MakeAvailable(Eigen)
 
 message(STATUS "-------------------------------------------------------")
 message(STATUS "EIGEN_ROOT = ${EIGEN_ROOT}")

From be6e36af9c8ebc5777dfa8a976090ea008e74230 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 21:06:16 +0200
Subject: [PATCH 18/31] Eigen install attempt 2

---
 CMakeLists.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9740d55..c648884 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,10 @@ message(STATUS "CMakeLists.txt - BParser")
 
 include(FetchContent)
 
+if(POLICY CMP0135)
+    cmake_policy(SET CMP0135 NEW)
+endif()
+
 # CLANG
 
 #set(CMAKE_CXX_FLAGS     "-std=c++14 -finline-hint-functions -pedantic-errors -Werror=pedantic -Wall -Wextra -Werror -Wno-long-long -Wno-strict-aliasing -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION")
@@ -181,9 +185,9 @@ message(STATUS "=======================================================")
 #find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL
 FetchContent_Declare(
     Eigen
-    DOWNLOAD_EXTRACT_TIMESTAMP
     URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
-    FIND_PACKAGE_ARGS NAMES Eigen3 #find_package(Eigen NAMES Eigen3)
+    EXCLUDE_FROM_ALL
+    FIND_PACKAGE_ARGS NAMES Eigen3 #same as find_package(Eigen NAMES Eigen3)
 )
 FetchContent_MakeAvailable(Eigen)
 

From 9bd4c50b2e3cefe42ba31e98efcf8becd4b043e3 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 21:14:35 +0200
Subject: [PATCH 19/31] Eigen install attempt 3

---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c648884..a4d69fb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -182,6 +182,7 @@ message(STATUS "=======================================================")
 message(STATUS "====== EIGEN ==========================================")
 message(STATUS "=======================================================")
 
+set(EIGEN_BUILD_CMAKE_PACKAGE TRUE)
 #find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL
 FetchContent_Declare(
     Eigen

From 18b139319f67808109358bed4789a56f99d37a20 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 21:26:55 +0200
Subject: [PATCH 20/31] Eigen install attempt 4

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a4d69fb..945530a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -187,7 +187,7 @@ set(EIGEN_BUILD_CMAKE_PACKAGE TRUE)
 FetchContent_Declare(
     Eigen
     URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
-    EXCLUDE_FROM_ALL
+    #EXCLUDE_FROM_ALL
     FIND_PACKAGE_ARGS NAMES Eigen3 #same as find_package(Eigen NAMES Eigen3)
 )
 FetchContent_MakeAvailable(Eigen)

From 5c8edbeebc946edc8564ef809b7108d17e48207e Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 21:32:31 +0200
Subject: [PATCH 21/31] Eigen install attempt 5

---
 CMakeLists.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 945530a..b7d67de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -185,12 +185,12 @@ message(STATUS "=======================================================")
 set(EIGEN_BUILD_CMAKE_PACKAGE TRUE)
 #find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL
 FetchContent_Declare(
-    Eigen
+    Eigen3
     URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
-    #EXCLUDE_FROM_ALL
-    FIND_PACKAGE_ARGS NAMES Eigen3 #same as find_package(Eigen NAMES Eigen3)
+    EXCLUDE_FROM_ALL
+    FIND_PACKAGE_ARGS CONFIG REQUIRED #same as find_package(Eigen3 CONFIG REQUIRED)
 )
-FetchContent_MakeAvailable(Eigen)
+FetchContent_MakeAvailable(Eigen3)
 
 message(STATUS "-------------------------------------------------------")
 message(STATUS "EIGEN_ROOT = ${EIGEN_ROOT}")

From 188cc58efddb2f98cde061376c1d01750e8e6b81 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 21:58:47 +0200
Subject: [PATCH 22/31] Eigen install attempt 6, now via apt-get

---
 .github/workflows/ccpp.yml | 4 +---
 CMakeLists.txt             | 3 +--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index 56deb1e..781c7f5 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -266,9 +266,7 @@ jobs:
           sudo apt-get install -y libboost-all-dev
 
           # install eigen
-          wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz &&\
-          tar -xzf eigen-3.4.0.tar.gz &&\
-          sudo mv eigen-3.4.0 /usr/local
+          sudo apt install libeigen3-dev
           
           
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b7d67de..ae5cbef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -182,13 +182,12 @@ message(STATUS "=======================================================")
 message(STATUS "====== EIGEN ==========================================")
 message(STATUS "=======================================================")
 
-set(EIGEN_BUILD_CMAKE_PACKAGE TRUE)
 #find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL
 FetchContent_Declare(
     Eigen3
     URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
     EXCLUDE_FROM_ALL
-    FIND_PACKAGE_ARGS CONFIG REQUIRED #same as find_package(Eigen3 CONFIG REQUIRED)
+    FIND_PACKAGE_ARGS CONFIG #same as find_package(Eigen3 CONFIG)
 )
 FetchContent_MakeAvailable(Eigen3)
 

From f8fd43b339e99d025443b85df464b274efa5831c Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 22:32:27 +0200
Subject: [PATCH 23/31] forgot delete

---
 include/arena_alloc.hh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh
index 77b1824..0bde0b6 100644
--- a/include/arena_alloc.hh
+++ b/include/arena_alloc.hh
@@ -53,6 +53,7 @@ struct ArenaAlloc {
 		//align_free(base_);
 		if (buffer != nullptr) {
 			align_free(buffer);
+			delete arena;
 		}
 	}
 

From 9b8bd1b3bfbd969d2ecfdb6e328250b7bb97e76d Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Tue, 6 May 2025 23:04:34 +0200
Subject: [PATCH 24/31] PatchArena in test_speed and commented debug print

---
 include/array.hh   | 2 +-
 test/test_speed.cc | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/array.hh b/include/array.hh
index ce120f6..3a56236 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -884,7 +884,7 @@ public:
 
 		using namespace details;
 		Shape trg_shape = index.range_.target_shape();
-		std::cout << "Wrapping: " << print_shape(trg_shape) << std::endl;
+		//std::cout << "Wrapping: " << print_shape(trg_shape) << std::endl;
 		if (trg_shape.size() == 0) {
 			Throw() << "Attempted to wrap scalar into Eigen Matrix";
 		}
diff --git a/test/test_speed.cc b/test/test_speed.cc
index 2bdb7ce..6ac4fb4 100644
--- a/test/test_speed.cc
+++ b/test/test_speed.cc
@@ -23,6 +23,7 @@
 #include "test_tools.hh"
 
 #include "arena_alloc.hh"
+#include "arena_resource.hh"
 
 // Optimized structure, holds data in common arena
 struct ExprData {
@@ -31,7 +32,8 @@ struct ExprData {
 	{
 		uint simd_bytes = sizeof(double) * simd_size;
 
-		arena = std::make_shared<bparser::ArenaAlloc>(simd_bytes, 512 * 1012);
+		patch_arena = std::make_shared<AssemblyArena>(512 * 1012, simd_bytes);
+		arena = std::make_shared<bparser::ArenaAlloc>(*patch_arena);//(simd_bytes, 512 * 1012);
 		v1 = arena->create_array<double>(vec_size * 3);
 		fill_seq(v1, 100, 100 + 3 * vec_size);
 		v2 = arena->create_array<double>(vec_size * 3);
@@ -54,6 +56,7 @@ struct ExprData {
 	~ExprData()
 	{}
 
+	std::shared_ptr<PatchArena> patch_arena;
 	std::shared_ptr<bparser::ArenaAlloc> arena;
 	uint vec_size;
 	uint simd_size;
@@ -266,7 +269,7 @@ void test_expr(std::string expr, uint block_size, void (* func)(ExprData&)) {
 		//std::cout << "vres: " << vres << ", " << vres + block_size << ", " << vres + 2*vec_size << "\n";
 		//std::cout << "Symbols: " << print_vector(p.symbols()) << "\n";
 		//std::cout.flush();
-		p.compile(data1.arena);
+		p.compile(data1.patch_arena);
 
 		std::vector<uint> ss = std::vector<uint>(data1.subset, data1.subset+vec_size/simd_size);
 		p.set_subset(ss);

From 921a13e9394b829628172c2c8979fd15159d61fb Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Thu, 8 May 2025 17:29:47 +0200
Subject: [PATCH 25/31] tr - matrix trace

---
 include/array.hh        | 14 +++++++++++++-
 include/grammar.impl.hh |  1 +
 test/test_parser.cc     |  2 ++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/array.hh b/include/array.hh
index 3a56236..b590b27 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -1193,8 +1193,9 @@ public:
 	}
 
 	static Array diag(const Array& a) {
-		if (a.shape().size() == 0)
+		if (a.shape().size() == 0) {
 			return a;
+		}
 
 		if (a.shape().size() == 1) { // diag -> matrix
 			return unwrap_array(wrap_array(a).asDiagonal());
@@ -1204,6 +1205,17 @@ public:
 
 	}
 
+	static Array trace(const Array& a) {
+		if (a.shape().size() != 2) {
+			Throw() << "Function trace can only be used for matrices" << "\n";
+		}
+		Shape s; //empty Shape for scalar
+		Array r(s);
+		r.elements_[0U] = *wrap_array(a).trace();
+		return r;
+		//return full_({}, *wrap_array(a).trace());
+	}
+
 	static Array flatten(const Array &tensor) {
 		uint n_elements = shape_size(tensor.shape());
 		Shape res_shape(1, n_elements);
diff --git a/include/grammar.impl.hh b/include/grammar.impl.hh
index e4ec1c7..8a5ad5c 100644
--- a/include/grammar.impl.hh
+++ b/include/grammar.impl.hh
@@ -180,6 +180,7 @@ struct grammar : qi::grammar<Iterator, ast::operand(), ascii::space_type> {
 			FN("minimum", binary_array<_min_>())
 			FN("maximum", binary_array<_max_>())
             FN("diag"   , &Array::diag)
+            FN("tr"     , &Array::trace)
             ;
 
         unary_op.add
diff --git a/test/test_parser.cc b/test/test_parser.cc
index 4d20bf1..28b3c20 100644
--- a/test/test_parser.cc
+++ b/test/test_parser.cc
@@ -259,6 +259,8 @@ void test_expression() {
 	BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, { 2 }));
 	BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, { 3 }));
 
+	BP_ASSERT(test_expr("tr([[1,9,9],[9,1,9],[9,9,1]])", { 3 }, {}));
+
 	BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2}));
 	BP_ASSERT(test_expr("floor(-3.5)", {-4}, {}));
 	BP_ASSERT(test_expr("ceil(-3.5)", {-3}, {}));

From 090c867eb81c00e5fd039bca3e5e0d24f922ef1b Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Wed, 15 Oct 2025 18:59:22 +0200
Subject: [PATCH 26/31] Alternative dot print which helps understand the DAG
 using any dot graph creator

---
 include/expression_dag.hh | 53 +++++++++++++++++++++++++++++++++++++++
 include/parser.hh         |  1 +
 2 files changed, 54 insertions(+)

diff --git a/include/expression_dag.hh b/include/expression_dag.hh
index da08bb5..2b44a47 100644
--- a/include/expression_dag.hh
+++ b/include/expression_dag.hh
@@ -102,6 +102,7 @@ public:
 
 	/**
 	 * Print ScalarExpression graph in the dot format.
+	 * Useful for debugging
 	 */
 	void print_in_dot() {
 		std::map<ScalarNodePtr , uint> i_node;
@@ -131,6 +132,58 @@ public:
 		std::cout << "Node: " << node->op_name_ <<  "_" << node->result_idx_ << " " << node->result_storage << std::endl;
 	}
 
+	/**
+	 * Print ScalarExpression graph in the common dot format.
+	 * Useful for understanding the DAG
+	 */
+	void print_in_dot2() {
+		sort_nodes();
+		
+		std::cout << "\n" << "----- begin cut here -----" << "\n";
+		std::cout << "digraph Expr {" << "\n";
+
+		std::cout << "/* definitions */" << "\n";
+		std::cout << "edge [dir=back]" << "\n";
+		for (uint i = 0; i < sorted.size(); ++i) {
+			_print_dot_node_definition(sorted[i]);
+		}
+		std::cout << "/* end of definitions */" << "\n";
+
+		for (uint i = 0; i < sorted.size(); ++i) {
+			for (uint in = 0; in < sorted[i]->n_inputs_; ++in) {
+				std::cout << "    ";
+				_print_dot_node(sorted[i]);
+				std::cout << "\n -> ";
+				_print_dot_node(sorted[i]->inputs_[in]);
+				std::cout << "\n\n";
+			}
+		}
+		std::cout << "}" << "\n";
+		std::cout << "-----  end cut here  -----" << "\n";
+		std::cout.flush();
+	}
+	void _print_dot_node(ScalarNodePtr  node) {
+		std::cout << node->op_name_ << "_" << (int)node.get() << "__" << node->result_storage;// << std::endl;
+	}
+
+	void _print_dot_node_definition(ScalarNodePtr  node) {
+		_print_dot_node(node);
+		std::cout << ' ';
+
+		if		(node->result_storage == ResultStorage::constant) {
+			std::cout << "[shape=circle,label=\"const " << *node->values_ << "\"]" << std::endl;
+		}
+		else if (node->result_storage == ResultStorage::constant_bool) {
+			std::cout << "[shape=circle,label=\"const " << *node->values_ << "\"]" << std::endl;
+		}
+		else if (node->result_storage == ResultStorage::expr_result) {
+			std::cout << "[shape=box,label=\"" << node->op_name_ << " " << node->result_idx_ << "\"]" << std::endl;
+		}
+		else {
+			std::cout << "[label=\"" << node->op_name_ << "\"]" << std::endl;
+		}
+	}
+
 
 private:
 	void _print_i_node(uint i) {
diff --git a/include/parser.hh b/include/parser.hh
index ca8bde1..ade2953 100644
--- a/include/parser.hh
+++ b/include/parser.hh
@@ -190,6 +190,7 @@ public:
 		details::ExpressionDAG se(result_array_.elements());
 
 		//se.print_in_dot();
+        //se.print_in_dot2();
 		processor = ProcessorBase::create_processor(se, max_vec_size, simd_size, arena);
     }
 

From cb9414e2a97cdce2772d349c661d44a661c3e6e9 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 19 Oct 2025 16:58:29 +0200
Subject: [PATCH 27/31] Better cast

---
 include/expression_dag.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/expression_dag.hh b/include/expression_dag.hh
index 2b44a47..64f95e3 100644
--- a/include/expression_dag.hh
+++ b/include/expression_dag.hh
@@ -163,7 +163,7 @@ public:
 		std::cout.flush();
 	}
 	void _print_dot_node(ScalarNodePtr  node) {
-		std::cout << node->op_name_ << "_" << (int)node.get() << "__" << node->result_storage;// << std::endl;
+		std::cout << node->op_name_ << "_" << (uintptr_t)node.get() << "__" << node->result_storage;// << std::endl;
 	}
 
 	void _print_dot_node_definition(ScalarNodePtr  node) {

From 95d91c38f64a1659c60b1ad6162c432d9a410c90 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Mon, 20 Oct 2025 19:03:11 +0200
Subject: [PATCH 28/31] Improved print_in_dot2. If the symbols_ map is
 supplied, it will also print the variable and constant names in dot

---
 include/expression_dag.hh | 130 +++++++++++++++++++++++++++++++++-----
 include/parser.hh         |   1 +
 2 files changed, 115 insertions(+), 16 deletions(-)

diff --git a/include/expression_dag.hh b/include/expression_dag.hh
index 64f95e3..b57cfc5 100644
--- a/include/expression_dag.hh
+++ b/include/expression_dag.hh
@@ -15,6 +15,7 @@
 #include "config.hh"
 #include "scalar_node.hh"
 #include "assert.hh"
+#include "array.hh"
 
 
 namespace bparser {
@@ -40,6 +41,8 @@ private:
 	/// Result nodes, given as input.
 	NodeVec results;
 
+	typedef std::pair<std::string, bool> InvDotNameAndScalar;
+	typedef std::map<ScalarNodePtr, InvDotNameAndScalar> InvDotMap;
 
 	/**
 	 * Used in the setup_result_storage to note number of unclosed nodes
@@ -134,27 +137,45 @@ public:
 
 	/**
 	 * Print ScalarExpression graph in the common dot format.
-	 * Useful for understanding the DAG
+	 * Useful for understanding the DAG.
 	 */
 	void print_in_dot2() {
+		print_in_dot2(InvDotMap());
+	}
+
+	/**
+	 * Print ScalarExpression graph in the common dot format.
+	 * Useful for understanding the DAG. Using the parser's map of var. Name -> Array find the inverse ScalarNodePtr -> var. Name
+	 */
+	void print_in_dot2(const std::map<std::string, bparser::Array>& symbols) {
+		print_in_dot2(create_inverse_map(symbols));
+	}
+
+	/**
+	 * Print ScalarExpression graph in the common dot format.
+	 * Useful for understanding the DAG. Using the map of ScalarNodePtr -> variableName
+	 */
+	void print_in_dot2(const InvDotMap& names) {
+
 		sort_nodes();
 		
 		std::cout << "\n" << "----- begin cut here -----" << "\n";
 		std::cout << "digraph Expr {" << "\n";
 
 		std::cout << "/* definitions */" << "\n";
+
 		std::cout << "edge [dir=back]" << "\n";
 		for (uint i = 0; i < sorted.size(); ++i) {
-			_print_dot_node_definition(sorted[i]);
+			_print_dot_node_definition(sorted[i],names);
 		}
 		std::cout << "/* end of definitions */" << "\n";
 
 		for (uint i = 0; i < sorted.size(); ++i) {
 			for (uint in = 0; in < sorted[i]->n_inputs_; ++in) {
 				std::cout << "    ";
-				_print_dot_node(sorted[i]);
+				_print_dot_node_id(sorted[i]);
 				std::cout << "\n -> ";
-				_print_dot_node(sorted[i]->inputs_[in]);
+				_print_dot_node_id(sorted[i]->inputs_[in]);
 				std::cout << "\n\n";
 			}
 		}
@@ -162,30 +183,107 @@ public:
 		std::cout << "-----  end cut here  -----" << "\n";
 		std::cout.flush();
 	}
-	void _print_dot_node(ScalarNodePtr  node) {
+	
+	//Create a map of ScalarNodePtr -> (variable name, is_scalar)
+	InvDotMap create_inverse_map(const std::map<std::string, bparser::Array>& symbols) const {
+		InvDotMap inv_map;
+		if (symbols.empty()) return inv_map;
+		for (const auto& s : symbols)
+		{
+			for (const auto& n : s.second.elements()) {
+				inv_map[n] = std::pair<std::string,bool>(s.first, s.second.shape().empty());
+			}
+		}
+		return inv_map;
+	}
+
+
+private:
+	//Print the vertice identifier for dot
+	void _print_dot_node_id(const ScalarNodePtr& node) const {
 		std::cout << node->op_name_ << "_" << (uintptr_t)node.get() << "__" << node->result_storage;// << std::endl;
 	}
 
-	void _print_dot_node_definition(ScalarNodePtr  node) {
-		_print_dot_node(node);
+	//Print how the vertice should look in dot
+	void _print_dot_node_definition(const ScalarNodePtr& node, const InvDotMap& invmap) const {
+		_print_dot_node_id(node);
 		std::cout << ' ';
 
-		if		(node->result_storage == ResultStorage::constant) {
-			std::cout << "[shape=circle,label=\"const " << *node->values_ << "\"]" << std::endl;
+		if (node->result_storage == ResultStorage::constant) {				// Constant
+			std::cout << "[shape=circle,";
+
+			try { //If the constant has a name
+				std::string name(invmap.at(node).first);
+				std::cout << "label=\"" << name << ": " << *node->values_ << "\",group=\"" << name << '"';
+			}
+			catch (std::out_of_range) { //No name
+				std::cout << "label=\"" << "const " << *node->values_ << '"';
+			}
+			std::cout  << "]" << std::endl;
+		}
+
+		else if (node->result_storage == ResultStorage::constant_bool) {	//Constant bool
+			std::cout << "[shape=circle,";
+
+			try { //If the constant has a name
+				std::string name(invmap.at(node).first);
+				std::cout << "label=\"" << name << ": " << *node->values_ << "\",group=\"" << name << '"';
+			}
+			catch (std::out_of_range) { //No name
+				std::cout << "label=\"" << "const " << *node->values_ << '"';
+			}
+			std::cout << "]" << std::endl;
 		}
-		else if (node->result_storage == ResultStorage::constant_bool) {
-			std::cout << "[shape=circle,label=\"const " << *node->values_ << "\"]" << std::endl;
+
+		else if (node->result_storage == ResultStorage::expr_result) {		//Result
+			std::cout << "[shape=box,label=\"" << node->op_name_ << " [" << node->result_idx_ << "]" << "\"]" << std::endl;
 		}
-		else if (node->result_storage == ResultStorage::expr_result) {
-			std::cout << "[shape=box,label=\"" << node->op_name_ << " " << node->result_idx_ << "\"]" << std::endl;
+
+		else if (node->result_storage == ResultStorage::value) {			// Value
+
+			std::cout << "[shape=circle,";
+			try {
+				std::string name(invmap.at(node).first);
+				bool scalar(invmap.at(node).second);
+				if (scalar) {
+					std::cout << "label=\"" << name << '"';
+				}
+				else {
+					std::cout << "label=<" << name << "<SUB>i</SUB>" << '>';
+				}
+				std::cout << ",group=\"" << name << '"';
+			}
+			catch (std::out_of_range) {
+				std::cout << "label=<<I>var</I>>";
+			}
+			
+			std::cout << "]" << std::endl;
+		}
+
+		else if (node->result_storage == ResultStorage::value_copy) {		//Value copy
+			std::cout << "[shape=circle,";
+			try {
+				std::string name(invmap.at(node).first);
+				bool scalar(invmap.at(node).second);
+				if (scalar) {
+					std::cout << "label=\"" << name << '"';
+				}
+				else {
+					std::cout << "label=<" << name << "<SUB>i</SUB>" << '>';
+				}
+				std::cout << ",group=\"" << name << '"';
+			}
+			catch (std::out_of_range) {
+				std::cout << "label=<<I>var_cp</I>>";
+			}
+			std::cout << "]" << std::endl;
 		}
-		else {
+
+		else {//Temporary & other											//Temporary & other
 			std::cout << "[label=\"" << node->op_name_ << "\"]" << std::endl;
 		}
 	}
 
-
-private:
 	void _print_i_node(uint i) {
 		std::cout << sorted[i]->op_name_ << "_" << i << "_"<< sorted[i]->result_idx_;
 	}
diff --git a/include/parser.hh b/include/parser.hh
index ade2953..781dbc8 100644
--- a/include/parser.hh
+++ b/include/parser.hh
@@ -191,6 +191,7 @@ public:
 
 		//se.print_in_dot();
         //se.print_in_dot2();
+        //se.print_in_dot2(symbols_);
 		processor = ProcessorBase::create_processor(se, max_vec_size, simd_size, arena);
     }
 

From 12b37e909f8d269efbbdb8630c3715eab8b2774e Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Mon, 20 Oct 2025 19:08:09 +0200
Subject: [PATCH 29/31] More appropriate catch blocks

---
 include/expression_dag.hh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/expression_dag.hh b/include/expression_dag.hh
index b57cfc5..ab32ccb 100644
--- a/include/expression_dag.hh
+++ b/include/expression_dag.hh
@@ -216,7 +216,7 @@ private:
 				std::string name(invmap.at(node).first);
 				std::cout << "label=\"" << name << ": " << *node->values_ << "\",group=\"" << name << '"';
 			}
-			catch (std::out_of_range) { //No name
+			catch (const std::out_of_range&) { //No name
 				std::cout << "label=\"" << "const " << *node->values_ << '"';
 			}
 			std::cout  << "]" << std::endl;
@@ -229,7 +229,7 @@ private:
 				std::string name(invmap.at(node).first);
 				std::cout << "label=\"" << name << ": " << *node->values_ << "\",group=\"" << name << '"';
 			}
-			catch (std::out_of_range) { //No name
+			catch (const std::out_of_range&) { //No name
 				std::cout << "label=\"" << "const " << *node->values_ << '"';
 			}
 			std::cout << "]" << std::endl;
@@ -253,7 +253,7 @@ private:
 				}
 				std::cout << ",group=\"" << name << '"';
 			}
-			catch (std::out_of_range) {
+			catch (const std::out_of_range&) {
 				std::cout << "label=<<I>var</I>>";
 			}
 			
@@ -273,7 +273,7 @@ private:
 				}
 				std::cout << ",group=\"" << name << '"';
 			}
-			catch (std::out_of_range) {
+			catch (const std::out_of_range&) {
 				std::cout << "label=<<I>var_cp</I>>";
 			}
 			std::cout << "]" << std::endl;

From b44c9d7afdf64972293df751b071aae683e626b3 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sat, 1 Nov 2025 17:23:38 +0100
Subject: [PATCH 30/31] Matrix and vector norms

---
 include/array.hh          | 87 +++++++++++++++++++++++++++++++++++++++
 include/grammar.impl.hh   |  4 ++
 include/scalar_wrapper.hh | 37 +++++++++++++----
 test/test_parser.cc       |  8 ++++
 4 files changed, 129 insertions(+), 7 deletions(-)

diff --git a/include/array.hh b/include/array.hh
index b590b27..4f0c174 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -1216,6 +1216,93 @@ public:
 		//return full_({}, *wrap_array(a).trace());
 	}
 
+	static Array norm1(const Array& a) {
+		switch (a.shape().size()) {
+			case 0: //scalar
+				Throw() << "Norms are not for scalar values" << "\n";
+				break;
+			case 1: //vector
+			{
+
+				Shape s; //empty Shape for scalar
+				Array r(s);
+				r.elements_[0U] = *wrap_array(a).lpNorm<1>();
+				return r;
+			}
+			case 2: //matrix
+			{
+				Shape s; //empty Shape for scalar
+				Array r(s);
+				r.elements_[0U] = *wrap_array(a).colwise().lpNorm<1>().maxCoeff();
+				return r;
+			}
+			default:
+				Throw() << "Norms are not avaiable for ND tensors" << "\n";
+			}
+	}
+
+	static Array norm2(const Array& a) {
+		switch (a.shape().size()) {
+		case 0: //scalar
+			Throw() << "Norms are not for scalar values" << "\n";
+			break;
+		case 1: //vector
+		{
+			//Euclidean norm
+			Shape s; //empty Shape for scalar
+			Array r(s);
+			r.elements_[0U] = *wrap_array(a).norm();
+			return r;
+		}
+		case 2: //matrix
+		{
+			//Spectral norm
+			Throw() << "norm2(matrix) not yet implemented" << "\n";
+			Shape s; //empty Shape for scalar
+			Array r(s);
+			//r.elements_[0U] = *wrap_array(a);
+			return r;
+		}
+		default:
+			Throw() << "Norms are not avaiable for ND tensors" << "\n";
+		}
+	}
+
+	static Array normfro(const Array& a) {
+		if (a.shape().size() != 2) {
+			Throw() << "Frobenius norm is only defined for matrices" << "\n";
+		}
+
+		Shape s;
+		Array r(s);
+		r.elements_[0U] = *wrap_array(a).norm();
+		return r;
+	}
+
+	static Array norminf(const Array& a) {
+		switch (a.shape().size()) {
+			case 0: //scalar
+				Throw() << "Norms are not for scalar values" << "\n";
+				break;
+			case 1: //vector
+			{
+				Shape s; //empty Shape for scalar
+				Array r(s);
+				r.elements_[0U] = *wrap_array(a).lpNorm<Eigen::Infinity>();
+				return r;
+			}
+			case 2: //matrix
+			{
+				Shape s; //empty Shape for scalar
+				Array r(s);
+				r.elements_[0U] = *wrap_array(a).rowwise().lpNorm<1>().maxCoeff();
+				return r;
+			}
+			default:
+				Throw() << "Norms are not avaiable for ND tensors" << "\n";
+		}
+	}
+
 	static Array flatten(const Array &tensor) {
 		uint n_elements = shape_size(tensor.shape());
 		Shape res_shape(1, n_elements);
diff --git a/include/grammar.impl.hh b/include/grammar.impl.hh
index 8a5ad5c..082592f 100644
--- a/include/grammar.impl.hh
+++ b/include/grammar.impl.hh
@@ -181,6 +181,10 @@ struct grammar : qi::grammar<Iterator, ast::operand(), ascii::space_type> {
 			FN("maximum", binary_array<_max_>())
             FN("diag"   , &Array::diag)
             FN("tr"     , &Array::trace)
+            FN("norm1"  , &Array::norm1)
+            FN("norm2"  , &Array::norm2)
+            FN("normfro", &Array::normfro)
+            FN("norminf", &Array::norminf)
             ;
 
         unary_op.add
diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh
index 54dda38..5273e6d 100644
--- a/include/scalar_wrapper.hh
+++ b/include/scalar_wrapper.hh
@@ -49,8 +49,7 @@ namespace bparser {
 			}
 
 			inline bool operator==(const ScalarWrapper& b) const {
-				if (((***this).result_storage == constant      && (**b).result_storage == constant     ) || 
-					((***this).result_storage == constant_bool && (**b).result_storage == constant_bool) )
+				if ((*this).is_constant() && (*this).have_same_result_storage(b))
 					return *(***this).values_ == *(**b).values_;
 				return false;
 			}
@@ -78,6 +77,14 @@ namespace bparser {
 		protected:
 			ScalarNodePtr node;
 
+			inline bool is_constant() const {
+				return (***this).result_storage == constant ||
+					   (***this).result_storage == constant_bool;
+			}
+
+			inline bool have_same_result_storage(const ScalarWrapper& b)const {
+				return (***this).result_storage == (**b).result_storage;
+			}
 
 		}; //ScalarWrapper
 
@@ -91,18 +98,25 @@ namespace bparser {
 		}													\
 		using std::OP;
 
-		/*
+#define BIN_OP(OP)																	\
+		inline ScalarWrapper OP(const ScalarWrapper& a,const ScalarWrapper& b) {	\
+			return ScalarWrapper::bin_op<_##OP##_>(a,b);							\
+		}																			\
+		using std::OP;
+
+		
 		UN_OP(abs)
 
 		//https://eigen.tuxfamily.org/dox/namespaceEigen.html#a54cc34b64b4935307efc06d56cd531df
 		inline ScalarWrapper abs2(const ScalarWrapper& s) {
 			return s*s;
-		};
-		*/
+		}
+		
 
-		//UN_OP(sqrt)
+		UN_OP(sqrt)
 		//UN_OP(exp)
 		//UN_OP(log)
+		//UN_OP(log2)
 		//UN_OP(log10)
 		//UN_OP(sin)
 		//UN_OP(sinh)
@@ -116,9 +130,18 @@ namespace bparser {
 		//UN_OP(ceil)
 		//UN_OP(floor)
 
+		BIN_OP(max)
+		inline ScalarWrapper maxi(const ScalarWrapper& a, const ScalarWrapper& b) {
+			return ScalarWrapper::bin_op<_max_>(a, b);
+		}
 
+		BIN_OP(min)
+		inline ScalarWrapper mini(const ScalarWrapper& a, const ScalarWrapper& b) {
+			return ScalarWrapper::bin_op<_min_>(a, b);
+		}
 
-
+		//BIN_OP(atan2)
+		//BIN_OP(pow)
 
 	} //details
 } //bparser
diff --git a/test/test_parser.cc b/test/test_parser.cc
index 28b3c20..df23f82 100644
--- a/test/test_parser.cc
+++ b/test/test_parser.cc
@@ -261,6 +261,14 @@ void test_expression() {
 
 	BP_ASSERT(test_expr("tr([[1,9,9],[9,1,9],[9,9,1]])", { 3 }, {}));
 
+	BP_ASSERT(test_expr("norm1([-4,-3,-2,-1,0,1,2,3,4])", {20}, {}));
+	BP_ASSERT(test_expr("norm1([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7 }, {}));
+	BP_ASSERT(test_expr("norm2([-4,-3,-2,-1,0,1,2,3,4])", { 7.745966692414834 }, {}));
+	//BP_ASSERT(test_expr("norm2([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7.3484692283495345 }, {}));
+	BP_ASSERT(test_expr("normfro([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7.745966692414834 }, {}));
+	BP_ASSERT(test_expr("norminf([-4,-3,-2,-1,0,1,2,3,4])", { 4 }, {}));
+	BP_ASSERT(test_expr("norminf([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 9 }, {}));
+
 	BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2}));
 	BP_ASSERT(test_expr("floor(-3.5)", {-4}, {}));
 	BP_ASSERT(test_expr("ceil(-3.5)", {-3}, {}));

From 69be136a871486f1369004034cac09e7b60ffa40 Mon Sep 17 00:00:00 2001
From: lukas-valenta-tul <lukas.valenta@tul.cz>
Date: Sun, 9 Nov 2025 19:26:46 +0100
Subject: [PATCH 31/31] Attempt at computing spectral norm using eigenvalues.
 Requires comparison operators and static cast to double, not possible for
 Bparser

---
 include/array.hh          | 14 +++++++----
 include/scalar_wrapper.hh | 50 ++++++++++++++++++++++++++++++++++++++-
 test/test_parser.cc       |  2 +-
 3 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/include/array.hh b/include/array.hh
index 4f0c174..cbd88fd 100644
--- a/include/array.hh
+++ b/include/array.hh
@@ -1257,11 +1257,17 @@ public:
 		case 2: //matrix
 		{
 			//Spectral norm
-			Throw() << "norm2(matrix) not yet implemented" << "\n";
-			Shape s; //empty Shape for scalar
+			Throw() << "norm2(matrix) is not yet possible" << "\n";
+			/*Shape s; //empty Shape for scalar
 			Array r(s);
-			//r.elements_[0U] = *wrap_array(a);
-			return r;
+
+			Eigen::MatrixX<details::ScalarWrapper> m( wrap_array(a) );
+
+			r.elements_[0U] = *details::sqrt((m.adjoint()*m).eigenvalues().real().maxCoeff());
+			//computing eigenvalues would require static cast to double and comparison operators (<,<=,>,>=,!=,==)
+			//something which we cannot support
+			return r;*/
+			break;
 		}
 		default:
 			Throw() << "Norms are not avaiable for ND tensors" << "\n";
diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh
index 5273e6d..8a07d46 100644
--- a/include/scalar_wrapper.hh
+++ b/include/scalar_wrapper.hh
@@ -12,6 +12,7 @@
 
 #include "scalar_node.hh"
 #include <Eigen/Core>
+//#include <Eigen/Eigenvalues> //impossible
 
 namespace bparser {
 	namespace details {
@@ -23,6 +24,10 @@ namespace bparser {
 			ScalarWrapper(double d) : node(ScalarNode::create_const(d)) { ; }
 			ScalarWrapper(ScalarNodePtr existing_ptr) : node(existing_ptr) { ; }
 
+			inline ScalarWrapper operator+() const {
+				return ScalarWrapper(*this);
+			}
+
 			inline ScalarWrapper operator-() const {
 				return un_op<_minus_>(*this);
 			}
@@ -36,14 +41,29 @@ namespace bparser {
 				return bin_op<_add_>(*this, b);
 			}
 
+			inline ScalarWrapper& operator-=(const ScalarWrapper& b) {
+				node = bin_op<_sub_>(*this, b).get();
+				return *this;
+			}
+
 			inline ScalarWrapper operator-(const ScalarWrapper& b) const {
 				return bin_op<_sub_>(*this, b);
 			}
 
+			inline ScalarWrapper& operator*=(const ScalarWrapper& b) {
+				node = bin_op<_mul_>(*this, b).get();
+				return *this;
+			}
+
 			inline ScalarWrapper operator*(const ScalarWrapper& b) const {
 				return bin_op<_mul_>(*this, b);
 			}
 
+			inline ScalarWrapper& operator/=(const ScalarWrapper& b) {
+				node = bin_op<_div_>(*this, b).get();
+				return *this;
+			}
+
 			inline ScalarWrapper operator/(const ScalarWrapper& b) const {
 				return bin_op<_div_>(*this, b);
 			}
@@ -53,6 +73,34 @@ namespace bparser {
 					return *(***this).values_ == *(**b).values_;
 				return false;
 			}
+			/* These do not make any sense with what we are trying to achieve
+			inline bool operator!=(const ScalarWrapper& b) const {
+				return !((*this) == b);
+			}
+
+			inline bool operator<(const ScalarWrapper& b) const {
+				if ((*this).is_constant() && (*this).have_same_result_storage(b))
+					return *(***this).values_ < *(**b).values_;
+				return false;
+			}
+
+			inline bool operator<=(const ScalarWrapper& b) const {
+				if ((*this).is_constant() && (*this).have_same_result_storage(b))
+					return *(***this).values_ <= *(**b).values_;
+				return false;
+			}
+
+			inline bool operator>=(const ScalarWrapper& b) const {
+				if ((*this).is_constant() && (*this).have_same_result_storage(b))
+					return *(***this).values_ >= *(**b).values_;
+				return false;
+			}
+
+			inline bool operator>(const ScalarWrapper& b) const {
+				if ((*this).is_constant() && (*this).have_same_result_storage(b))
+					return *(***this).values_ > *(**b).values_;
+				return false;
+			}*/
 
 
 			inline ScalarNodePtr operator*() const { //dereference
@@ -111,7 +159,7 @@ namespace bparser {
 		inline ScalarWrapper abs2(const ScalarWrapper& s) {
 			return s*s;
 		}
-		
+	
 
 		UN_OP(sqrt)
 		//UN_OP(exp)
diff --git a/test/test_parser.cc b/test/test_parser.cc
index df23f82..e226692 100644
--- a/test/test_parser.cc
+++ b/test/test_parser.cc
@@ -264,7 +264,7 @@ void test_expression() {
 	BP_ASSERT(test_expr("norm1([-4,-3,-2,-1,0,1,2,3,4])", {20}, {}));
 	BP_ASSERT(test_expr("norm1([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7 }, {}));
 	BP_ASSERT(test_expr("norm2([-4,-3,-2,-1,0,1,2,3,4])", { 7.745966692414834 }, {}));
-	//BP_ASSERT(test_expr("norm2([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7.3484692283495345 }, {}));
+	//BP_ASSERT(test_expr("norm2([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7.3484692283495345 }, {})); //Spectral norm uses eigenvalues/singular values. Eigen uses comparison operators in the algorithm. Bparser does not like that
 	BP_ASSERT(test_expr("normfro([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7.745966692414834 }, {}));
 	BP_ASSERT(test_expr("norminf([-4,-3,-2,-1,0,1,2,3,4])", { 4 }, {}));
 	BP_ASSERT(test_expr("norminf([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 9 }, {}));