diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index fcb5beb..781c7f5 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -265,6 +265,8 @@ jobs: # Bparser dependency sudo apt-get install -y libboost-all-dev + # install eigen + sudo apt install libeigen3-dev diff --git a/CMakeLists.txt b/CMakeLists.txt index 627b7e3..ae5cbef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,12 @@ option(SANITIZER_ON "Whether to use AddressSanitizer (asan) in the DEBUG configu message(STATUS "CMakeLists.txt - BParser") +include(FetchContent) + +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + # CLANG #set(CMAKE_CXX_FLAGS "-std=c++14 -finline-hint-functions -pedantic-errors -Werror=pedantic -Wall -Wextra -Werror -Wno-long-long -Wno-strict-aliasing -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION") @@ -160,7 +166,7 @@ if (NOT Boost_FOUND) if (NOT EXTERNAL_PROJECT_DIR) unset(BOOST_ROOT) endif() - find_package( Boost 1.58.0 REQUIRED) + find_package( Boost 1.70.0 REQUIRED) #since Boost 1.70.0 it should be find_package( Boost CONFIG REQUIRED) endif() message(STATUS "-------------------------------------------------------") @@ -169,11 +175,31 @@ message(STATUS "BOOST_ROOT = ${BOOST_ROOT}") message(STATUS "Boost_LIBRARIES = ${Boost_LIBRARIES}") message(STATUS "Boost_LIBRARY_DIRS = ${Boost_LIBRARY_DIRS}") message(STATUS "Boost_INCLUDE_DIR = ${Boost_INCLUDE_DIR}") +message(STATUS "=======================================================\n") + +#Eigen +message(STATUS "=======================================================") +message(STATUS "====== EIGEN ==========================================") +message(STATUS "=======================================================") + +#find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL +FetchContent_Declare( + Eigen3 + URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz + EXCLUDE_FROM_ALL + FIND_PACKAGE_ARGS CONFIG #same as find_package(Eigen3 CONFIG) +) +FetchContent_MakeAvailable(Eigen3) + +message(STATUS "-------------------------------------------------------") +message(STATUS "EIGEN_ROOT = ${EIGEN_ROOT}") +message(STATUS "Eigen3_DIR = ${Eigen3_DIR}") +message(STATUS "EIGEN3_INCLUDE_DIR = ${EIGEN3_INCLUDE_DIR}") message(STATUS "=======================================================\n\n") message(STATUS "VCL2_INCLUDE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2") -set(BPARSER_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/include ${Boost_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2) +set(BPARSER_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/include ${Boost_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2 ${EIGEN3_INCLUDE_DIR}) if(NOT PROJECT_IS_TOP_LEVEL) set(BPARSER_INCLUDES ${BPARSER_INCLUDES} PARENT_SCOPE) endif() @@ -198,6 +224,8 @@ add_library(bparser SHARED ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_AVX512.cc ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_double.cc ) +target_link_libraries(bparser Eigen3::Eigen) #Interface library, includes the header files +set_target_properties(bparser PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -DBPARSER_DLL") @@ -259,6 +287,6 @@ endmacro() define_test(test_parser bparser) define_test(test_array) define_test(test_grammar bparser) -define_test(test_processor) +define_test(test_processor bparser) #is it broken? -LV define_test(test_speed bparser) define_test(test_simd) diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh index 31196ab..0bde0b6 100644 --- a/include/arena_alloc.hh +++ b/include/arena_alloc.hh @@ -12,6 +12,7 @@ #include #include #include "aligned_alloc.hh" +#include "arena_resource.hh" namespace bparser { @@ -21,55 +22,86 @@ inline size_t align_size(size_t al, size_t size) { } struct ArenaAlloc { + + + //Creates a wrapper of PatchArena for backwards compatibility with BParser + ArenaAlloc(PatchArena& existing_arena) : arena(&existing_arena),buffer(nullptr) { + ; + } + //Creates a wrapper with a new PatchArena with the specified memory alignment and size + //However AssemblyArena might be the correct class to create ArenaAlloc(std::size_t alignment, std::size_t size) - : alignment_(alignment), - size_(0) + //: alignment_(alignment), + // size_(0) { - size_ = align_size(alignment_, size); + size_t size_ = align_size(alignment, size); + buffer = align_alloc(alignment, size_); + arena = new PatchArena(buffer, size_, alignment); + /*size_ = align_size(alignment_, size); base_ = (char*)align_alloc(alignment_, size_); BP_ASSERT(base_ != nullptr); ptr_ = base_; //std::cout << "arena begin: " << (void *)base_ << " end: " << end() << std::endl; + */ } ~ArenaAlloc() { destroy(); } - void destroy() { - align_free(base_); + inline void destroy() { + //align_free(base_); + if (buffer != nullptr) { + align_free(buffer); + delete arena; + } } - void *end() { + /*void* end() { return base_ + size_; - } + }*/ - void * allocate(std::size_t size) { + inline void* allocate(std::size_t size) { + /* size = align_size(alignment_, size); void * ptr = ptr_; ptr_ += size; BP_ASSERT(ptr_ <= end()); //std::cout << "allocated: " << ptr << " end: " << (void *)ptr_ << " aend: " << end() << "\n"; return ptr; + */ + return arena->allocate(size); + } template - T * create(Args&&... args) { + T* create(Args&&... args) { + void * ptr = allocate(sizeof(T)); return new (ptr) T(std::forward(args)...); + } template - T * create_array(uint n_items) { + T* create_array(uint n_items) { + /* void * ptr = allocate(sizeof(T) * n_items); return new (ptr) T[n_items]; + */ + return arena->allocate_simd(n_items); } - - std::size_t alignment_; - std::size_t size_; - char * base_; - char * ptr_; + inline std::size_t get_size() const { + return arena->get_size(); + } + + //std::size_t alignment_; + //std::size_t size_; + //char * base_; + //char * ptr_; +protected: + PatchArena* arena; + void* buffer; }; } // namespace bparser diff --git a/include/arena_resource.hh b/include/arena_resource.hh new file mode 100644 index 0000000..a01c847 --- /dev/null +++ b/include/arena_resource.hh @@ -0,0 +1,184 @@ +/*! + * + * Copyright (C) 2015 Technical University of Liberec. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 3 as published by the + * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html) + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * + * @file arena_resource.hh + */ + +#ifndef ARENA_RESOURCE_HH_ +#define ARENA_RESOURCE_HH_ + +#include +#include +#include +#include +#include // !! Use Flow exception mechanism + +//#include "system/asserts.hh" +#include "assert.hh" + + +// Final proposal of Arena +// TODO shared_ptr out of class, pass pointer to data, describe how to use +template +class PatchArenaResource : public std::pmr::memory_resource { +protected: + /// Returns different upstream resource in debug / release mode + static inline std::pmr::memory_resource* upstream_resource() { +#ifdef DEBUG + return std::pmr::null_memory_resource(); +#else + return std::pmr::get_default_resource(); +#endif + } + +public: + //DECLARE_EXCEPTION( ExcArenaAllocation, + // << "Allocation of ArenaResource failed. Please check if correct type of upstream is used."); +#define EXC_ARENA_ALLOCATION "Allocation of ArenaResource failed. Please check if correct type of upstream is used." + + /// Same as previous but doesn't construct buffer implicitly. + PatchArenaResource(void *buffer, size_t buffer_size, size_t simd_alignment, std::pmr::memory_resource* upstream = PatchArenaResource::upstream_resource()) + : upstream_( upstream ), + buffer_(buffer), + buffer_size_(buffer_size), + resource_(buffer_, buffer_size, upstream_), + simd_alignment_(simd_alignment), + full_data_(false) + { + //ASSERT_PERMANENT_EQ( (buffer_size%simd_alignment), 0 ); + BP_ASSERT( (buffer_size % simd_alignment) == 0 ); + } + + + ~PatchArenaResource() = default; // virtual, call destructor buffer_ = default_resource, (resource_) + + /// Compute and print free space and used space of arena buffer. Development method + inline void print_space() { + void *p = this->raw_allocate(1, simd_alignment_); + size_t used_size = (char *)p - (char *)buffer_; + size_t free_space = buffer_size_ - used_size; + std::cout << "Allocated space of arena is " << used_size << " B, free space is " << free_space << " B." << std::endl; + } + + + /// Getter for resource + Resource &resource() { + return resource_; + } + + /// Allocate and return data pointer of n_item array of type T (alignment to length 8 bytes) + template + T* allocate_8(size_t n_items) { + size_t bytes = sizeof(T) * n_items; + return (T*)this->raw_allocate(bytes, 8); + } + + /// Allocate and return data pointer of n_item array of type T (alignment to length given by simd_alignment constructor argument) + template + T* allocate_simd(size_t n_items) { + size_t bytes = sizeof(T) * n_items; + return (T*)this->raw_allocate(bytes, simd_alignment_); + } + + // Reset allocated data + void reset() { + resource_.release(); + full_data_ = false; +#ifdef DEBUG + char *c_buffer = (char *)buffer_; + for (size_t i=0; ideallocate(p, bytes, alignment); + } + + /// Override do_is_equal for memory resource comparison + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + return this == &other; + } + + std::pmr::memory_resource* upstream_; ///< Pointer to upstream + void* buffer_; ///< Pointer to buffer + size_t buffer_size_; ///< Size of buffer + Resource resource_; ///< Resource of arena + size_t simd_alignment_; ///< Size of SIMD alignment + bool full_data_; ///< Flag signs full data (child arena is created) +}; + + +template +class AssemblyArenaResource : public PatchArenaResource { +public: + /// Constructor. Creates assembly arena + AssemblyArenaResource(size_t buffer_size, size_t simd_alignment, std::pmr::memory_resource* upstream = PatchArenaResource::upstream_resource()) + : PatchArenaResource( std::pmr::get_default_resource()->allocate(buffer_size, simd_alignment), buffer_size, simd_alignment, upstream ) {} + + virtual ~AssemblyArenaResource() { + this->do_deallocate(this->buffer_, this->buffer_size_, this->simd_alignment_); + } + + /** + * Create and return child arena. + * + * Child arena is created in free space of actual arena. + * Actual arena is marked as full (flag full_data_) and cannot allocate new data. + */ + PatchArenaResource *get_child_arena() { + void *p = this->raw_allocate(1, this->simd_alignment_); + size_t used_size = (char *)p - (char *)this->buffer_; + size_t free_space = this->buffer_size_ - used_size; + this->full_data_ = true; + return new PatchArenaResource(p, free_space, this->simd_alignment_); + } + + +}; + + + +using AssemblyArena = AssemblyArenaResource; +using PatchArena = PatchArenaResource; + + +#endif /* ARENA_RESOURCE_HH_ */ diff --git a/include/array.hh b/include/array.hh index 490118f..cbd88fd 100644 --- a/include/array.hh +++ b/include/array.hh @@ -12,10 +12,10 @@ #include #include #include -#include #include #include "config.hh" #include "scalar_node.hh" +#include "scalar_wrapper.hh" //#include "test_tools.hh" namespace bparser { @@ -860,6 +860,81 @@ public: } + + //Wraps the ScalarNodes of an Array into an Eigen Matrix of ScalarWrappers. + //Vectors will be column vectors. Eigen does not support vectors without orientation. + //Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor + static Eigen::MatrixX wrap_array(const bparser::Array& a) { + MultiIdx idx(a.range()); + return wrap_array(a, idx); + } + + //Wraps the ScalarNodes of an Array accessed via MultiIdx.idx_trg() created from supplied MultiIdxRange into an Eigen Matrix of ScalarWrapper + //Vectors will be column vectors. Eigen does not support vectors without orientation. + //Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor + static Eigen::MatrixX wrap_array(const bparser::Array& a, MultiIdxRange& range) { + MultiIdx idx (range); + return wrap_array(a, idx); + } + + //Wraps the ScalarNodes of an Array accessed via MultiIdx.idx_trg() into an Eigen Matrix of ScalarWrapper + //Vectors will be column vectors. Eigen does not support vectors without orientation. + //Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor + static Eigen::MatrixX wrap_array(const bparser::Array& a, MultiIdx& index) { + + using namespace details; + Shape trg_shape = index.range_.target_shape(); + //std::cout << "Wrapping: " << print_shape(trg_shape) << std::endl; + if (trg_shape.size() == 0) { + Throw() << "Attempted to wrap scalar into Eigen Matrix"; + } + if (trg_shape.size() == 1) { + uint len = trg_shape[0]; + Eigen::VectorX v(len); + for (uint i = 0; i < len && index.valid(); i++, index.inc_trg()) { + v(i) = ScalarWrapper(a[index]); + } + return v; + } + else {// (a.shape().size() > 2) { + uint rows = *(trg_shape.end() - 2); + uint cols = *(trg_shape.end() - 1); + + Eigen::MatrixX m(rows, cols); + for (uint row = 0; row < rows; row++) { + for (uint col = 0; col < cols && index.valid(); col++, index.inc_trg()) { + m(row, col) = ScalarWrapper(a[index]); + } + } + return m; + } + } + + //Creates an Array of ScalarNodes from an Eigen Matrix of ScalarWrappers + // make_vector - Will reduce the Array shape if Matrix is actually a Vector. Shape:(x,1) -> (x); (1,y) -> (y) + static bparser::Array unwrap_array(const Eigen::MatrixX& m, const bool make_vector = false) { + using namespace details; + + if (make_vector && (m.rows() == 1 || m.cols() == 1)) { + Array a({ (uint)std::max(m.rows(),m.cols()) }); + MultiIdx index(a.range()); + for (uint i = 0; i < a.shape()[0]; i++, index.inc_src()) { + a.elements_[index.idx_src()] = m(i).get(); + } + return a; + } + else { + Array a({ (uint)m.rows(), (uint)m.cols() }); + MultiIdx index(a.range()); + for (uint row = 0; row < a.shape()[0]; row++) { + for (uint col = 0; col < a.shape()[1]; col++, index.inc_src()) { + a.elements_[index.idx_src()] = m(row, col).get(); + } + } + return a; + } + } + /** * Numpy.matmul: * @@ -867,7 +942,7 @@ public: * b has shape (..., i,j, l,m) * result has shape (..., i,j, k,m) */ - static Array mat_mult(const Array &a, const Array &b) { + static Array mat_mult_old(const Array& a, const Array& b) { //std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n"; if (a.shape().size() == 0) @@ -919,44 +994,45 @@ public: MultiIdx a_idx(a_range); MultiIdx b_idx(b_range); // allocated MultiIdx result_idx(result_range); -/* - std::cout << "a_idx, shp: " << print_vector(a_idx.range_.full_shape_) << "\n"; - std::cout << "b_idx, shp: " << print_vector(b_idx.range_.full_shape_) << "\n"; - std::cout << "r_idx, shp: " << print_vector(result_idx.range_.full_shape_) << "\n"; -*/ + /* + std::cout << "a_idx, shp: " << print_vector(a_idx.range_.full_shape_) << "\n"; + std::cout << "b_idx, shp: " << print_vector(b_idx.range_.full_shape_) << "\n"; + std::cout << "r_idx, shp: " << print_vector(result_idx.range_.full_shape_) << "\n"; + */ ScalarNodePtr sum; Array result(result_shape); - for(;result_idx.valid();) { + for (; result_idx.valid();) { sum = nullptr; a_idx.reset_indices(result_idx); b_idx.reset_indices(result_idx); - for(;a_idx.valid();) { -/* - std::cout << "a_idx: " << print_vector(a_idx.indices()) << " didx: " - << a_idx.src_idx() << "\n"; - std::cout << "b_idx: " << print_vector(b_idx.indices()) << " didx: " - << b_idx.src_idx() << "\n"; -*/ + for (; a_idx.valid();) { + /* + std::cout << "a_idx: " << print_vector(a_idx.indices()) << " didx: " + << a_idx.src_idx() << "\n"; + std::cout << "b_idx: " << print_vector(b_idx.indices()) << " didx: " + << b_idx.src_idx() << "\n"; + */ ScalarNodePtr mult = details::ScalarNode::create( - a.elements_[a_idx.idx_src()], - b.elements_[b_idx.idx_src()]); + a.elements_[a_idx.idx_src()], + b.elements_[b_idx.idx_src()]); if (sum == nullptr) { sum = mult; - } else { + } + else { // TODO: how to use inplace operations correctly ?? sum = details::ScalarNode::create(sum, mult); } //std::cout << "aidx "; - a_idx.inc_trg(-1,1, false); + a_idx.inc_trg(-1, 1, false); //std::cout << "bidx "; - b_idx.inc_trg(-1,1, false); + b_idx.inc_trg(-1, 1, false); BP_ASSERT(a_idx.valid() == b_idx.valid()); } -/* - std::cout << "r_idx: " << print_vector(result_idx.indices()) << " didx: " - << result_idx.src_idx() << "\n"; -*/ + /* + std::cout << "r_idx: " << print_vector(result_idx.indices()) << " didx: " + << result_idx.src_idx() << "\n"; + */ result.elements_[result_idx.idx_src()] = sum; @@ -967,6 +1043,134 @@ public: auto final_range = MultiIdxRange(result.shape()).full(); + //std::cout << " raw res: "<< print_vector(result_shape); + if (b.shape().size() == 1 && *(result_shape.end() - 1) == 1) { + // cut -1 axis + //std::cout << " b cut: "<< result_shape.size()-1 << "\n"; + final_range.remove_target_axis(result_shape.size() - 1); + } + BP_ASSERT(*(result_shape.end() - 2) == 1); + //std::cout << " r cut: "<< result_shape.size()-2 << "\n"; + final_range.remove_target_axis(result_shape.size() - 2); + // cut -2 axis always + if (a.shape().size() == 1 && *(result_shape.end() - 3) == 1) { + // cut -3 axis + // std::cout << " a cut: "<< result_shape.size()-3 << "\n"; + final_range.remove_target_axis(result_shape.size() - 3); + } + // std::cout << " final res: " << print_vector(final_range.sub_shape()) << "\n"; + return Array(result, final_range); + + } + + /** + * Numpy.matmul: + * + * a has shape (..., i,j, k,l) + * b has shape (..., i,j, l,m) + * result has shape (..., i,j, k,m) + */ + static Array mat_mult(const Array &a, const Array &b) { + //std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n"; + + //std::cout << "Shape: ---------" << std::endl; + //std::cout << print_shape(a.shape()) << std::endl; + //std::cout << print_shape(b.shape()) << std::endl; + + if (a.shape().size() == 0) + Throw() << "Matmult can not multiply by scalar a." << "\n"; + if (b.shape().size() == 0) + Throw() << "Matmult can not multiply by scalar b." << "\n"; + + Shape a_shape = a.shape(); + if (a_shape.size() == 1) { + a_shape.insert(a_shape.begin(), 1); + // shape (l) -> (1,l) + } + + + Shape b_shape = b.shape(); + if (b_shape.size() == 1) { + b_shape.push_back(1); + // shape (l) -> (l,1) + } + + + uint a_cols = *(a_shape.end() - 1), b_rows = *(b_shape.end() - 2); + + if (a_cols != b_rows) { // l != l + Throw() << "Matmult summing dimension mismatch: " << a_cols << " != " << b_rows << "\n"; + } + + //Add for common shape + a_shape.insert(a_shape.end(), 1); + // a_shape : (...,i,j,k,l,1) + b_shape.insert(b_shape.end() - 2, 1); + // b_shape : (...,i,j,1,l,m) + + + Shape result_shape(MultiIdxRange::broadcast_common_shape(a_shape, b_shape)); + // r_shape (..., i,j,k,l,m) + MultiIdxRange a_range(MultiIdxRange(a_shape).full().broadcast(result_shape)); + // a_shape (..., 1,1,k,l,1) -> (...,i,j,k,l,1) + MultiIdxRange b_range(MultiIdxRange(b_shape).full().broadcast(result_shape)); + // b_shape (..., 1,1,1,l,m) -> (...,i,j,1,l,m) + + //Remove for computation + a_range.target_transpose_.erase(a_range.target_transpose_.end() - 1); + // a_shape (..., i,j,k,l, ) + b_range.target_transpose_.erase(b_range.target_transpose_.end() - 3); + // b_shape (..., i,j, ,l,m) + result_shape.erase(result_shape.end() - 2); + // r_shape (..., i,j,k, ,m) + + //std::cout << print_shape(result_shape) << std::endl; + + Array result(result_shape); + bool should_transpose = a.shape().size() == 1; + + for (MultiIdx + result_idx(result.range()), + a_idx(a_range), + b_idx(b_range); result_idx.valid(); ) { + + Eigen::MatrixX m_a = wrap_array(a, a_idx); + Eigen::MatrixX m_b = wrap_array(b, b_idx); + + Array matmult = unwrap_array(m_a * m_b); + + for (MultiIdx mult_idx(matmult.range()); mult_idx.valid(); mult_idx.inc_src(), result_idx.inc_src()) { + result.elements_[result_idx.idx_src()] = matmult[mult_idx]; + } + } + + MultiIdxRange final_range(result.range()); + if (b.shape().size() == 1) { + final_range.remove_target_axis(absolute_idx(-1, result_shape.size())); + // shape (..., i,j,k,1) -> ...,j,k) + } + if (a.shape().size() == 1) { + final_range.remove_target_axis(absolute_idx(-2, result_shape.size())); + // shape (..., i,j,1,m) -> ...,j,m) + } + + return Array(result,final_range); + /* + auto m_a = wrap_array(a); + auto m_b = wrap_array(b); + + if (a.shape().size() == 1) { //is vector + m_a = m_a.transpose(); //colvec -> rowvec + } + + if (m_a.cols() != m_b.rows()) + Throw() << "Matmult summing dimension mismatch: " << m_a.cols() << " != " << m_b.rows() << "\n"; + + return unwrap_array(m_a * m_b, (a.shape().size() == 1 || b.shape().size() == 1));*/ + //Shape result_shape = result.shape(); + + /*auto final_range = MultiIdxRange(result.shape()).full(); + //std::cout << " raw res: "<< print_vector(result_shape); if (b.shape().size() == 1 && *(result_shape.end() - 1) == 1 ) { // cut -1 axis @@ -984,7 +1188,125 @@ public: } // std::cout << " final res: " << print_vector(final_range.sub_shape()) << "\n"; return Array(result, final_range); + */ + + } + static Array diag(const Array& a) { + if (a.shape().size() == 0) { + return a; + } + + if (a.shape().size() == 1) { // diag -> matrix + return unwrap_array(wrap_array(a).asDiagonal()); + } + // matrix -> diag + return unwrap_array(wrap_array(a).diagonal(),true); + + } + + static Array trace(const Array& a) { + if (a.shape().size() != 2) { + Throw() << "Function trace can only be used for matrices" << "\n"; + } + Shape s; //empty Shape for scalar + Array r(s); + r.elements_[0U] = *wrap_array(a).trace(); + return r; + //return full_({}, *wrap_array(a).trace()); + } + + static Array norm1(const Array& a) { + switch (a.shape().size()) { + case 0: //scalar + Throw() << "Norms are not for scalar values" << "\n"; + break; + case 1: //vector + { + + Shape s; //empty Shape for scalar + Array r(s); + r.elements_[0U] = *wrap_array(a).lpNorm<1>(); + return r; + } + case 2: //matrix + { + Shape s; //empty Shape for scalar + Array r(s); + r.elements_[0U] = *wrap_array(a).colwise().lpNorm<1>().maxCoeff(); + return r; + } + default: + Throw() << "Norms are not avaiable for ND tensors" << "\n"; + } + } + + static Array norm2(const Array& a) { + switch (a.shape().size()) { + case 0: //scalar + Throw() << "Norms are not for scalar values" << "\n"; + break; + case 1: //vector + { + //Euclidean norm + Shape s; //empty Shape for scalar + Array r(s); + r.elements_[0U] = *wrap_array(a).norm(); + return r; + } + case 2: //matrix + { + //Spectral norm + Throw() << "norm2(matrix) is not yet possible" << "\n"; + /*Shape s; //empty Shape for scalar + Array r(s); + + Eigen::MatrixX m( wrap_array(a) ); + + r.elements_[0U] = *details::sqrt((m.adjoint()*m).eigenvalues().real().maxCoeff()); + //computing eigenvalues would require static cast to double and comparison operators (<,<=,>,>=,!=,==) + //something which we cannot support + return r;*/ + break; + } + default: + Throw() << "Norms are not avaiable for ND tensors" << "\n"; + } + } + + static Array normfro(const Array& a) { + if (a.shape().size() != 2) { + Throw() << "Frobenius norm is only defined for matrices" << "\n"; + } + + Shape s; + Array r(s); + r.elements_[0U] = *wrap_array(a).norm(); + return r; + } + + static Array norminf(const Array& a) { + switch (a.shape().size()) { + case 0: //scalar + Throw() << "Norms are not for scalar values" << "\n"; + break; + case 1: //vector + { + Shape s; //empty Shape for scalar + Array r(s); + r.elements_[0U] = *wrap_array(a).lpNorm(); + return r; + } + case 2: //matrix + { + Shape s; //empty Shape for scalar + Array r(s); + r.elements_[0U] = *wrap_array(a).rowwise().lpNorm<1>().maxCoeff(); + return r; + } + default: + Throw() << "Norms are not avaiable for ND tensors" << "\n"; + } } static Array flatten(const Array &tensor) { diff --git a/include/config.hh b/include/config.hh index d1db252..91cf3fe 100644 --- a/include/config.hh +++ b/include/config.hh @@ -33,9 +33,13 @@ typedef unsigned int uint; #endif #if defined(_WIN32) -# define EXPORT __declspec(dllexport) +# if defined(BPARSER_DLL) +# define EXPORT __declspec(dllexport) +# else +# define EXPORT __declspec(dllimport) +# endif #else -#define EXPORT +# define EXPORT #endif #if defined(_WIN32) diff --git a/include/create_processor.hh b/include/create_processor.hh index a4f10e4..fecb051 100644 --- a/include/create_processor.hh +++ b/include/create_processor.hh @@ -25,7 +25,7 @@ namespace bparser{ } } - ProcessorBase * ProcessorBase::create_processor(ExpressionDAG &se, uint vector_size, uint simd_size, ArenaAllocPtr arena) { + ProcessorBase * ProcessorBase::create_processor(ExpressionDAG &se, uint vector_size, uint simd_size, PatchArenaPtr arena) { if (simd_size == 0) { simd_size = get_simd_size(); } diff --git a/include/expression_dag.hh b/include/expression_dag.hh index da08bb5..ab32ccb 100644 --- a/include/expression_dag.hh +++ b/include/expression_dag.hh @@ -15,6 +15,7 @@ #include "config.hh" #include "scalar_node.hh" #include "assert.hh" +#include "array.hh" namespace bparser { @@ -40,6 +41,8 @@ private: /// Result nodes, given as input. NodeVec results; + typedef std::pair InvDotNameAndScalar; + typedef std::map InvDotMap; /** * Used in the setup_result_storage to note number of unclosed nodes @@ -102,6 +105,7 @@ public: /** * Print ScalarExpression graph in the dot format. + * Useful for debugging */ void print_in_dot() { std::map i_node; @@ -131,8 +135,155 @@ public: std::cout << "Node: " << node->op_name_ << "_" << node->result_idx_ << " " << node->result_storage << std::endl; } + /** + * Print ScalarExpression graph in the common dot format. + * Useful for understanding the DAG. + */ + void print_in_dot2() { + print_in_dot2(InvDotMap()); + } + + /** + * Print ScalarExpression graph in the common dot format. + * Useful for understanding the DAG. Using the parser's map of var. Name -> Array find the inverse ScalarNodePtr -> var. Name + */ + void print_in_dot2(const std::map& symbols) { + print_in_dot2(create_inverse_map(symbols)); + } + + /** + * Print ScalarExpression graph in the common dot format. + * Useful for understanding the DAG. Using the map of ScalarNodePtr -> variableName + */ + void print_in_dot2(const InvDotMap& names) { + + sort_nodes(); + + std::cout << "\n" << "----- begin cut here -----" << "\n"; + std::cout << "digraph Expr {" << "\n"; + + std::cout << "/* definitions */" << "\n"; + + std::cout << "edge [dir=back]" << "\n"; + for (uint i = 0; i < sorted.size(); ++i) { + _print_dot_node_definition(sorted[i],names); + } + std::cout << "/* end of definitions */" << "\n"; + + for (uint i = 0; i < sorted.size(); ++i) { + for (uint in = 0; in < sorted[i]->n_inputs_; ++in) { + std::cout << " "; + _print_dot_node_id(sorted[i]); + std::cout << "\n -> "; + _print_dot_node_id(sorted[i]->inputs_[in]); + std::cout << "\n\n"; + } + } + std::cout << "}" << "\n"; + std::cout << "----- end cut here -----" << "\n"; + std::cout.flush(); + } + + //Create a map of ScalarNodePtr -> (variable name, is_scalar) + InvDotMap create_inverse_map(const std::map& symbols) const { + InvDotMap inv_map; + if (symbols.empty()) return inv_map; + for (const auto& s : symbols) + { + for (const auto& n : s.second.elements()) { + inv_map[n] = std::pair(s.first, s.second.shape().empty()); + } + } + return inv_map; + } + private: + //Print the vertice identifier for dot + void _print_dot_node_id(const ScalarNodePtr& node) const { + std::cout << node->op_name_ << "_" << (uintptr_t)node.get() << "__" << node->result_storage;// << std::endl; + } + + //Print how the vertice should look in dot + void _print_dot_node_definition(const ScalarNodePtr& node, const InvDotMap& invmap) const { + _print_dot_node_id(node); + std::cout << ' '; + + if (node->result_storage == ResultStorage::constant) { // Constant + std::cout << "[shape=circle,"; + + try { //If the constant has a name + std::string name(invmap.at(node).first); + std::cout << "label=\"" << name << ": " << *node->values_ << "\",group=\"" << name << '"'; + } + catch (const std::out_of_range&) { //No name + std::cout << "label=\"" << "const " << *node->values_ << '"'; + } + std::cout << "]" << std::endl; + } + + else if (node->result_storage == ResultStorage::constant_bool) { //Constant bool + std::cout << "[shape=circle,"; + + try { //If the constant has a name + std::string name(invmap.at(node).first); + std::cout << "label=\"" << name << ": " << *node->values_ << "\",group=\"" << name << '"'; + } + catch (const std::out_of_range&) { //No name + std::cout << "label=\"" << "const " << *node->values_ << '"'; + } + std::cout << "]" << std::endl; + } + + else if (node->result_storage == ResultStorage::expr_result) { //Result + std::cout << "[shape=box,label=\"" << node->op_name_ << " [" << node->result_idx_ << "]" << "\"]" << std::endl; + } + + else if (node->result_storage == ResultStorage::value) { // Value + + std::cout << "[shape=circle,"; + try { + std::string name(invmap.at(node).first); + bool scalar(invmap.at(node).second); + if (scalar) { + std::cout << "label=\"" << name << '"'; + } + else { + std::cout << "label=<" << name << "i" << '>'; + } + std::cout << ",group=\"" << name << '"'; + } + catch (const std::out_of_range&) { + std::cout << "label=<var>"; + } + + std::cout << "]" << std::endl; + } + + else if (node->result_storage == ResultStorage::value_copy) { //Value copy + std::cout << "[shape=circle,"; + try { + std::string name(invmap.at(node).first); + bool scalar(invmap.at(node).second); + if (scalar) { + std::cout << "label=\"" << name << '"'; + } + else { + std::cout << "label=<" << name << "i" << '>'; + } + std::cout << ",group=\"" << name << '"'; + } + catch (const std::out_of_range&) { + std::cout << "label=<var_cp>"; + } + std::cout << "]" << std::endl; + } + + else {//Temporary & other //Temporary & other + std::cout << "[label=\"" << node->op_name_ << "\"]" << std::endl; + } + } + void _print_i_node(uint i) { std::cout << sorted[i]->op_name_ << "_" << i << "_"<< sorted[i]->result_idx_; } diff --git a/include/grammar.impl.hh b/include/grammar.impl.hh index 037c8ff..082592f 100644 --- a/include/grammar.impl.hh +++ b/include/grammar.impl.hh @@ -17,7 +17,8 @@ #include #include -#include +//#include +#include //#define BOOST_SPIRIT_NO_PREDEFINED_TERMINALS @@ -178,6 +179,12 @@ struct grammar : qi::grammar { FN("power" , binary_array<_pow_>()) FN("minimum", binary_array<_min_>()) FN("maximum", binary_array<_max_>()) + FN("diag" , &Array::diag) + FN("tr" , &Array::trace) + FN("norm1" , &Array::norm1) + FN("norm2" , &Array::norm2) + FN("normfro", &Array::normfro) + FN("norminf", &Array::norminf) ; unary_op.add diff --git a/include/instrset_detect.cc b/include/instrset_detect.cc index a023773..49618c3 100644 --- a/include/instrset_detect.cc +++ b/include/instrset_detect.cc @@ -1,6 +1,9 @@ #include "instrset_detect.hh" +namespace bparser { + + int b_instrset_detect(void) { + return instrset_detect(); + } -int b_instrset_detect(void) { - return instrset_detect(); } \ No newline at end of file diff --git a/include/instrset_detect.hh b/include/instrset_detect.hh index 10430a6..5cd0b54 100644 --- a/include/instrset_detect.hh +++ b/include/instrset_detect.hh @@ -10,12 +10,14 @@ * Wraps the third party library function for DLL export reasons. */ -#ifndef INCLUDE_INSTRSET_DETECT_HH -#define INCLUDE_INSTRSET_DETECT_HH +#ifndef INCLUDE_INSTRSET_DETECT_HH_ +#define INCLUDE_INSTRSET_DETECT_HH_ #include "config.hh" #include "instrset.h" +namespace bparser{ -EXPORT int b_instrset_detect(void); + EXPORT int b_instrset_detect(void); -#endif \ No newline at end of file +} +#endif //!INCLUDE_INSTRSET_DETECT_HH_ \ No newline at end of file diff --git a/include/parser.hh b/include/parser.hh index e233927..781dbc8 100644 --- a/include/parser.hh +++ b/include/parser.hh @@ -169,7 +169,7 @@ public: /// /// All variable names have to be set before this call. /// TODO: set result variable - void compile(std::shared_ptr arena = nullptr) { + void compile(std::shared_ptr arena = nullptr) { destroy_processor(); ParserResult res_array = boost::apply_visitor(ast::make_array(symbols_), ast); @@ -190,6 +190,8 @@ public: details::ExpressionDAG se(result_array_.elements()); //se.print_in_dot(); + //se.print_in_dot2(); + //se.print_in_dot2(symbols_); processor = ProcessorBase::create_processor(se, max_vec_size, simd_size, arena); } diff --git a/include/processor.hh b/include/processor.hh index 6f9452e..b16e018 100644 --- a/include/processor.hh +++ b/include/processor.hh @@ -127,6 +127,8 @@ using namespace details; typedef std::shared_ptr ArenaAllocPtr; +typedef std::shared_ptr PatchArenaPtr; + #define CODE(OP_NAME) \ @@ -158,7 +160,7 @@ struct ProcessorBase { return arena_; } - inline static ProcessorBase *create_processor(ExpressionDAG &se, uint vec_n_blocks, uint simd_size = 0, ArenaAllocPtr arena = nullptr); + inline static ProcessorBase *create_processor(ExpressionDAG &se, uint vec_n_blocks, uint simd_size = 0, PatchArenaPtr arena = nullptr); ArenaAllocPtr arena_; }; @@ -477,7 +479,13 @@ struct Processor : public ProcessorBase { Operation * program_; std::vector< std::shared_ptr > val_copy_nodes_; }; - +template +ProcessorBase* create_processor_(ExpressionDAG& se, uint vector_size, uint simd_size, PatchArenaPtr arena) { + if (arena == nullptr) { + return create_processor_(se, vector_size, simd_size, (ArenaAllocPtr)std::shared_ptr(nullptr)); //will create new ArenaAlloc in the other method + } + return create_processor_(se, vector_size, simd_size, std::make_shared(*arena)); +} template ProcessorBase * create_processor_(ExpressionDAG &se, uint vector_size, uint simd_size, ArenaAllocPtr arena) @@ -503,7 +511,7 @@ ProcessorBase * create_processor_(ExpressionDAG &se, uint vector_size, uint sim if (arena == nullptr) arena = std::make_shared(simd_bytes, est); else - BP_ASSERT(arena->size_ >= est); + BP_ASSERT(arena->get_size() >= est); return arena->create>>(arena, se, vec_n_blocks); } diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh new file mode 100644 index 0000000..8a07d46 --- /dev/null +++ b/include/scalar_wrapper.hh @@ -0,0 +1,218 @@ +/* + * scalar_wrapper.hh + * + * Created on: Apr 6, 2025 + * Author: LV + */ + +//https://eigen.tuxfamily.org/dox/TopicCustomizing_CustomScalar.html + +#ifndef INCLUDE_SCALAR_WRAPPER_HH_ +#define INCLUDE_SCALAR_WRAPPER_HH_ + +#include "scalar_node.hh" +#include +//#include //impossible + +namespace bparser { + namespace details { + // Eigen compatible wrapper for ScalarNode + struct ScalarWrapper { + + ScalarWrapper() : node(ScalarNode::create_zero()) { ; } + ScalarWrapper(int i) : node(ScalarNode::create_const(i)) { ; } + ScalarWrapper(double d) : node(ScalarNode::create_const(d)) { ; } + ScalarWrapper(ScalarNodePtr existing_ptr) : node(existing_ptr) { ; } + + inline ScalarWrapper operator+() const { + return ScalarWrapper(*this); + } + + inline ScalarWrapper operator-() const { + return un_op<_minus_>(*this); + } + + inline ScalarWrapper& operator+=(const ScalarWrapper& b) { + node = bin_op<_add_>(*this, b).get(); + return *this; + } + + inline ScalarWrapper operator+(const ScalarWrapper& b) const { + return bin_op<_add_>(*this, b); + } + + inline ScalarWrapper& operator-=(const ScalarWrapper& b) { + node = bin_op<_sub_>(*this, b).get(); + return *this; + } + + inline ScalarWrapper operator-(const ScalarWrapper& b) const { + return bin_op<_sub_>(*this, b); + } + + inline ScalarWrapper& operator*=(const ScalarWrapper& b) { + node = bin_op<_mul_>(*this, b).get(); + return *this; + } + + inline ScalarWrapper operator*(const ScalarWrapper& b) const { + return bin_op<_mul_>(*this, b); + } + + inline ScalarWrapper& operator/=(const ScalarWrapper& b) { + node = bin_op<_div_>(*this, b).get(); + return *this; + } + + inline ScalarWrapper operator/(const ScalarWrapper& b) const { + return bin_op<_div_>(*this, b); + } + + inline bool operator==(const ScalarWrapper& b) const { + if ((*this).is_constant() && (*this).have_same_result_storage(b)) + return *(***this).values_ == *(**b).values_; + return false; + } + /* These do not make any sense with what we are trying to achieve + inline bool operator!=(const ScalarWrapper& b) const { + return !((*this) == b); + } + + inline bool operator<(const ScalarWrapper& b) const { + if ((*this).is_constant() && (*this).have_same_result_storage(b)) + return *(***this).values_ < *(**b).values_; + return false; + } + + inline bool operator<=(const ScalarWrapper& b) const { + if ((*this).is_constant() && (*this).have_same_result_storage(b)) + return *(***this).values_ <= *(**b).values_; + return false; + } + + inline bool operator>=(const ScalarWrapper& b) const { + if ((*this).is_constant() && (*this).have_same_result_storage(b)) + return *(***this).values_ >= *(**b).values_; + return false; + } + + inline bool operator>(const ScalarWrapper& b) const { + if ((*this).is_constant() && (*this).have_same_result_storage(b)) + return *(***this).values_ > *(**b).values_; + return false; + }*/ + + + inline ScalarNodePtr operator*() const { //dereference + return get(); + } + + inline ScalarNodePtr get() const { + return node; + } + + template + static ScalarWrapper bin_op(const ScalarWrapper& a, const ScalarWrapper& b) { + return ScalarWrapper(ScalarNode::create(a.get(), b.get())); + } + + template + static ScalarWrapper un_op(const ScalarWrapper& a) { + return ScalarWrapper(ScalarNode::create(a.get())); + } + + + protected: + ScalarNodePtr node; + + inline bool is_constant() const { + return (***this).result_storage == constant || + (***this).result_storage == constant_bool; + } + + inline bool have_same_result_storage(const ScalarWrapper& b)const { + return (***this).result_storage == (**b).result_storage; + } + + }; //ScalarWrapper + + //inline std::ostream& operator<<(std::ostream& out, const ScalarWrapper& s) { + // + //} + +#define UN_OP(OP) \ + inline ScalarWrapper OP(const ScalarWrapper& s) { \ + return ScalarWrapper::un_op<_##OP##_>(s); \ + } \ + using std::OP; + +#define BIN_OP(OP) \ + inline ScalarWrapper OP(const ScalarWrapper& a,const ScalarWrapper& b) { \ + return ScalarWrapper::bin_op<_##OP##_>(a,b); \ + } \ + using std::OP; + + + UN_OP(abs) + + //https://eigen.tuxfamily.org/dox/namespaceEigen.html#a54cc34b64b4935307efc06d56cd531df + inline ScalarWrapper abs2(const ScalarWrapper& s) { + return s*s; + } + + + UN_OP(sqrt) + //UN_OP(exp) + //UN_OP(log) + //UN_OP(log2) + //UN_OP(log10) + //UN_OP(sin) + //UN_OP(sinh) + //UN_OP(asin) + //UN_OP(cos) + //UN_OP(cosh) + //UN_OP(acos) + //UN_OP(tan) + //UN_OP(tanh) + //UN_OP(atan) + //UN_OP(ceil) + //UN_OP(floor) + + BIN_OP(max) + inline ScalarWrapper maxi(const ScalarWrapper& a, const ScalarWrapper& b) { + return ScalarWrapper::bin_op<_max_>(a, b); + } + + BIN_OP(min) + inline ScalarWrapper mini(const ScalarWrapper& a, const ScalarWrapper& b) { + return ScalarWrapper::bin_op<_min_>(a, b); + } + + //BIN_OP(atan2) + //BIN_OP(pow) + + } //details +} //bparser + +//https://eigen.tuxfamily.org/dox/structEigen_1_1NumTraits.html +namespace Eigen { + template<> struct NumTraits + : NumTraits + { + typedef bparser::details::ScalarWrapper Real; + typedef bparser::details::ScalarWrapper NonInteger; + typedef bparser::details::ScalarWrapper Nested; + + enum { + IsComplex = 0, + IsInteger = 0, + IsSigned = 1, + RequireInitialization = 1, + ReadCost = HugeCost, + AddCost = HugeCost, + MulCost = HugeCost + }; + }; +} + +#endif //!INCLUDE_SCALAR_WRAPPER_HH_ \ No newline at end of file diff --git a/test/test_parser.cc b/test/test_parser.cc index 8f905df..e226692 100644 --- a/test/test_parser.cc +++ b/test/test_parser.cc @@ -226,7 +226,8 @@ void test_expression() { BP_ASSERT(test_expr("25 % cs3", {1})); BP_ASSERT(test_expr("25 % cv4", {1, 0, 1})); - + + BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2 })); BP_ASSERT(test_expr("[3, 4] @ [[1], [2]]", {11}, {1})); BP_ASSERT(test_expr("[3, 4, 1] @ [[1], [2], [3]]", {14}, {1})); ASSERT_THROW(test_expr("[[1], [2], [3]] @ [3, 4, 1]", {14}, {1}), "Matmult summing dimension mismatch"); @@ -236,6 +237,37 @@ void test_expression() { BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); BP_ASSERT(test_expr("a=[1,2,3]; a[:, None] @ a[None,:]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); + // 2×2 @ 2×2 → 2×2 + BP_ASSERT(test_expr( + "[[1, 2], [3, 4]] @ [[5, 6], [7, 8]]", + {19, 22, 43, 50}, // 1*5+2*7, 1*6+2*8, 3*5+4*7, 3*6+4*8 + {2, 2} + )); + + // 3×1×2 @ 2×3 → 3×1×3 (batched matmul) + BP_ASSERT(test_expr( + "[[[1,2]], [[3,4]], [[5,6]]] @ [[7,8,9], [10,11,12]]", + { + 27, 30, 33, // batch 0: [1,2]×[[7,8,9],[10,11,12]] + 61, 68, 75, // batch 1: [3,4]×... + 95,106,117 // batch 2: [5,6]×... + }, + {3, 1, 3} + )); + + BP_ASSERT(test_expr("diag([1,2,3])", { 1, 0, 0, 0, 2, 0, 0, 0, 3 }, { 3,3 })); + BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, { 2 })); + BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, { 3 })); + + BP_ASSERT(test_expr("tr([[1,9,9],[9,1,9],[9,9,1]])", { 3 }, {})); + + BP_ASSERT(test_expr("norm1([-4,-3,-2,-1,0,1,2,3,4])", {20}, {})); + BP_ASSERT(test_expr("norm1([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7 }, {})); + BP_ASSERT(test_expr("norm2([-4,-3,-2,-1,0,1,2,3,4])", { 7.745966692414834 }, {})); + //BP_ASSERT(test_expr("norm2([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7.3484692283495345 }, {})); //Spectral norm uses eigenvalues/singular values. Eigen uses comparison operators in the algorithm. Bparser does not like that + BP_ASSERT(test_expr("normfro([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 7.745966692414834 }, {})); + BP_ASSERT(test_expr("norminf([-4,-3,-2,-1,0,1,2,3,4])", { 4 }, {})); + BP_ASSERT(test_expr("norminf([[-4,-3,-2],[-1,0,1],[2,3,4]])", { 9 }, {})); BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2})); BP_ASSERT(test_expr("floor(-3.5)", {-4}, {})); diff --git a/test/test_speed.cc b/test/test_speed.cc index 2bdb7ce..6ac4fb4 100644 --- a/test/test_speed.cc +++ b/test/test_speed.cc @@ -23,6 +23,7 @@ #include "test_tools.hh" #include "arena_alloc.hh" +#include "arena_resource.hh" // Optimized structure, holds data in common arena struct ExprData { @@ -31,7 +32,8 @@ struct ExprData { { uint simd_bytes = sizeof(double) * simd_size; - arena = std::make_shared(simd_bytes, 512 * 1012); + patch_arena = std::make_shared(512 * 1012, simd_bytes); + arena = std::make_shared(*patch_arena);//(simd_bytes, 512 * 1012); v1 = arena->create_array(vec_size * 3); fill_seq(v1, 100, 100 + 3 * vec_size); v2 = arena->create_array(vec_size * 3); @@ -54,6 +56,7 @@ struct ExprData { ~ExprData() {} + std::shared_ptr patch_arena; std::shared_ptr arena; uint vec_size; uint simd_size; @@ -266,7 +269,7 @@ void test_expr(std::string expr, uint block_size, void (* func)(ExprData&)) { //std::cout << "vres: " << vres << ", " << vres + block_size << ", " << vres + 2*vec_size << "\n"; //std::cout << "Symbols: " << print_vector(p.symbols()) << "\n"; //std::cout.flush(); - p.compile(data1.arena); + p.compile(data1.patch_arena); std::vector ss = std::vector(data1.subset, data1.subset+vec_size/simd_size); p.set_subset(ss);