diff --git a/scripts/download_deps.sh b/scripts/download_deps.sh
index ed1d40a..b573363 100755
--- a/scripts/download_deps.sh
+++ b/scripts/download_deps.sh
@@ -76,7 +76,7 @@ mkdir -p $DOWNLOAD_DIR
 cd $DOWNLOAD_DIR
 
 if [ $TARGET_IS_MACOS -eq 1 ]; then
-    conditional_download Torch PHASM_USE_TORCH libtorch.zip https://download.pytorch.org/libtorch/cpu/libtorch-macos-1.10.1.zip libtorch-macos-1.10.1.zip
+    conditional_download Torch PHASM_USE_TORCH libtorch_macos.zip https://download.pytorch.org/libtorch/cpu/libtorch-macos-1.10.1.zip
     conditional_download Julia USE_JULIA julia.tar.gz https://julialang-s3.julialang.org/bin/mac/aarch64/1.9/julia-1.9.2-macaarch64.tar.gz
     conditional_download JANA PHASM_USE_JANA JANA.zip https://github.com/JeffersonLab/JANA2/archive/refs/tags/v2.0.6.zip
     conditional_download PIN PHASM_USE_PIN pin.tar.gz https://software.intel.com/sites/landingpage/pintool/downloads/pin-3.22-98547-g7a303a835-clang-mac.tar.gz
diff --git a/surrogate/CMakeLists.txt b/surrogate/CMakeLists.txt
index e15f294..c47b2f6 100644
--- a/surrogate/CMakeLists.txt
+++ b/surrogate/CMakeLists.txt
@@ -9,6 +9,7 @@ set(SURROGATE_LIBRARY_SOURCES
         src/tensor.cpp
         src/plugin_loader.cc
         src/flamegraph.cpp
+        src/omnitensor.cpp
         )
 add_library(phasm-surrogate STATIC ${SURROGATE_LIBRARY_SOURCES})
 
@@ -38,6 +39,7 @@ set(SURROGATE_LIBRARY_TEST_SOURCES
         test/plugin_tests.cc
        test/optics_oop_tests.cpp
         test/flamegraph_tests.cpp
+        test/omnitensor_tests.cpp
         )
 add_executable("phasm-surrogate-tests" ${SURROGATE_LIBRARY_TEST_SOURCES})
 target_include_directories(phasm-surrogate-tests PRIVATE include ../memtrace/include)
diff --git a/surrogate/include/omnitensor.hpp b/surrogate/include/omnitensor.hpp
new file mode 100644
index 0000000..b406ae1
--- /dev/null
+++ b/surrogate/include/omnitensor.hpp
@@ -0,0 +1,165 @@
+
+#pragma once
+#include "tensor.hpp"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <initializer_list>
+#include <map>
+#include <stdexcept>
+#include <vector>
+
+
+namespace phasm {
+
+
+class TensorIndices {
+    std::vector<size_t> m_data;
+public:
+    inline
TensorIndices(std::vector<size_t> indices) : m_data(indices) {};
+    inline TensorIndices(std::initializer_list<size_t> indices) : m_data(indices) {};
+    inline size_t get_dim_count() const { return m_data.size(); }
+    inline size_t get_index(size_t dim) const { return m_data[dim]; }
+    inline void set_index(size_t dim, size_t index) { m_data[dim] = index; }
+};
+
+
+namespace detail {
+
+class NestedListV1 {
+    std::map<std::vector<size_t>, std::pair<size_t, size_t>> m_leaves;
+public:
+    void insert(const TensorIndices& indices, size_t offset, size_t count);
+    std::pair<size_t, size_t> get(const TensorIndices& index);
+};
+
+class NestedListV2 {
+    std::vector<size_t> m_contents;
+    size_t m_depth;
+public:
+    inline NestedListV2(size_t depth) : m_depth(depth) {};
+    void append(const TensorIndices& indices, size_t offset, size_t count);
+    void insert(const TensorIndices& indices, size_t offset, size_t count);
+    std::pair<size_t, size_t> get(const TensorIndices& indices);
+};
+
+
+class NestedListV3 {
+    std::vector<size_t> m_contents;
+    size_t m_depth;
+public:
+    inline NestedListV3(size_t depth) : m_depth(depth) {}
+    void reserve(size_t dim, size_t length);
+    void append(const TensorIndices& indices, size_t offset, size_t count);
+    std::pair<size_t, size_t> get(const TensorIndices& index);
+};
+
+} // namespace detail
+
+
+
+class omnitensor {
+
+public:
+    enum class DimType { Array, Vector, List };
+
+    void* m_data;
+    size_t* m_sizes;
+    size_t m_capacity;
+    size_t m_length = 0;
+    size_t m_dim_count;
+    std::vector<size_t> m_shape;  // 0 for variably-sized dimensions
+    std::vector<DimType> m_dimtypes;
+    std::vector<size_t> m_dim_offsets;
+    DType m_dtype;
+
+public:
+    omnitensor(DType dtype, std::vector<size_t> shapes, size_t capacity=1024) {
+
+        assert(capacity > 0);
+        m_capacity = capacity;
+        m_shape = shapes;
+        m_dim_count = shapes.size();
+        m_dtype = dtype;
+        m_sizes = new size_t[capacity];
+
+        switch(dtype) {
+            case DType::UI8: m_data = new uint8_t[capacity]; break;
+            case DType::I16: m_data = new int16_t[capacity]; break;
+            case DType::I32: m_data = new int32_t[capacity]; break;
+            case DType::I64: m_data = new
int64_t[capacity]; break;
+            case DType::F32: m_data = new float[capacity]; break;
+            case DType::F64: m_data = new double[capacity]; break;
+            default: throw std::runtime_error("Bad dtype");
+        }
+
+        m_length = 1;
+        for (size_t i = 0; i < m_dim_count; ++i) {
+            m_length *= m_shape[i]; // If we have any zero-dimension dims, length will be 0
+
+            if (i == m_dim_count - 1) {
+                if (m_shape[i] == 0) {
+                    m_dimtypes.push_back(DimType::Vector);
+                }
+                else {
+                    m_dimtypes.push_back(DimType::Array);
+                }
+            }
+            else {
+                if (m_shape[i] == 0 && m_shape[i+1] == 0) {
+                    m_dimtypes.push_back(DimType::List);
+                }
+                else if (m_shape[i] == 0 && m_shape[i+1] != 0) {
+                    m_dimtypes.push_back(DimType::Vector);
+                }
+                else if (m_shape[i] != 0 && m_shape[i+1] == 0) {
+                    assert(false);
+                }
+                else if (m_shape[i] != 0 && m_shape[i+1] != 0) {
+                    m_dimtypes.push_back(DimType::Array);
+                }
+            }
+        }
+
+        size_t offset = 1;
+        m_dim_offsets.resize(m_dim_count);
+        for (size_t dim = 0; dim < m_dim_count; ++dim) {
+            size_t rdim = m_dim_count - 1 - dim;
+            m_dim_offsets[rdim] = offset;
+            offset *= m_shape[rdim];
+        }
+
+    };
+    size_t length() {
+        return m_length;
+    }
+
+    ~omnitensor();
+
+    inline size_t offset(const TensorIndices& indices) {
+        size_t offset = 0;
+        for (size_t dim=0; dim<indices.get_dim_count(); ++dim) {
+            offset += indices.get_index(dim) * m_dim_offsets[dim];
+        }
+        return offset;
+    }
+
+    inline size_t length(const TensorIndices& indices) {
+        size_t dims_given = indices.get_dim_count();
+        if (dims_given == 0) return m_length;
+        return m_dim_offsets[dims_given - 1];
+    }
+
+    template <typename T>
+    std::pair<T*, size_t> data(const TensorIndices& indices) {
+        return { static_cast<T*>(m_data)+offset(indices), length(indices)};
+    }
+};
+
+}; // namespace phasm
diff --git a/surrogate/src/omnitensor.cpp b/surrogate/src/omnitensor.cpp
new file mode 100644
index 0000000..ce4a40d
--- /dev/null
+++ b/surrogate/src/omnitensor.cpp
@@ -0,0 +1,106 @@
+#include <omnitensor.hpp>
+#include <iostream>
+#include <stdexcept>
+
+namespace phasm {
+
+
+namespace detail {
+
+void NestedListV3::reserve(size_t dim, size_t length) {
+
+    size_t current_offset = 0;
+    if (dim > m_depth) {
+        throw std::runtime_error("Dimension is too deep!");
+    }
+    for (size_t i=0; i<dim; ++i) {
+        size_t size = m_contents[current_offset+1];
+        current_offset = m_contents[current_offset+size+1];
+    }
+    size_t capacity = m_contents[current_offset];
+    size_t size = m_contents[current_offset+1];
+    if (size >= capacity) {
+        throw std::runtime_error("Outer dimension is already full!");
+    }
+    m_contents[current_offset+1] = size+1;
m_contents[current_offset+size+1] = m_contents.size();
+    m_contents.push_back(length);
+    m_contents.push_back(0);
+    for (int i=0; i<length; ++i) {
+        m_contents.push_back(0);
+    }
+}
+
+std::pair<size_t, size_t> NestedListV3::get(const TensorIndices& indices) {
+
+    size_t current_offset = 0;
+    for (size_t i=0; i<m_depth; ++i) {
+        size_t index = (indices.get_dim_count() > m_depth) ? indices.get_index(dim) : 0;
+        size_t capacity = m_contents[current_offset];
+        if (capacity == 0) {
+            throw std::runtime_error("Outer dimension needs to be reserved first!");
+        }
+        size_t size = m_contents[current_offset+1];
+        size_t pointer_to_last = m_contents[current_offset+size+1];
+        current_offset = pointer_to_last;
+    }
+
+    size_t capacity = m_contents[current_offset];
+    size_t size = m_contents[current_offset+1];
+    if (size >= capacity) {
+        throw std::runtime_error("Outer dimension is already full!");
+    }
+    m_contents[current_offset+1] = size+1;
+    m_contents[current_offset+size+1] = m_contents.size();
+    m_contents.push_back(length);
+    m_contents.push_back(0);
+    for (int i=0; i<length; ++i) {
+        m_contents.push_back(0);
+    }
+    size_t index = (indices.get_dim_count() > dim) ? indices.get_index(dim) : 0;
+        // In case the user provided an index _shorter_ than m_depth, "pad" index with extra '0's.
+
+
+    }
+    return {};
+}
+
+
+} // namespace detail
+
+
+omnitensor::~omnitensor() {
+    delete[] m_sizes;  // allocated unconditionally in the ctor; must be freed regardless of dtype
+    switch (m_dtype) {
+        case DType::UI8: delete[] static_cast<uint8_t*>(m_data); break;
+        case DType::I16: delete[] static_cast<int16_t*>(m_data); break;
+        case DType::I32: delete[] static_cast<int32_t*>(m_data); break;
+        case DType::I64: delete[] static_cast<int64_t*>(m_data); break;
+        case DType::F32: delete[] static_cast<float*>(m_data); break;
+        case DType::F64: delete[] static_cast<double*>(m_data); break;
+        default:
+            if (m_length > 0) {
+                std::cout << "PHASM: Memory leak due to invalid (corrupt?) 
tensor dtype" << std::endl;
+                std::terminate();
+            }
+            break;
+    }
+};
+
+
+
+} // namespace phasm
+
diff --git a/surrogate/test/omnitensor_tests.cpp b/surrogate/test/omnitensor_tests.cpp
new file mode 100644
index 0000000..8358f1b
--- /dev/null
+++ b/surrogate/test/omnitensor_tests.cpp
@@ -0,0 +1,63 @@
+
+
+#include <catch.hpp>
+#include <omnitensor.hpp>
+#include <iostream>
+
+namespace phasm {
+
+TEST_CASE("OmnitensorBasic") {
+
+    SECTION("1x1") {
+        phasm::omnitensor t = phasm::omnitensor(phasm::DType::F32,{1},1);
+        REQUIRE(t.length() == 1);
+        REQUIRE(t.offset({0}) == 0);
+        REQUIRE(t.offset({0}) < t.length());
+    }
+    SECTION("3x5") {
+        phasm::omnitensor t = phasm::omnitensor(phasm::DType::F32,{3,5},1);
+        REQUIRE(t.length() == 15);
+
+        REQUIRE(t.offset({1,1}) == 6);
+        REQUIRE(t.length({1,1}) == 1);
+
+        REQUIRE(t.offset({2,4}) == 14);
+        REQUIRE(t.length({2,4}) == 1);
+
+        REQUIRE(t.offset({1}) == 5);
+        REQUIRE(t.length({1}) == 5);
+
+        REQUIRE(t.offset({2}) == 10);
+        REQUIRE(t.length({2}) == 5);
+
+        REQUIRE(t.offset({}) == 0);
+        REQUIRE(t.length({}) == 15);
+    }
+    SECTION("2x3x5") {
+        phasm::omnitensor t = phasm::omnitensor(phasm::DType::F32,{2,3,5},1);
+        REQUIRE(t.length() == 30);
+        REQUIRE(t.offset({0,1,1}) == 6);
+        REQUIRE(t.length({0,1,1}) == 1);
+
+        REQUIRE(t.offset({0,1}) == 5);
+        REQUIRE(t.length({0,1}) == 5);
+
+        REQUIRE(t.offset({0}) == 0);
+        REQUIRE(t.length({0}) == 15);
+
+        REQUIRE(t.offset({}) == 0);
+        REQUIRE(t.length({}) == 30);
+
+        REQUIRE(t.offset({1,1,1}) == 21);
+        REQUIRE(t.length({1,1,1}) == 1);
+
+        REQUIRE(t.offset({1,1}) == 20);
+        REQUIRE(t.length({1,1}) == 5);
+
+        REQUIRE(t.offset({1}) == 15);
+        REQUIRE(t.length({1}) == 15);
+    }
+}
+
+} // namespace phasm
+
diff --git a/torch_plugin/src/torchscript_model.cpp b/torch_plugin/src/torchscript_model.cpp
index 699596b..e3ded0c 100644
--- a/torch_plugin/src/torchscript_model.cpp
+++ b/torch_plugin/src/torchscript_model.cpp
@@ -100,10 +100,10 @@ bool TorchscriptModel::infer() {
     }
     else if (output.isTuple()) {
         auto tuple = output.toTuple();
-        if
(tuple->size() != m_outputs.size()) {
+        if (tuple->elements().size() != m_outputs.size()) {
             std::cerr << "PHASM: FATAL ERROR: Torchscript model output tuple size mismatch" << std::endl;
             std::cerr << "       Surrogate expects " << m_outputs.size() << std::endl;
-            std::cerr << "       PT file provides " << tuple->size() << std::endl;
+            std::cerr << "       PT file provides " << tuple->elements().size() << std::endl;
             std::cerr << "       Filename is '" << m_filename << "'" << std::endl;
             exit(1);
         }