From 9d8e0c2c96214c204130ffef7e0f5f09251e2de3 Mon Sep 17 00:00:00 2001 From: David Koes Date: Thu, 14 Sep 2023 15:58:06 -0400 Subject: [PATCH 01/35] Switch to pip install over setup.py --- python/CMakeLists.txt | 2 +- test/test_transform.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index c2c99b1..8fe775a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -76,4 +76,4 @@ endforeach() configure_file(setup.py ${CMAKE_CURRENT_BINARY_DIR}/setup.py) #TODO: figure out reasonable install, also how to build both python2 and python3 -install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/setup.py install --prefix ${CMAKE_INSTALL_PREFIX})") +install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install ${CMAKE_CURRENT_BINARY_DIR} --prefix ${CMAKE_INSTALL_PREFIX})") diff --git a/test/test_transform.cpp b/test/test_transform.cpp index d528684..88976fd 100644 --- a/test/test_transform.cpp +++ b/test/test_transform.cpp @@ -7,7 +7,7 @@ #define BOOST_TEST_MODULE transform_test #include -#include +#include #include #include From cff124cedd64e826934e13a18a5005908ca3c3db Mon Sep 17 00:00:00 2001 From: David Koes Date: Fri, 15 Sep 2023 09:47:29 -0400 Subject: [PATCH 02/35] give up on pip install --- python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 8fe775a..c2c99b1 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -76,4 +76,4 @@ endforeach() configure_file(setup.py ${CMAKE_CURRENT_BINARY_DIR}/setup.py) #TODO: figure out reasonable install, also how to build both python2 and python3 -install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install ${CMAKE_CURRENT_BINARY_DIR} --prefix ${CMAKE_INSTALL_PREFIX})") +install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/setup.py install --prefix ${CMAKE_INSTALL_PREFIX})") From a4634ce9db86ce0b2578c6d45ea829387e532b26 Mon Sep 17 00:00:00 2001 From: David Koes Date: Mon, 13 Nov 2023 11:27:27 -0500 Subject: [PATCH 03/35] link CI badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 43f59d7..0126f14 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ libmolgrid ========== -![Github CI](https://github.com/gnina/libmolgrid/actions/workflows/CI.yml/badge.svg) +[![Github CI](https://github.com/gnina/libmolgrid/actions/workflows/CI.yml/badge.svg)](https://github.com/gnina/libmolgrid/actions) [![codecov](https://codecov.io/gh/gnina/libmolgrid/branch/master/graph/badge.svg?token=gUFisf34rf)](https://codecov.io/gh/gnina/libmolgrid) libmolgrid is under active development, but should be suitable for use by early adopters. From 14e88915837972df4816c12aa9dfb66ee220ea9f Mon Sep 17 00:00:00 2001 From: David Koes Date: Sat, 2 Mar 2024 11:22:09 -0500 Subject: [PATCH 04/35] Better deal with merging empty coordinate sets --- src/coordinateset.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coordinateset.cpp b/src/coordinateset.cpp index a9525a4..af1a867 100644 --- a/src/coordinateset.cpp +++ b/src/coordinateset.cpp @@ -274,7 +274,8 @@ void CoordinateSet::mergeInto(const CoordinateSet& rec, const CoordinateSet& lig if(rec.type_vector.dimension(1) != lig.type_vector.dimension(1)) { throw std::invalid_argument("Type vectors are incompatible sizes"); } - if(rec.has_vector_types() != lig.has_vector_types() || rec.has_indexed_types() != lig.has_indexed_types()) { + if((rec.has_vector_types() != lig.has_vector_types()) && (rec.has_indexed_types() != lig.has_indexed_types())) { + //empty coordsets are true for both, so and throw std::invalid_argument("Incompatible types when combining coodinate sets"); } if(rec.has_indexed_types()) { From d5dc460877c78f738b203a308a7dd96d54d70eda Mon Sep 17 00:00:00 2001 From: David Koes Date: Sat, 2 Mar 2024 13:40:24 -0500 Subject: [PATCH 05/35] Update CI Don't update - don't want newer drivers than host --- .github/workflows/CI.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index dc85afb..47e7a75 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -21,9 +21,6 @@ jobs: with: fetch-depth: 0 - - name: apt update - run: apt update -y && apt upgrade -y - - name: Build from src run: mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE .. && make -j8 && make install From 47213e16715a1536809e2dd3ef944760894c46c2 Mon Sep 17 00:00:00 2001 From: David Koes Date: Sun, 10 Mar 2024 14:10:06 -0400 Subject: [PATCH 06/35] No apt update in CI --- .github/workflows/Coverage.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/Coverage.yml b/.github/workflows/Coverage.yml index 1c261da..6f458a8 100644 --- a/.github/workflows/Coverage.yml +++ b/.github/workflows/Coverage.yml @@ -19,9 +19,6 @@ jobs: with: fetch-depth: 0 - - name: apt update - run: apt update -y && apt upgrade -y - - name: Build from src continue-on-error: true # The one test will most likely fail, but if we skip we lose a lot of coverage. run: mkdir build && cd build && cmake -DBUILD_COVERAGE=1 .. && make -j8 && make install From 5a642b14087be643f059cb6f7c9f3be8b16cf893 Mon Sep 17 00:00:00 2001 From: David Koes Date: Wed, 29 May 2024 10:11:39 -0400 Subject: [PATCH 07/35] Address Issue #119 ExampleProvider now has save_mem_caches and load_mem_caches so the current state of the in-memory cache can be saved and restored. --- .gitignore | 3 +- .vscode/c_cpp_properties.json | 23 + .vscode/settings.json | 74 ++++ .vscode/tasks.json | 24 ++ CMakeLists.txt | 4 +- include/libmolgrid/coord_cache.h | 108 +++-- include/libmolgrid/coordinateset.h | 26 ++ include/libmolgrid/example.h | 13 +- include/libmolgrid/example_extractor.h | 32 ++ include/libmolgrid/example_provider.h | 277 +++++++----- include/libmolgrid/managed_grid.h | 568 ++++++++++++++++--------- include/libmolgrid/string_cache.h | 30 ++ python/bindings.h | 1 + python/bindings.in.cpp | 3 + src/CMakeLists.txt | 2 + src/coord_cache.cpp | 154 ++++--- src/example.cpp | 2 - src/example_extractor.cpp | 76 ++-- src/string_cache.cpp | 7 + test/test_example_provider.py | 23 + 20 files changed, 994 insertions(+), 456 deletions(-) create mode 100644 .vscode/c_cpp_properties.json create mode 100644 .vscode/settings.json create mode 100644 .vscode/tasks.json create mode 100644 include/libmolgrid/string_cache.h create mode 100644 src/string_cache.cpp diff --git a/.gitignore b/.gitignore index 36a2411..477a210 100644 --- a/.gitignore +++ b/.gitignore @@ -11,8 +11,7 @@ _site/ .bundle/ vendor/ -# Ignore editors -.vscode +# Ignore editors (but not vscode) /.pytest_cache/ .cproject .idea diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..2f9028c --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,23 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**", + "${workspaceFolder}/include/**", + "/usr/local/cuda/include", + "/usr/local/include/openbabel3/", + "/usr/include", + "/usr/include/c++/11/", + "${workspaceFolder}/include/**"], + "defines": [], + "compilerPath": "/usr/bin/c++", + "cStandard": "c17", + "cppStandard": "gnu++17", + "intelliSenseMode": "linux-gcc-x64", + "compileCommands": "${workspaceFolder}/build/compile_commands.json", + "configurationProvider": "ms-vscode.cmake-tools" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..59f9872 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,74 @@ +{ + "files.associations": { + "stdexcept": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "array": "cpp", + "atomic": "cpp", + "strstream": "cpp", + "bit": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "chrono": "cpp", + "compare": "cpp", + "complex": "cpp", + "concepts": "cpp", + "condition_variable": "cpp", + "cstdint": "cpp", + "deque": "cpp", + "list": "cpp", + "map": "cpp", + "set": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "ratio": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "mutex": "cpp", + "new": "cpp", + "numbers": "cpp", + "ostream": "cpp", + "semaphore": "cpp", + "sstream": "cpp", + "stop_token": "cpp", + "streambuf": "cpp", + "thread": "cpp", + "cinttypes": "cpp", + "typeindex": "cpp", + "typeinfo": "cpp", + "variant": "cpp" + }, + "C_Cpp.files.exclude": { + "/usr/local/include/libmolgrid/**": true + } +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..dc1a631 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,24 @@ +{ + "version": "2.0.0", + "isShelllCommand": true, + "tasks": [ + { + "label": "build", + "options": { + "cwd": "${workspaceRoot}/build" + }, + "type": "shell", + "command": "make -j24", + "group": "build" + }, + { + "label": "build-debug", + "options": { + "cwd": "${workspaceRoot}/build-debug" + }, + "type": "shell", + "command": "make -j24", + "group": "build" + } + ] +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 078176a..3ac2211 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,8 @@ if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + option(BUILD_SHARED "Build shared library" ON) option(BUILD_STATIC "Build static library" ON) option(BUILD_COVERAGE "Build with code coverage" OFF) @@ -36,7 +38,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}) #dependencies find_package(CUDA REQUIRED) -find_package(Boost REQUIRED COMPONENTS regex unit_test_framework program_options system filesystem iostreams) +find_package(Boost REQUIRED COMPONENTS regex unit_test_framework program_options system filesystem iostreams serialization) find_package(OpenBabel3 REQUIRED) include_directories(SYSTEM ${OPENBABEL3_INCLUDE_DIR}) find_package(ZLIB) diff --git a/include/libmolgrid/coord_cache.h b/include/libmolgrid/coord_cache.h index c6ba04e..842d045 100644 --- a/include/libmolgrid/coord_cache.h +++ b/include/libmolgrid/coord_cache.h @@ -7,11 +7,14 @@ #ifndef COORD_CACHE_H_ #define COORD_CACHE_H_ -#include "libmolgrid/coordinateset.h" #include "libmolgrid/atom_typer.h" +#include "libmolgrid/coordinateset.h" #include "libmolgrid/example.h" #include - +#include +#include +#include +#include namespace libmolgrid { /** \brief Load and cache molecular coordinates and atom types. @@ -21,36 +24,77 @@ namespace libmolgrid { * training runs. */ class CoordCache { - using MemCache = std::unordered_map; - MemCache memcache; - std::shared_ptr typer; - std::string data_root; - std::string molcache; - bool use_cache = true; //is possible to disable caching - bool addh = true; ///protonate - bool make_vector_types = false; ///convert index types to vector, will also convert to type based radii and add a dummy type - - //for memory mapped cache - boost::iostreams::mapped_file_source cache_map; - std::unordered_map offsets; //map from names to position in cache_map - - public: - CoordCache() {} - CoordCache(std::shared_ptr t, - const ExampleProviderSettings& settings = ExampleProviderSettings(), - const std::string& mc = ""); - ~CoordCache() {} - - /** \brief Set coord to the appropriate CoordinateSet for fname - * @param[in] fname file name, not including root directory prefix, of molecular data - * @param[out] coord CoordinateSet for passed molecule - */ - void set_coords(const char *fname, CoordinateSet& coord); - - /// return the number of types (channels) each example will have - size_t num_types() const { return typer->num_types(); } - - std::vector get_type_names() const { return typer->get_type_names(); } + using MemCache = std::unordered_map; + MemCache memcache; + std::shared_ptr typer; + std::string data_root; + std::string molcache; + bool use_cache = true; // is possible to disable caching + bool addh = true; /// protonate + bool make_vector_types = + false; /// convert index types to vector, will also convert to type based + /// radii and add a dummy type + + // for memory mapped cache + boost::iostreams::mapped_file_source cache_map; + std::unordered_map + offsets; // map from names to position in cache_map + +public: + CoordCache() {} + CoordCache( + std::shared_ptr t, + const ExampleProviderSettings &settings = ExampleProviderSettings(), + const std::string &mc = ""); + ~CoordCache() {} + + /** \brief Set coord to the appropriate CoordinateSet for fname + * @param[in] fname file name, not including root directory prefix, of + * molecular data + * @param[out] coord CoordinateSet for passed molecule + */ + void set_coords(const char *fname, CoordinateSet &coord); + + /// return the number of types (channels) each example will have + size_t num_types() const { return typer->num_types(); } + + std::vector get_type_names() const { + return typer->get_type_names(); + } + + /** \brief Write out current contents of memory cache to provided output + * stream. + * @param[in] out output stream + */ + void save_mem_cache(std::ostream &out) const; + + /** \brief Write out current contents of memory cache to provided file. + * @param[in] fname file name + */ + void save_mem_cache(const std::string &fname) const { + std::ofstream out(fname.c_str()); + if (!out) + throw std::invalid_argument("Could not open file " + fname); + save_mem_cache(out); + } + + /** \brief Read contents of input stream into memory cache. + * @param[in] in input stream + */ + void load_mem_cache(std::istream &in); + /** \brief Read contents of provided file into memory cache. + * @param[in] fname file name + */ + void load_mem_cache(const std::string &fname) { + std::ifstream in(fname.c_str()); + if (!in) + throw std::invalid_argument("Could not load file " + fname); + load_mem_cache(in); + } + + size_t mem_cache_size() const { + return memcache.size(); + } }; } /* namespace libmolgrid */ diff --git a/include/libmolgrid/coordinateset.h b/include/libmolgrid/coordinateset.h index 18f4843..351aa93 100644 --- a/include/libmolgrid/coordinateset.h +++ b/include/libmolgrid/coordinateset.h @@ -12,7 +12,10 @@ #include #include +#include + #include "libmolgrid/managed_grid.h" +#include "libmolgrid/string_cache.h" namespace libmolgrid { @@ -136,6 +139,29 @@ struct CoordinateSet { ///for debugging void dump(std::ostream& out) const; + + friend class boost::serialization::access; + template void serialize(Archive &ar, const unsigned version) { + ar & coords; + ar & type_index; + ar & type_vector; + ar & radii; + ar & max_type; + + if (Archive::is_saving::value) { + std::string tmp; + if(src) { + tmp = src; + } + ar & tmp; + } else { //reading + std::string tmp; + ar & tmp; + if(tmp.length() > 0) { + src = string_cache.get(tmp); + } + } + } }; extern template size_t CoordinateSet::copyTo(Grid& c, Grid& t, Grid& r) const; diff --git a/include/libmolgrid/example.h b/include/libmolgrid/example.h index 0efb1b9..c75f8b1 100644 --- a/include/libmolgrid/example.h +++ b/include/libmolgrid/example.h @@ -15,6 +15,7 @@ #include #include #include "libmolgrid/coordinateset.h" +#include "libmolgrid/string_cache.h" namespace libmolgrid { @@ -161,19 +162,7 @@ struct ExampleRef { }; -//for memory efficiency, only store a given string once and use the const char* -class StringCache { - std::unordered_set strings; -public: - const char* get(const std::string& s) - { - strings.insert(s); - //we assume even as the set is resized that strings never get allocated - return strings.find(s)->c_str(); - } -}; -extern StringCache string_cache; } /* namespace libmolgrid */ diff --git a/include/libmolgrid/example_extractor.h b/include/libmolgrid/example_extractor.h index 887ae7c..f7461bf 100644 --- a/include/libmolgrid/example_extractor.h +++ b/include/libmolgrid/example_extractor.h @@ -88,6 +88,38 @@ class ExampleExtractor { ///return names of types for explicitly typed examples ///type names are prepended by coordinate set index virtual std::vector get_type_names() const; + + /** \brief Write out current contents of memory caches to provided output + * stream. + * @param[in] out output stream + */ + void save_mem_caches(std::ostream &out) const; + + /** \brief Write out current contents of memory caches to provided file. + * @param[in] fname file name + */ + void save_mem_caches(const std::string &fname) const { + std::ofstream out(fname.c_str()); + if (!out) + throw std::invalid_argument("Could not open file " + fname); + save_mem_caches(out); + } + + /** \brief Read contents of input stream into memory caches. + * @param[in] in input stream + */ + void load_mem_caches(std::istream &in); + /** \brief Read contents of provided file into memory caches. + * @param[in] fname file name + */ + void load_mem_caches(const std::string &fname) { + std::ifstream in(fname.c_str()); + if (!in) + throw std::invalid_argument("Could not load file " + fname); + load_mem_caches(in); + } + + size_t mem_caches_size() const; }; } /* namespace libmolgrid */ diff --git a/include/libmolgrid/example_provider.h b/include/libmolgrid/example_provider.h index f3f0361..e6eb850 100644 --- a/include/libmolgrid/example_provider.h +++ b/include/libmolgrid/example_provider.h @@ -10,12 +10,12 @@ #define EXAMPLE_PROVIDER_H_ #include "libmolgrid/example.h" -#include "libmolgrid/exampleref_providers.h" #include "libmolgrid/example_extractor.h" +#include "libmolgrid/exampleref_providers.h" namespace libmolgrid { - // Docstring_ExampleProvider +// Docstring_ExampleProvider /** \brief Given a file of examples, provide Example classes one at a time * This contains an ExampleRefProvider, which can be configured using a * single settings object if so desired, and an example extractor. @@ -23,121 +23,172 @@ namespace libmolgrid { * of the dataset into memory. * * An example files contains a single example on each line where an example - * consists of some number of numerical labels (num_labels, will be auto-detected - * if not specified) followed by file paths to molecular data, all space separated. + * consists of some number of numerical labels (num_labels, will be + * auto-detected if not specified) followed by file paths to molecular data, all + * space separated. */ class ExampleProvider { - std::shared_ptr provider; - ExampleExtractor extractor; - ExampleProviderSettings init_settings; //save settings created with - size_t last_epoch = 0; - - public: - - /// return provider as specifyed by settings - static std::shared_ptr createProvider(const ExampleProviderSettings& settings); - - /// Create provider using default gnina typing - ExampleProvider(const ExampleProviderSettings& settings=ExampleProviderSettings()); - - /// Create provider/extractor according to settings with single typer - ExampleProvider(const ExampleProviderSettings& settings, std::shared_ptr t); - - /// Create provider/extractor according to settings with two typers - ExampleProvider(const ExampleProviderSettings& settings, std::shared_ptr t1, std::shared_ptr t2); - - /// Create provider/extractor according to settings - ExampleProvider(const ExampleProviderSettings& settings, const std::vector >& typrs, - const std::vector& molcaches = std::vector()); - - /// use provided provider - ExampleProvider(std::shared_ptr p, const ExampleExtractor& e); - virtual ~ExampleProvider() {} - - ///load example file file fname and setup provider - virtual void populate(const std::string& fname, int num_labels=-1); - ///load multiple example files - virtual void populate(const std::vector& fnames, int num_labels=-1); - - ///return number of labels for each example (computed from first example only) - virtual size_t num_labels() const { return provider->num_labels(); } - - ///provide next example - virtual void next(Example& ex); - virtual Example next() { Example ex; next(ex); return ex; } - - /** \brief Return current small epoch number - * A small epoch occurs once every training example has been - * seen at MOST once. For example, when providing a balanced - * view of unbalanced data, a small epoch will complete once the - * less common class has been iterated over. - * Note this is the epoch of the next example to be provided, not the previous. - */ - virtual size_t get_small_epoch_num() const { return provider->get_small_epoch_num(); } - - /** \brief Return current large epoch number - * A large epoch occurs once every training example has been - * seen at LEAST once. For example, when providing a balanced - * view of unbalanced data, a large epoch will complete once the - * more common class has been iterated over. - * Note this is the epoch of the next example to be provided, not the previous. - */ - virtual size_t get_large_epoch_num() const { return provider->get_large_epoch_num(); } - - /// Return number of example in small epoch - virtual size_t small_epoch_size() const { return provider->small_epoch_size(); } - - /// Return number of example in large epoch - virtual size_t large_epoch_size() const { return provider->large_epoch_size(); } - - /// Reset to beginning - virtual void reset() { provider->reset(); } - - ///skip over the first n examples - virtual void skip(unsigned n); - - ///return settings created with - const ExampleProviderSettings& settings() const { return init_settings; } - - - virtual void next_batch(std::vector& ex, unsigned batch_size=0); - - ///provide a batch of examples, unspecified or 0 batch_size uses default batch size - virtual std::vector next_batch(unsigned batch_size=0) { - std::vector ex; - next_batch(ex, batch_size); - return ex; - } - - ///return true if we have crossed into a new epoch (or are about to) - ///Note that once this returns true once, it won't again until the next - ///epoch has been consumed. - bool at_new_epoch() { - if(init_settings.iteration_scheme == LargeEpoch) { - size_t e = provider->get_large_epoch_num(); - if(e != last_epoch) { - last_epoch = e; - return true; - } - } else if(init_settings.iteration_scheme == SmallEpoch) { - size_t e = provider->get_small_epoch_num(); - if(e != last_epoch) { - last_epoch = e; - return true; - } + std::shared_ptr provider; + ExampleExtractor extractor; + ExampleProviderSettings init_settings; // save settings created with + size_t last_epoch = 0; + +public: + /// return provider as specifyed by settings + static std::shared_ptr + createProvider(const ExampleProviderSettings &settings); + + /// Create provider using default gnina typing + ExampleProvider( + const ExampleProviderSettings &settings = ExampleProviderSettings()); + + /// Create provider/extractor according to settings with single typer + ExampleProvider(const ExampleProviderSettings &settings, + std::shared_ptr t); + + /// Create provider/extractor according to settings with two typers + ExampleProvider(const ExampleProviderSettings &settings, + std::shared_ptr t1, std::shared_ptr t2); + + /// Create provider/extractor according to settings + ExampleProvider( + const ExampleProviderSettings &settings, + const std::vector> &typrs, + const std::vector &molcaches = std::vector()); + + /// use provided provider + ExampleProvider(std::shared_ptr p, + const ExampleExtractor &e); + virtual ~ExampleProvider() {} + + /// load example file file fname and setup provider + virtual void populate(const std::string &fname, int num_labels = -1); + /// load multiple example files + virtual void populate(const std::vector &fnames, + int num_labels = -1); + + /// return number of labels for each example (computed from first example + /// only) + virtual size_t num_labels() const { return provider->num_labels(); } + + /// provide next example + virtual void next(Example &ex); + virtual Example next() { + Example ex; + next(ex); + return ex; + } + + /** \brief Return current small epoch number + * A small epoch occurs once every training example has been + * seen at MOST once. For example, when providing a balanced + * view of unbalanced data, a small epoch will complete once the + * less common class has been iterated over. + * Note this is the epoch of the next example to be provided, not the + * previous. + */ + virtual size_t get_small_epoch_num() const { + return provider->get_small_epoch_num(); + } + + /** \brief Return current large epoch number + * A large epoch occurs once every training example has been + * seen at LEAST once. For example, when providing a balanced + * view of unbalanced data, a large epoch will complete once the + * more common class has been iterated over. + * Note this is the epoch of the next example to be provided, not the + * previous. + */ + virtual size_t get_large_epoch_num() const { + return provider->get_large_epoch_num(); + } + + /// Return number of example in small epoch + virtual size_t small_epoch_size() const { + return provider->small_epoch_size(); + } + + /// Return number of example in large epoch + virtual size_t large_epoch_size() const { + return provider->large_epoch_size(); + } + + /// Reset to beginning + virtual void reset() { provider->reset(); } + + /// skip over the first n examples + virtual void skip(unsigned n); + + /// return settings created with + const ExampleProviderSettings &settings() const { return init_settings; } + + virtual void next_batch(std::vector &ex, unsigned batch_size = 0); + + /// provide a batch of examples, unspecified or 0 batch_size uses default + /// batch size + virtual std::vector next_batch(unsigned batch_size = 0) { + std::vector ex; + next_batch(ex, batch_size); + return ex; + } + + /// return true if we have crossed into a new epoch (or are about to) + /// Note that once this returns true once, it won't again until the next + /// epoch has been consumed. + bool at_new_epoch() { + if (init_settings.iteration_scheme == LargeEpoch) { + size_t e = provider->get_large_epoch_num(); + if (e != last_epoch) { + last_epoch = e; + return true; + } + } else if (init_settings.iteration_scheme == SmallEpoch) { + size_t e = provider->get_small_epoch_num(); + if (e != last_epoch) { + last_epoch = e; + return true; } - return false; } - - ExampleExtractor& get_extractor() { return extractor; } - ExampleRefProvider& get_provider() { return *provider; } - - ///number of types - size_t num_types() const { return extractor.num_types(); } - ///names of types (requires explicit typing) - std::vector get_type_names() const { return extractor.get_type_names(); } - ///return number of examples - size_t size() const { return provider->size(); } + return false; + } + + ExampleExtractor &get_extractor() { return extractor; } + ExampleRefProvider &get_provider() { return *provider; } + + /// number of types + size_t num_types() const { return extractor.num_types(); } + /// names of types (requires explicit typing) + std::vector get_type_names() const { + return extractor.get_type_names(); + } + /// return number of examples + size_t size() const { return provider->size(); } + + /** \brief Write out current contents of memory caches to provided output + * stream. + * @param[in] out output stream + */ + void save_mem_caches(std::ostream &out) const { + extractor.save_mem_caches(out); + } + + /** \brief Write out current contents of memory caches to provided file. + * @param[in] fname file name + */ + void save_mem_caches(const std::string &fname) const { + extractor.save_mem_caches(fname); + } + + /** \brief Read contents of input stream into memory caches. + * @param[in] in input stream + */ + void load_mem_caches(std::istream &in) { extractor.load_mem_caches(in); } + /** \brief Read contents of provided file into memory caches. + * @param[in] fname file name + */ + void load_mem_caches(const std::string &fname) { extractor.load_mem_caches(fname); } + + size_t mem_caches_size() const { return extractor.mem_caches_size(); } }; } /* namespace libmolgrid */ diff --git a/include/libmolgrid/managed_grid.h b/include/libmolgrid/managed_grid.h index a81426a..fd64353 100644 --- a/include/libmolgrid/managed_grid.h +++ b/include/libmolgrid/managed_grid.h @@ -13,120 +13,141 @@ #include #include #include - +#include +#include #include "libmolgrid/grid.h" +namespace libmolgrid +{ -namespace libmolgrid { - - -template -struct mgrid_buffer_data { + template + struct mgrid_buffer_data + { Dtype *gpu_ptr; bool sent_to_gpu; -}; + }; -/** \brief ManagedGrid base class */ -template -class ManagedGridBase { + /** \brief ManagedGrid base class */ + template + class ManagedGridBase + { public: - using gpu_grid_t = Grid; ///cuda grid type + using gpu_grid_t = Grid; /// cuda grid type using cpu_grid_t = Grid; using type = Dtype; static constexpr size_t N = NumDims; protected: - - //two different views of the same memory - mutable gpu_grid_t gpu_grid; //treated as a cache + // two different views of the same memory + mutable gpu_grid_t gpu_grid; // treated as a cache cpu_grid_t cpu_grid; - std::shared_ptr cpu_ptr; //shared pointer lets us not worry about copying the grid - size_t capacity = 0; //amount of memory allocated (for resizing) + std::shared_ptr cpu_ptr; // shared pointer lets us not worry about copying the grid + size_t capacity = 0; // amount of memory allocated (for resizing) using buffer_data = mgrid_buffer_data; mutable buffer_data *gpu_info = nullptr; - - ///empty (unusable) grid + /// empty (unusable) grid ManagedGridBase() = default; // deallocate our special buffer memory, include gpu memory if present - static void delete_buffer(void *ptr) { - buffer_data *data = (buffer_data*)(ptr) - 1; - if(data->gpu_ptr != nullptr) { - //deallocate gpu + static void delete_buffer(void *ptr) + { + buffer_data *data = (buffer_data *)(ptr)-1; + if (data->gpu_ptr != nullptr) + { + // deallocate gpu cudaFree(data->gpu_ptr); } free(data); } - //allocate and set the cpu pointer (and grid) with space for sent_to_gpu bool, set the bool ptr location - //does not initialize memory - void alloc_and_set_cpu(size_t sz) { - //put buffer data at start so know where it is on delete - void *buffer = malloc(sizeof(buffer_data)+sz*sizeof(Dtype)); - Dtype *cpu_data = (Dtype*)((buffer_data*)buffer+1); - - if(!buffer) throw std::runtime_error("Could not allocate "+itoa(sz*sizeof(Dtype))+" bytes of CPU memory in ManagedGrid"); + // allocate and set the cpu pointer (and grid) with space for sent_to_gpu bool, set the bool ptr location + // does not initialize memory + void alloc_and_set_cpu(size_t sz) + { + // put buffer data at start so know where it is on delete + void *buffer = malloc(sizeof(buffer_data) + sz * sizeof(Dtype)); + Dtype *cpu_data = (Dtype *)((buffer_data *)buffer + 1); + + if (!buffer) + throw std::runtime_error("Could not allocate " + itoa(sz * sizeof(Dtype)) + " bytes of CPU memory in ManagedGrid"); cpu_ptr = std::shared_ptr(cpu_data, delete_buffer); cpu_grid.set_buffer(cpu_ptr.get()); - gpu_info = (buffer_data*)buffer; + gpu_info = (buffer_data *)buffer; gpu_info->gpu_ptr = nullptr; gpu_info->sent_to_gpu = false; } - //allocate and set gpu_ptr and grid, does not initialize memory, should not be called if memory is already allocated - void alloc_and_set_gpu(size_t sz) const { - if(gpu_info == nullptr) + // allocate and set gpu_ptr and grid, does not initialize memory, should not be called if memory is already allocated + void alloc_and_set_gpu(size_t sz) const + { + if (gpu_info == nullptr) throw std::runtime_error("Attempt to allocate gpu memory in empty ManagedGrid"); - if(gpu_info->gpu_ptr != nullptr) { + if (gpu_info->gpu_ptr != nullptr) + { throw std::runtime_error("Attempt to reallocate gpu memory in ManagedGrid"); } - //we are not actually using unified memory, but this make debugging easier? - cudaError_t err = cudaMalloc(&gpu_info->gpu_ptr,sz*sizeof(Dtype)); + // we are not actually using unified memory, but this make debugging easier? + cudaError_t err = cudaMalloc(&gpu_info->gpu_ptr, sz * sizeof(Dtype)); cudaGetLastError(); - if(err != cudaSuccess) { - throw std::runtime_error("Could not allocate "+itoa(sz*sizeof(Dtype))+" bytes of GPU memory in ManagedGrid"); + if (err != cudaSuccess) + { + throw std::runtime_error("Could not allocate " + itoa(sz * sizeof(Dtype)) + " bytes of GPU memory in ManagedGrid"); } gpu_grid.set_buffer(gpu_info->gpu_ptr); } - template::type> - ManagedGridBase(I... sizes): gpu_grid(nullptr, sizes...), cpu_grid(nullptr, sizes...) { - //allocate buffer + template ::type> + ManagedGridBase(I... sizes) : gpu_grid(nullptr, sizes...), cpu_grid(nullptr, sizes...) + { + // allocate buffer + capacity = this->size(); + alloc_and_set_cpu(capacity); // even with capacity == 0 need to allocate sent_to_gpu + memset(cpu_ptr.get(), 0, capacity * sizeof(Dtype)); + gpu_info->sent_to_gpu = false; + } + + ManagedGridBase(size_t *sizes): gpu_grid(nullptr, sizes), cpu_grid(nullptr, sizes) + { + // allocate buffer capacity = this->size(); - alloc_and_set_cpu(capacity); //even with capacity == 0 need to allocate sent_to_gpu - memset(cpu_ptr.get(), 0, capacity*sizeof(Dtype)); + alloc_and_set_cpu(capacity); // even with capacity == 0 need to allocate sent_to_gpu + memset(cpu_ptr.get(), 0, capacity * sizeof(Dtype)); gpu_info->sent_to_gpu = false; } - //helper for clone, allocate new memory and copies contents of current ptr into it - void clone_ptrs() { - if(capacity == 0) { + + // helper for clone, allocate new memory and copies contents of current ptr into it + void clone_ptrs() + { + if (capacity == 0) + { return; } - //duplicate cpu memory and set sent_to_gpu + // duplicate cpu memory and set sent_to_gpu std::shared_ptr old = cpu_ptr; buffer_data oldgpu = *gpu_info; alloc_and_set_cpu(capacity); - memcpy(cpu_ptr.get(), old.get(), sizeof(Dtype)*capacity); + memcpy(cpu_ptr.get(), old.get(), sizeof(Dtype) * capacity); gpu_info->sent_to_gpu = oldgpu.sent_to_gpu; - //if allocated, duplicate gpu, but only if it is active - if(oldgpu.gpu_ptr && oldgpu.sent_to_gpu) { + // if allocated, duplicate gpu, but only if it is active + if (oldgpu.gpu_ptr && oldgpu.sent_to_gpu) + { alloc_and_set_gpu(capacity); - LMG_CUDA_CHECK(cudaMemcpy(gpu_info->gpu_ptr, oldgpu.gpu_ptr, sizeof(Dtype)*capacity, cudaMemcpyDeviceToDevice)); + LMG_CUDA_CHECK(cudaMemcpy(gpu_info->gpu_ptr, oldgpu.gpu_ptr, sizeof(Dtype) * capacity, cudaMemcpyDeviceToDevice)); } } - public: + public: /// dimensions along each axis - inline const size_t * dimensions() const { return cpu_grid.dimensions(); } + inline const size_t *dimensions() const { return cpu_grid.dimensions(); } /// dimensions along specified axis inline size_t dimension(size_t i) const { return cpu_grid.dimension(i); } /// offset for each dimension, all indexing calculations use this - inline const size_t * offsets() const { return cpu_grid.offsets(); } + inline const size_t *offsets() const { return cpu_grid.offsets(); } /// offset for each dimension, all indexing calculations use this inline size_t offset(size_t i) const { return cpu_grid.offset(i); } @@ -134,109 +155,153 @@ class ManagedGridBase { inline size_t size() const { return cpu_grid.size(); } /// set contents to zero - inline void fill_zero() { - if(ongpu()) gpu_grid.fill_zero(); - else cpu_grid.fill_zero(); + inline void fill_zero() + { + if (ongpu()) + gpu_grid.fill_zero(); + else + cpu_grid.fill_zero(); } /** \brief Initializer list indexing * */ - template - inline Dtype& operator()(I... indices) { + template + inline Dtype &operator()(I... indices) + { tocpu(); return cpu_grid(indices...); } - template - inline Dtype operator()(I... indices) const { + template + inline Dtype operator()(I... indices) const + { tocpu(); return cpu_grid(indices...); } /** \brief Copy data into dest. Should be same size, but will narrow if needed */ - size_t copyTo(cpu_grid_t& dest) const { + size_t copyTo(cpu_grid_t &dest) const + { size_t sz = std::min(size(), dest.size()); - if(sz == 0) return 0; - if(ongpu()) { - LMG_CUDA_CHECK(cudaMemcpy(dest.data(), gpu_grid.data(), sz*sizeof(Dtype), cudaMemcpyDeviceToHost)); - } else { //host ot host - memcpy(dest.data(),cpu_grid.data(),sz*sizeof(Dtype)); + if (sz == 0) + return 0; + if (ongpu()) + { + LMG_CUDA_CHECK(cudaMemcpy(dest.data(), gpu_grid.data(), sz * sizeof(Dtype), cudaMemcpyDeviceToHost)); + } + else + { // host ot host + memcpy(dest.data(), cpu_grid.data(), sz * sizeof(Dtype)); } return sz; } /** \brief Copy data into dest. Should be same size, but will narrow if needed */ - size_t copyTo(gpu_grid_t& dest) const { + size_t copyTo(gpu_grid_t &dest) const + { size_t sz = std::min(size(), dest.size()); - if(sz == 0) return 0; - if(ongpu()) { - LMG_CUDA_CHECK(cudaMemcpy(dest.data(),gpu_grid.data(),sz*sizeof(Dtype),cudaMemcpyDeviceToDevice)); - } else { - LMG_CUDA_CHECK(cudaMemcpy(dest.data(),cpu_grid.data(),sz*sizeof(Dtype),cudaMemcpyHostToDevice)); + if (sz == 0) + return 0; + if (ongpu()) + { + LMG_CUDA_CHECK(cudaMemcpy(dest.data(), gpu_grid.data(), sz * sizeof(Dtype), cudaMemcpyDeviceToDevice)); + } + else + { + LMG_CUDA_CHECK(cudaMemcpy(dest.data(), cpu_grid.data(), sz * sizeof(Dtype), cudaMemcpyHostToDevice)); } return sz; } /** \brief Copy data into dest. Should be same size, but will narrow if needed */ - size_t copyTo(ManagedGridBase& dest) const { - if(dest.ongpu()) { + size_t copyTo(ManagedGridBase &dest) const + { + if (dest.ongpu()) + { return copyTo(dest.gpu_grid); - } else { + } + else + { return copyTo(dest.cpu_grid); } } /** \brief Copy data from src. Should be same size, but will narrow if needed */ - size_t copyFrom(const cpu_grid_t& src) { + size_t copyFrom(const cpu_grid_t &src) + { size_t sz = std::min(size(), src.size()); - if(sz == 0) return 0; - if(ongpu()) { - LMG_CUDA_CHECK(cudaMemcpy(gpu_grid.data(), src.data(), sz*sizeof(Dtype), cudaMemcpyHostToDevice)); - } else { - memcpy(cpu_grid.data(),src.data(),sz*sizeof(Dtype)); + if (sz == 0) + return 0; + if (ongpu()) + { + LMG_CUDA_CHECK(cudaMemcpy(gpu_grid.data(), src.data(), sz * sizeof(Dtype), cudaMemcpyHostToDevice)); + } + else + { + memcpy(cpu_grid.data(), src.data(), sz * sizeof(Dtype)); } return sz; } /** \brief Copy data from src. Should be same size, but will narrow if needed */ - size_t copyFrom(const gpu_grid_t& src) { + size_t copyFrom(const gpu_grid_t &src) + { size_t sz = std::min(size(), src.size()); - if(sz == 0) return 0; - if(ongpu()) { - LMG_CUDA_CHECK(cudaMemcpy(gpu_grid.data(),src.data(),sz*sizeof(Dtype),cudaMemcpyDeviceToDevice)); - } else { - LMG_CUDA_CHECK(cudaMemcpy(cpu_grid.data(),src.data(),sz*sizeof(Dtype),cudaMemcpyDeviceToHost)); + if (sz == 0) + return 0; + if (ongpu()) + { + LMG_CUDA_CHECK(cudaMemcpy(gpu_grid.data(), src.data(), sz * sizeof(Dtype), cudaMemcpyDeviceToDevice)); + } + else + { + LMG_CUDA_CHECK(cudaMemcpy(cpu_grid.data(), src.data(), sz * sizeof(Dtype), cudaMemcpyDeviceToHost)); } return sz; } /** \brief Copy data from src. Should be same size, but will narrow if needed */ - size_t copyFrom(const ManagedGridBase& src) { - if(src.ongpu()) { + size_t copyFrom(const ManagedGridBase &src) + { + if (src.ongpu()) + { return copyFrom(src.gpu_grid); - } else { //on host + } + else + { // on host return copyFrom(src.cpu_grid); } } /** \brief Copy data from src into this starting at start. Should be same size, but will narrow if needed */ - size_t copyInto(size_t start, const ManagedGridBase& src) { - size_t off = offset(0)*start; - size_t sz = size()-off; + size_t copyInto(size_t start, const ManagedGridBase &src) + { + size_t off = offset(0) * start; + size_t sz = size() - off; sz = std::min(sz, src.size()); - if(sz == 0) return 0; - if(src.ongpu()) { - if(ongpu()) { - LMG_CUDA_CHECK(cudaMemcpy(gpu_grid.data()+off,src.gpu_grid.data(),sz*sizeof(Dtype),cudaMemcpyDeviceToDevice)); - } else { - LMG_CUDA_CHECK(cudaMemcpy(cpu_grid.data()+off,src.gpu_grid.data(),sz*sizeof(Dtype),cudaMemcpyDeviceToHost)); + if (sz == 0) + return 0; + if (src.ongpu()) + { + if (ongpu()) + { + LMG_CUDA_CHECK(cudaMemcpy(gpu_grid.data() + off, src.gpu_grid.data(), sz * sizeof(Dtype), cudaMemcpyDeviceToDevice)); + } + else + { + LMG_CUDA_CHECK(cudaMemcpy(cpu_grid.data() + off, src.gpu_grid.data(), sz * sizeof(Dtype), cudaMemcpyDeviceToHost)); + } + } + else + { // on host + if (ongpu()) + { + LMG_CUDA_CHECK(cudaMemcpy(gpu_grid.data() + off, src.data(), sz * sizeof(Dtype), cudaMemcpyHostToDevice)); } - } else { //on host - if(ongpu()) { - LMG_CUDA_CHECK(cudaMemcpy(gpu_grid.data()+off, src.data(), sz*sizeof(Dtype), cudaMemcpyHostToDevice)); - } else { - memcpy(cpu_grid.data()+off,src.data(),sz*sizeof(Dtype)); + else + { + memcpy(cpu_grid.data() + off, src.data(), sz * sizeof(Dtype)); } } return sz; @@ -248,22 +313,29 @@ class ManagedGridBase { * This function is provided so code can be optimized to avoid unnecessary allocations and * should be used carefully. */ - template::type> - ManagedGrid resized(I... sizes) { + template ::type> + ManagedGrid resized(I... sizes) + { cpu_grid_t g(nullptr, sizes...); - if(g.size() <= capacity) { - //no need to allocate and copy; capacity stays the same + if (g.size() <= capacity) + { + // no need to allocate and copy; capacity stays the same ManagedGrid tmp; tmp.cpu_ptr = cpu_ptr; tmp.gpu_info = gpu_info; tmp.cpu_grid = cpu_grid_t(cpu_ptr.get(), sizes...); - if(gpu_info) tmp.gpu_grid = gpu_grid_t(gpu_info->gpu_ptr, sizes...); + if (gpu_info) + tmp.gpu_grid = gpu_grid_t(gpu_info->gpu_ptr, sizes...); tmp.capacity = capacity; return tmp; - } else { + } + else + { ManagedGrid tmp(sizes...); - if(size() > 0 && tmp.size() > 0) { - if(ongpu()) tmp.togpu(); //allocate gpu memory + if (size() > 0 && tmp.size() > 0) + { + if (ongpu()) + tmp.togpu(); // allocate gpu memory copyTo(tmp); } return tmp; @@ -273,102 +345,143 @@ class ManagedGridBase { /** \brief Return GPU Grid view. Host code should not access the grid * until the GPU code is complete. */ - const gpu_grid_t& gpu() const { togpu(); return gpu_grid; } - gpu_grid_t& gpu() { togpu(); return gpu_grid; } + const gpu_grid_t &gpu() const + { + togpu(); + return gpu_grid; + } + gpu_grid_t &gpu() + { + togpu(); + return gpu_grid; + } /** \brief Return CPU Grid view. GPU code should no longer access this memory. */ - const cpu_grid_t& cpu() const { tocpu(); return cpu_grid; } - cpu_grid_t& cpu() { tocpu(); return cpu_grid; } + const cpu_grid_t &cpu() const + { + tocpu(); + return cpu_grid; + } + cpu_grid_t &cpu() + { + tocpu(); + return cpu_grid; + } /** \brief Transfer data to GPU */ - void togpu(bool dotransfer=true) const { - if(capacity == 0) return; - //check that memory is allocated - even if data is on gpu, may still need to set this mgrid's gpu_grid - if(gpu_grid.data() == nullptr) { - if(gpu_info->gpu_ptr == nullptr) { + void togpu(bool dotransfer = true) const + { + if (capacity == 0) + return; + // check that memory is allocated - even if data is on gpu, may still need to set this mgrid's gpu_grid + if (gpu_grid.data() == nullptr) + { + if (gpu_info->gpu_ptr == nullptr) + { alloc_and_set_gpu(capacity); - } //otherwise some other copy has already allocated memory, just need to set - size_t offset = cpu_grid.data() - cpu_ptr.get(); //might be subgrid - gpu_grid.set_buffer(gpu_info->gpu_ptr+offset); + } // otherwise some other copy has already allocated memory, just need to set + size_t offset = cpu_grid.data() - cpu_ptr.get(); // might be subgrid + gpu_grid.set_buffer(gpu_info->gpu_ptr + offset); } - if(oncpu() && dotransfer) { - LMG_CUDA_CHECK(cudaMemcpy(gpu_info->gpu_ptr,cpu_ptr.get(),capacity*sizeof(Dtype),cudaMemcpyHostToDevice)); + if (oncpu() && dotransfer) + { + LMG_CUDA_CHECK(cudaMemcpy(gpu_info->gpu_ptr, cpu_ptr.get(), capacity * sizeof(Dtype), cudaMemcpyHostToDevice)); } - if(gpu_info) gpu_info->sent_to_gpu = true; + if (gpu_info) + gpu_info->sent_to_gpu = true; } /** \brief Transfer data to CPU. If not dotransfer, data is not copied back. */ - void tocpu(bool dotransfer=true) const { - if(ongpu() && capacity > 0 && dotransfer) { - LMG_CUDA_CHECK(cudaMemcpy(cpu_ptr.get(),gpu_info->gpu_ptr,capacity*sizeof(Dtype),cudaMemcpyDeviceToHost)); + void tocpu(bool dotransfer = true) const + { + if (ongpu() && capacity > 0 && dotransfer) + { + LMG_CUDA_CHECK(cudaMemcpy(cpu_ptr.get(), gpu_info->gpu_ptr, capacity * sizeof(Dtype), cudaMemcpyDeviceToHost)); } - if(gpu_info) gpu_info->sent_to_gpu = false; + if (gpu_info) + gpu_info->sent_to_gpu = false; } /** \brief Return true if memory is currently on GPU */ - bool ongpu() const { + bool ongpu() const + { bool ret = gpu_info && gpu_info->sent_to_gpu; - if(ret && gpu_grid.data() == nullptr) togpu(); //needed if this is a copy made before another transfered + if (ret && gpu_grid.data() == nullptr) + togpu(); // needed if this is a copy made before another transfered return ret; } /** \brief Return true if memory is currently on CPU */ bool oncpu() const { return gpu_info == nullptr || !gpu_info->sent_to_gpu; } - operator cpu_grid_t() const { return cpu(); } - operator cpu_grid_t&() {return cpu(); } + operator cpu_grid_t &() { return cpu(); } operator gpu_grid_t() const { return gpu(); } - operator gpu_grid_t&() {return gpu(); } + operator gpu_grid_t &() { return gpu(); } /// Return pointer to CPU data - inline const Dtype * data() const { tocpu(); return cpu().data(); } - inline Dtype * data() { tocpu(); return cpu().data(); } + inline const Dtype *data() const + { + tocpu(); + return cpu().data(); + } + inline Dtype *data() + { + tocpu(); + return cpu().data(); + } - //pointer equality - bool operator==(const ManagedGridBase& rhs) const { + // pointer equality + bool operator==(const ManagedGridBase &rhs) const + { return cpu_ptr == rhs.cpu_ptr; } + protected: // constructor used by operator[] - friend ManagedGridBase; - explicit ManagedGridBase(const ManagedGridBase& G, size_t i): - gpu_grid(G.gpu_grid, i), cpu_grid(G.cpu_grid, i), cpu_ptr(G.cpu_ptr), - capacity(G.capacity), gpu_info(G.gpu_info) {} -}; - -/** \brief A dense grid whose memory is managed by the class. - * - * Memory is allocated as unified memory so it can be safely accessed - * from either the CPU or GPU. Note that while the memory is accessible - * on the GPU, ManagedGrid objects should only be used directly on the host. - * Device code should use a Grid view of the ManagedGrid. Accessing ManagedGrid - * data from the host while the GPU is writing to the grid will result in - * undefined behavior. - * - * If CUDA fails to allocate unified memory (presumably due to lack of GPUs), - * host-only memory will be used instead. - * - * Grid is a container class of ManagedGrid instead of a base class. Explicit - * transformation to a Grid view is required to clearly indicate how the memory - * should be accessed. This can be done with cpu and gpu methods or by an - * explicit cast to Grid. - * - * There are two class specialization to support bracket indexing. - */ -template -class ManagedGrid : public ManagedGridBase { + friend ManagedGridBase; + explicit ManagedGridBase(const ManagedGridBase &G, size_t i) : gpu_grid(G.gpu_grid, i), cpu_grid(G.cpu_grid, i), cpu_ptr(G.cpu_ptr), + capacity(G.capacity), gpu_info(G.gpu_info) {} + }; + + /** \brief A dense grid whose memory is managed by the class. + * + * Memory is allocated as unified memory so it can be safely accessed + * from either the CPU or GPU. Note that while the memory is accessible + * on the GPU, ManagedGrid objects should only be used directly on the host. + * Device code should use a Grid view of the ManagedGrid. Accessing ManagedGrid + * data from the host while the GPU is writing to the grid will result in + * undefined behavior. + * + * If CUDA fails to allocate unified memory (presumably due to lack of GPUs), + * host-only memory will be used instead. + * + * Grid is a container class of ManagedGrid instead of a base class. Explicit + * transformation to a Grid view is required to clearly indicate how the memory + * should be accessed. This can be done with cpu and gpu methods or by an + * explicit cast to Grid. + * + * There are two class specialization to support bracket indexing. + */ + template + class ManagedGrid : public ManagedGridBase + { + protected: + ManagedGrid(size_t *sizes) : ManagedGridBase(sizes) + { + } public: - using subgrid_t = ManagedGrid; + using subgrid_t = ManagedGrid; using base_t = ManagedGridBase; - //empty, unusable grid + // empty, unusable grid ManagedGrid() = default; - template::type> - ManagedGrid(I... sizes): ManagedGridBase(sizes...) { + template ::type> + ManagedGrid(I... sizes) : ManagedGridBase(sizes...) + { } /** \brief Bracket indexing. @@ -377,79 +490,128 @@ class ManagedGrid : public ManagedGridBase { * but not maximally efficient (unless the compiler is really good). * Use operator() for fastest (but unchecked) access or access data directly. */ - subgrid_t operator[](size_t i) const { - if(i >= this->cpu_grid.dimension(0)) - throw std::out_of_range("Index "+boost::lexical_cast(i)+" out of bounds of dimension "+boost::lexical_cast(this->cpu_grid.dimension(0))); - return ManagedGrid(*static_cast *>(this), i); + subgrid_t operator[](size_t i) const + { + if (i >= this->cpu_grid.dimension(0)) + throw std::out_of_range("Index " + boost::lexical_cast(i) + " out of bounds of dimension " + boost::lexical_cast(this->cpu_grid.dimension(0))); + return ManagedGrid(*static_cast *>(this), i); } /** \brief Return a copy of this grid */ - ManagedGrid clone() const { + ManagedGrid clone() const + { ManagedGrid ret(*this); ret.clone_ptrs(); return ret; } - protected: - //only called from regular Grid - friend ManagedGrid; - explicit ManagedGrid(const ManagedGridBase& G, size_t i): - ManagedGridBase(G, i) {} -}; + template + void save(Archive &ar, const unsigned int version) const + { + this->tocpu(); + for (size_t i = 0; i < NumDims; i++) + { + ar << this->dimension(i); + } + ar << boost::serialization::make_array(this->data(), this->size()); + } + + template + void load(Archive &ar, const unsigned int version) + { + size_t newdims[NumDims] = {0,}; + for (size_t i = 0; i < NumDims; i++) + { + ar >> newdims[i]; + } + ManagedGrid tmp(newdims); + ar >> boost::serialization::make_array(tmp.data(), tmp.size()); + *this = tmp; + } + BOOST_SERIALIZATION_SPLIT_MEMBER() -// class specialization of managed grid to make final operator[] return scalar -template -class ManagedGrid : public ManagedGridBase { + protected: + // only called from regular Grid + friend ManagedGrid; + explicit ManagedGrid(const ManagedGridBase &G, size_t i) : ManagedGridBase(G, i) {} + }; + + // class specialization of managed grid to make final operator[] return scalar + template + class ManagedGrid : public ManagedGridBase + { public: using subgrid_t = Dtype; using base_t = ManagedGridBase; ManagedGrid() = default; - ManagedGrid(size_t sz): ManagedGridBase(sz) { + ManagedGrid(size_t sz) : ManagedGridBase(sz) + { } - inline Dtype& operator[](size_t i) { + inline Dtype &operator[](size_t i) + { this->tocpu(); return this->cpu_grid[i]; } - inline Dtype operator[](size_t i) const { + inline Dtype operator[](size_t i) const + { this->tocpu(); return this->cpu_grid[i]; } - inline Dtype& operator()(size_t a) { + inline Dtype &operator()(size_t a) + { this->tocpu(); return this->cpu_grid(a); } - inline Dtype operator()(size_t a) const { + inline Dtype operator()(size_t a) const + { this->tocpu(); return this->cpu_grid(a); } - ManagedGrid clone() const { + ManagedGrid clone() const + { ManagedGrid ret(*this); ret.clone_ptrs(); return ret; } - protected: - //only called from regular Grid - friend ManagedGrid; - explicit ManagedGrid(const ManagedGridBase& G, size_t i): - ManagedGridBase(G, i) {} + template + void save(Archive &ar, const unsigned int version) const + { + this->tocpu(); + ar << this->dimension(0); + ar << boost::serialization::make_array(this->data(), this->size()); + } -}; + template + void load(Archive &ar, const unsigned int version) + { + size_t newdim = 0; + ar >> newdim; + ManagedGrid tmp(newdim); + ar >> boost::serialization::make_array(tmp.data(), tmp.size()); + *this = tmp; + } + BOOST_SERIALIZATION_SPLIT_MEMBER() -#define EXPAND_MGRID_DEFINITIONS(Z,SIZE,_) \ -typedef ManagedGrid MGrid##SIZE##f; \ -typedef ManagedGrid MGrid##SIZE##d; + protected: + // only called from regular Grid + friend ManagedGrid; + explicit ManagedGrid(const ManagedGridBase &G, size_t i) : ManagedGridBase(G, i) {} + }; +#define EXPAND_MGRID_DEFINITIONS(Z, SIZE, _) \ + typedef ManagedGrid MGrid##SIZE##f; \ + typedef ManagedGrid MGrid##SIZE##d; -BOOST_PP_REPEAT_FROM_TO(1,LIBMOLGRID_MAX_GRID_DIM, EXPAND_MGRID_DEFINITIONS, 0); + BOOST_PP_REPEAT_FROM_TO(1, LIBMOLGRID_MAX_GRID_DIM, EXPAND_MGRID_DEFINITIONS, 0); } #endif /* MANAGED_GRID_H_ */ diff --git a/include/libmolgrid/string_cache.h b/include/libmolgrid/string_cache.h new file mode 100644 index 0000000..fe37ac6 --- /dev/null +++ b/include/libmolgrid/string_cache.h @@ -0,0 +1,30 @@ +/** \file string_cache.h + * + * Class for avoiding duplicate memory allocation of identical strings. + * Use const char* + */ + +#ifndef STRING_CACHE_H_ +#define STRING_CACHE_H_ + +#include +#include + +namespace libmolgrid { + +//for memory efficiency, only store a given string once and use the const char* +class StringCache { + std::unordered_set strings; +public: + const char* get(const std::string& s) + { + strings.insert(s); + //we assume even as the set is resized that strings never get allocated + return strings.find(s)->c_str(); + } +}; + +extern StringCache string_cache; + +}; //namespace libmolgrid +#endif // STRING_CACHE_H_ \ No newline at end of file diff --git a/python/bindings.h b/python/bindings.h index 592bbea..6fe619a 100644 --- a/python/bindings.h +++ b/python/bindings.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/python/bindings.in.cpp b/python/bindings.in.cpp index 0549560..00319b3 100644 --- a/python/bindings.in.cpp +++ b/python/bindings.in.cpp @@ -768,6 +768,9 @@ MAKE_ALL_GRIDS() .def("get_large_epoch_num", &ExampleProvider::get_large_epoch_num, "Return large epoch number, where an epoch means every example has been seen at LEAST once.") .def("small_epoch_size", &ExampleProvider::small_epoch_size,"Return size of small epoch") .def("large_epoch_size", &ExampleProvider::large_epoch_size,"Return size of large epoch") + .def("mem_caches_size", &ExampleProvider::mem_caches_size, "Return number of elements currently cached in-memory.") + .def("save_mem_caches", static_cast(&ExampleProvider::save_mem_caches),arg("file_name"),"Write current in-memory cache state to specified file.") + .def("load_mem_caches", static_cast(&ExampleProvider::load_mem_caches),arg("file_name"),"Read specified file into in-memory cache.") .def("reset", &ExampleProvider::reset, "Reset iterator to beginning") .def("__iter__", +[](object self) { return self;}) .def("__next__", +[](ExampleProvider& self) -> std::vector { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dc047e6..8aa92d6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ set( LIBMOLGRID_SOURCES grid_interpolater.cpp grid_interpolater.cu cartesian_grid.cpp + string_cache.cpp ) set( LIBMOLGRID_HEADERS @@ -39,6 +40,7 @@ set( LIBMOLGRID_HEADERS ../include/libmolgrid/common.h ../include/libmolgrid/grid_io.h ../include/libmolgrid/cartesian_grid.h + ../include/libmolgrid/string_cache.h ) set ( LIBMOLGRID_HEADERS ${LIBMOLGRID_HEADERS} PARENT_SCOPE) diff --git a/src/coord_cache.cpp b/src/coord_cache.cpp index e1bd36f..d285ce2 100644 --- a/src/coord_cache.cpp +++ b/src/coord_cache.cpp @@ -6,13 +6,13 @@ */ #include "libmolgrid/coord_cache.h" +#include "libmolgrid/string_cache.h" #include -#include #include -#include +#include #include - +#include namespace libmolgrid { @@ -21,135 +21,139 @@ using namespace OpenBabel; unsigned do_not_optimize_away; -//read in molcache if present -CoordCache::CoordCache(std::shared_ptr t, const ExampleProviderSettings& settings, - const std::string& mc): typer(t), data_root(settings.data_root), molcache(mc), - use_cache(settings.cache_structs), addh(settings.add_hydrogens), make_vector_types(settings.make_vector_types) { - if(molcache.length() > 0) { +// read in molcache if present +CoordCache::CoordCache(std::shared_ptr t, + const ExampleProviderSettings &settings, + const std::string &mc) + : typer(t), data_root(settings.data_root), molcache(mc), + use_cache(settings.cache_structs), addh(settings.add_hydrogens), + make_vector_types(settings.make_vector_types) { + if (molcache.length() > 0) { static_assert(sizeof(size_t) == 8, "size_t must be 8 bytes"); - if(!boost::filesystem::exists(molcache)) { - //try looking in dataroot + if (!boost::filesystem::exists(molcache)) { + // try looking in dataroot molcache = (boost::filesystem::path(data_root) / molcache).string(); } ifstream mcache(molcache.c_str()); - if(!mcache) throw invalid_argument("Could not open file: "+molcache); + if (!mcache) + throw invalid_argument("Could not open file: " + molcache); int version = 0; - mcache.read((char*)&version,sizeof(int)); - if(version != -1) { - throw invalid_argument(molcache+" is not a valid molcache2 file"); + mcache.read((char *)&version, sizeof(int)); + if (version != -1) { + throw invalid_argument(molcache + " is not a valid molcache2 file"); } size_t start = 0; - mcache.read((char*)&start, sizeof(size_t)); + mcache.read((char *)&start, sizeof(size_t)); mcache.seekg(start); - //open memory map for moldata - cache_map.open(molcache.c_str(),start); - if(!cache_map.is_open()) throw logic_error("Could not memory map "+molcache); + // open memory map for moldata + cache_map.open(molcache.c_str(), start); + if (!cache_map.is_open()) + throw logic_error("Could not memory map " + molcache); - //read in name + offset + // read in name + offset unsigned char len = 0; char molname[256]; size_t offset = 0; - while(mcache.read((char*)&len, 1)) { - //read name then offset + while (mcache.read((char *)&len, 1)) { + // read name then offset mcache.read(molname, len); molname[len] = 0; - mcache.read((char*)&offset, sizeof(size_t)); + mcache.read((char *)&offset, sizeof(size_t)); string mname(molname); offsets[string_cache.get(mname)] = offset; } - //prefetch into file cache + // prefetch into file cache unsigned sum = 0; - for(unsigned i = 0, n = cache_map.size(); i < n; i += 1024) { + for (unsigned i = 0, n = cache_map.size(); i < n; i += 1024) { sum += cache_map.data()[i]; } do_not_optimize_away = sum; } - } -//set coords using the cache -void CoordCache::set_coords(const char *fname, CoordinateSet& coord) { +// set coords using the cache +void CoordCache::set_coords(const char *fname, CoordinateSet &coord) { struct info { - float x,y,z; + float x, y, z; int type; }; - if(offsets.count(fname)) { + if (offsets.count(fname)) { size_t off = offsets[fname]; - const char *data = cache_map.data()+off; - unsigned natoms = *(unsigned*)data; - info *atoms = (info*)(data+sizeof(unsigned)); + const char *data = cache_map.data() + off; + unsigned natoms = *(unsigned *)data; + info *atoms = (info *)(data + sizeof(unsigned)); - if(typer->is_vector_typer()) + if (typer->is_vector_typer()) throw invalid_argument("Vector typer used with molcache files"); - vector c; c.reserve(natoms); - vector r; r.reserve(natoms); - vector t; t.reserve(natoms); - for(unsigned i = 0; i < natoms; i++) - { - info& atom = atoms[i]; + vector c; + c.reserve(natoms); + vector r; + r.reserve(natoms); + vector t; + t.reserve(natoms); + for (unsigned i = 0; i < natoms; i++) { + info &atom = atoms[i]; auto t_r = typer->get_int_type(atom.type); - if(t_r.first >= 0) { //ignore neg + if (t_r.first >= 0) { // ignore neg t.push_back(t_r.first); r.push_back(t_r.second); - c.push_back(make_float3(atom.x,atom.y,atom.z)); + c.push_back(make_float3(atom.x, atom.y, atom.z)); } } coord = CoordinateSet(c, t, r, typer->num_types()); coord.src = fname; - } - else if(memcache.count(fname)) { - coord = memcache[fname].clone(); //always copy out of cache + } else if (memcache.count(fname)) { + coord = memcache[fname].clone(); // always copy out of cache } else { std::string fullname = fname; - if(data_root.length()) { - boost::filesystem::path p = boost::filesystem::path(data_root) / boost::filesystem::path(fname); + if (data_root.length()) { + boost::filesystem::path p = + boost::filesystem::path(data_root) / boost::filesystem::path(fname); fullname = p.string(); } - //check for custom gninatypes file - if(boost::algorithm::ends_with(fname,".gninatypes")) - { - if(typer->is_vector_typer()) + // check for custom gninatypes file + if (boost::algorithm::ends_with(fname, ".gninatypes")) { + if (typer->is_vector_typer()) throw invalid_argument("Vector typer used with gninatypes files"); ifstream in(fullname.c_str()); - if(!in) throw invalid_argument("Could not read "+fullname); + if (!in) + throw invalid_argument("Could not read " + fullname); vector c; vector r; vector t; info atom; - while(in.read((char*)&atom, sizeof(atom))) - { + while (in.read((char *)&atom, sizeof(atom))) { auto t_r = typer->get_int_type(atom.type); - if(t_r.first >= 0) { //ignore neg + if (t_r.first >= 0) { // ignore neg t.push_back(t_r.first); r.push_back(t_r.second); - c.push_back(make_float3(atom.x,atom.y,atom.z)); + c.push_back(make_float3(atom.x, atom.y, atom.z)); } } coord = CoordinateSet(c, t, r, typer->num_types()); coord.src = fname; - } - else if(!boost::algorithm::ends_with(fname,"none")) //reserved word + } else if (!boost::algorithm::ends_with(fname, "none")) // reserved word { - //read mol from file and set mol info (atom coords and grid positions) + // read mol from file and set mol info (atom coords and grid positions) OBConversion conv; OBMol mol; - if(!conv.ReadFile(&mol, fullname.c_str())) + if (!conv.ReadFile(&mol, fullname.c_str())) throw invalid_argument("Could not read " + fullname); - if(addh) { + if (addh) { mol.AddHydrogens(); } @@ -157,18 +161,38 @@ void CoordCache::set_coords(const char *fname, CoordinateSet& coord) { coord.src = fname; } else { coord = CoordinateSet(); - coord.max_type = typer->num_types(); //empty, but include type size + coord.max_type = typer->num_types(); // empty, but include type size } - AtomIndexTyper *ityper = dynamic_cast(typer.get()); - if(make_vector_types && ityper) { + AtomIndexTyper *ityper = dynamic_cast(typer.get()); + if (make_vector_types && ityper) { coord.make_vector_types(false, ityper->get_type_radii()); } - if(use_cache) { //save coord - memcache[fname] = coord.clone(); //save a copy in case the returned set is modified + if (use_cache) { // save coord + memcache[fname] = + coord.clone(); // save a copy in case the returned set is modified } } } +void CoordCache::save_mem_cache(std::ostream &out) const { + boost::archive::binary_oarchive oarch(out); + + // must convert to std::string as storing pointers makes no sense + std::unordered_map tmp; + tmp.insert(memcache.begin(), memcache.end()); + oarch << tmp; +} + + +void CoordCache::load_mem_cache(std::istream &in) { + boost::archive::binary_iarchive iarch(in); + std::unordered_map tmp; + iarch >> tmp; + for (const auto& kv : tmp) { + const char *fname = string_cache.get(kv.first); + memcache[fname] = kv.second; + } +} } /* namespace libmolgrid */ diff --git a/src/example.cpp b/src/example.cpp index 30fb648..4ef2850 100644 --- a/src/example.cpp +++ b/src/example.cpp @@ -17,8 +17,6 @@ namespace libmolgrid { using namespace std; -StringCache string_cache; - size_t Example::num_coordinates() const { unsigned N = 0; for(unsigned i = 0, n = sets.size(); i < n; i++) { diff --git a/src/example_extractor.cpp b/src/example_extractor.cpp index 527c5ba..dc1c134 100644 --- a/src/example_extractor.cpp +++ b/src/example_extractor.cpp @@ -10,59 +10,63 @@ #include #include -#include #include +#include namespace libmolgrid { using namespace std; using namespace OpenBabel; -void ExampleExtractor::extract(const ExampleRef& ref, Example& ex) { - ex.labels = ref.labels; //the easy part +void ExampleExtractor::extract(const ExampleRef &ref, Example &ex) { + ex.labels = ref.labels; // the easy part ex.group = ref.group; ex.seqcont = ref.seqcont; - //for each file in ref, get a coordinate set using the matching typer + // for each file in ref, get a coordinate set using the matching typer ex.sets.clear(); - if(!duplicate_poses || ref.files.size() < 3) { + if (!duplicate_poses || ref.files.size() < 3) { ex.sets.resize(ref.files.size()); - for(unsigned i = 0, n = ref.files.size(); i < n; i++) { - const char* fname = ref.files[i]; + for (unsigned i = 0, n = ref.files.size(); i < n; i++) { + const char *fname = ref.files[i]; unsigned t = i; - if(t >= coord_caches.size()) t = coord_caches.size()-1; //repeat last typer if necessary + if (t >= coord_caches.size()) + t = coord_caches.size() - 1; // repeat last typer if necessary coord_caches[t].set_coords(fname, ex.sets[i]); } - } else { //duplicate first pose (receptor) to match each of the remaining poses + } else { // duplicate first pose (receptor) to match each of the remaining + // poses unsigned N = ref.files.size() - 1; - ex.sets.resize(N*2); + ex.sets.resize(N * 2); coord_caches[0].set_coords(ref.files[0], ex.sets[0]); - for(unsigned i = 1, n = ref.files.size(); i < n; i++) { - const char* fname = ref.files[i]; + for (unsigned i = 1, n = ref.files.size(); i < n; i++) { + const char *fname = ref.files[i]; unsigned t = i; - if(t >= coord_caches.size()) t = coord_caches.size()-1; //repeat last typer if necessary - coord_caches[t].set_coords(fname, ex.sets[2*(i-1)+1]); + if (t >= coord_caches.size()) + t = coord_caches.size() - 1; // repeat last typer if necessary + coord_caches[t].set_coords(fname, ex.sets[2 * (i - 1) + 1]); - //duplicate receptor by copying - if(i > 1) ex.sets[2*(i-1)] = ex.sets[0]; + // duplicate receptor by copying + if (i > 1) + ex.sets[2 * (i - 1)] = ex.sets[0]; } } - } -//assume there are n files, return number oftypes +// assume there are n files, return number oftypes size_t ExampleExtractor::count_types(unsigned n) const { size_t ret = 0; - for(unsigned i = 0; i < n; i++) { + for (unsigned i = 0; i < n; i++) { unsigned t = i; - if(t >= coord_caches.size()) t = coord_caches.size()-1; + if (t >= coord_caches.size()) + t = coord_caches.size() - 1; ret += coord_caches[t].num_types(); } - if(duplicate_poses && coord_caches.size() > 2) { + if (duplicate_poses && coord_caches.size() > 2) { size_t rsize = coord_caches[0].num_types(); size_t dups = coord_caches.size() - 2; - ret += rsize*dups; + ret += rsize * dups; } return ret; } @@ -70,19 +74,39 @@ size_t ExampleExtractor::num_types() const { return count_types(coord_caches.size()); } -size_t ExampleExtractor::num_types(const ExampleRef& ref) const { +size_t ExampleExtractor::num_types(const ExampleRef &ref) const { return count_types(ref.files.size()); } std::vector ExampleExtractor::get_type_names() const { vector ret; - for(unsigned i = 0, n = coord_caches.size(); i < n; i++) { + for (unsigned i = 0, n = coord_caches.size(); i < n; i++) { vector names = coord_caches[i].get_type_names(); - for(auto name : names) { - ret.push_back(itoa(i)+"_"+name); + for (auto name : names) { + ret.push_back(itoa(i) + "_" + name); } } return ret; } +void ExampleExtractor::save_mem_caches(std::ostream &out) const { + for (const auto &c : coord_caches) { + c.save_mem_cache(out); + } +} + +void ExampleExtractor::load_mem_caches(std::istream &in) { + for (auto &c : coord_caches) { + c.load_mem_cache(in); + } +} + +size_t ExampleExtractor::mem_caches_size() const { + size_t num = 0; + for (const auto &c : coord_caches) { + num += c.mem_cache_size(); + } + return num; +} + } /* namespace libmolgrid */ diff --git a/src/string_cache.cpp b/src/string_cache.cpp new file mode 100644 index 0000000..007fefc --- /dev/null +++ b/src/string_cache.cpp @@ -0,0 +1,7 @@ +#include "libmolgrid/string_cache.h" + +namespace libmolgrid { + +StringCache string_cache; + +} \ No newline at end of file diff --git a/test/test_example_provider.py b/test/test_example_provider.py index 414abb4..d426567 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -3,6 +3,7 @@ import numpy as np import os import torch +import tempfile from pytest import approx from numpy import around @@ -37,6 +38,28 @@ def test_mol_example_provider(capsys): assert (6.0000, 3.8697, -6.6990, -4.3010, -9.0000, 3.3747, 6.0000, 3.8697, -6.6990, -4.3010) == approx(tuple(l1)) +def test_mem_caches_example_provider(capsys): + fname = datadir + "/smallmol.types" + e = molgrid.ExampleProvider(data_root=datadir + "/structs") + e2 = molgrid.ExampleProvider(data_root=datadir + "/structs") + + e.populate(fname) + e2.populate(fname) + assert e.mem_caches_size() == 0 + with capsys.disabled(): # bunch openbabel garbage + ex = e.next() + assert e.mem_caches_size() == 2 + with tempfile.NamedTemporaryFile() as tmp: + e.save_mem_caches(tmp.name) + e.load_mem_caches(tmp.name) + e2.load_mem_caches(tmp.name) + assert e.mem_caches_size() == 2 + assert e2.mem_caches_size() == 2 + with capsys.disabled(): # bunch openbabel garbage + ex = e.next() + ex = e2.next() + assert e.mem_caches_size() == 4 + assert e2.mem_caches_size() == 2 def test_custom_typer_example_provider(): fname = datadir + "/small.types" From 4a0443edfd5681671c8ae9afed8b98b559309019 Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Tue, 2 Jul 2024 15:23:42 -0400 Subject: [PATCH 08/35] no more forloops, more clean --- python/torch_bindings.py | 55 ++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/python/torch_bindings.py b/python/torch_bindings.py index 3632422..3ab2757 100644 --- a/python/torch_bindings.py +++ b/python/torch_bindings.py @@ -159,7 +159,8 @@ def extra_repr(self): class MolDataset(torch.utils.data.Dataset): '''A pytorch mappable dataset for molgrid training files.''' - def __init__(self, *args, **kwargs): + def __init__(self, *args, + **kwargs): '''Initialize mappable MolGridDataset. :param input(s): File name(s) of training example files :param typers: A tuple of AtomTypers to use @@ -174,8 +175,7 @@ def __init__(self, *args, **kwargs): ''' if 'typers' in kwargs: - typers = kwargs['typers'] - del kwargs['typers'] + typers = kwargs.pop('typers') self.examples = mg.ExampleDataset(*typers,**kwargs) self.typers = typers else: @@ -184,27 +184,26 @@ def __init__(self, *args, **kwargs): self.types_files = list(args) self.examples.populate(self.types_files) - self.num_labels = self.examples.num_labels() - - def __len__(self): return len(self.examples) def __getitem__(self, idx): ex = self.examples[idx] - center = torch.tensor([i for i in ex.coord_sets[-1].center()]) + center = torch.tensor(list(ex.coord_sets[-1].center())) coordinates = ex.merge_coordinates() if coordinates.has_vector_types() and coordinates.size() > 0: atomtypes = torch.tensor(coordinates.type_vector.tonumpy(),dtype=torch.long).type('torch.FloatTensor') else: atomtypes = torch.tensor(coordinates.type_index.tonumpy(),dtype=torch.long).type('torch.FloatTensor') coords = torch.tensor(coordinates.coords.tonumpy()) + length = len(coords) radii = torch.tensor(coordinates.radii.tonumpy()) - labels = [ex.labels[lab] for lab in range(self.num_labels)] - return center, coords, atomtypes, radii, labels + labels = torch.tensor(ex.labels) + return length, center, coords, atomtypes, radii, labels + def __getstate__(self): - settings = self.examples.settings() + settings = self.examples.settings() keyword_dict = {sett: getattr(settings, sett) for sett in dir(settings) if not sett.startswith('__')} if self.typers is not None: ## This will fail if self.typers is not none, need a way to pickle AtomTypers raise NotImplementedError('MolDataset does not support pickling when not using the default Gnina atom typers, this uses %s'.format(str(self.typers))) @@ -213,10 +212,8 @@ def __getstate__(self): def __setstate__(self,state): kwargs=state[0] - if 'typers' in kwargs: - typers = kwargs['typers'] - del kwargs['typers'] + typers = kwargs.pop('typers') self.examples = mg.ExampleDataset(*typers, **kwargs) self.typers = typers else: @@ -225,33 +222,19 @@ def __setstate__(self,state): self.types_files = list(state[1]) self.examples.populate(self.types_files) + self.num_labels = self.examples.num_labels() @staticmethod def collateMolDataset(batch): '''collate_fn for use in torch.utils.data.Dataloader when using the MolDataset. Returns lengths, centers, coords, types, radii, labels all padded to fit maximum size of batch''' - lens = [] - centers = [] - lcoords = [] - ltypes = [] - lradii = [] - labels = [] - for center,coords,types,radii,label in batch: - lens.append(coords.shape[0]) - centers.append(center) - lcoords.append(coords) - ltypes.append(types) - lradii.append(radii) - labels.append(torch.tensor(label)) - - - lengths = torch.tensor(lens) - lcoords = torch.nn.utils.rnn.pad_sequence(lcoords, batch_first=True) - ltypes = torch.nn.utils.rnn.pad_sequence(ltypes, batch_first=True) - lradii = torch.nn.utils.rnn.pad_sequence(lradii, batch_first=True) - - centers = torch.stack(centers,dim=0) - labels = torch.stack(labels,dim=0) + batch_list = list(zip(*batch)) + lengths = torch.tensor(batch_list[0]) + centers = torch.stack(batch_list[1], dim=0) + coords = torch.nn.utils.rnn.pad_sequence(batch_list[2], batch_first=True) + types = torch.nn.utils.rnn.pad_sequence(batch_list[3], batch_first=True) + radii = torch.nn.utils.rnn.pad_sequence(batch_list[4], batch_first=True) + labels = torch.stack(batch_list[5], dim=0) - return lengths, centers, lcoords, ltypes, lradii, labels + return lengths, centers, coords, types, radii, labels From 7a74f54a1cf92e11e4c8b34624a5594bee10b076 Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Tue, 2 Jul 2024 15:24:58 -0400 Subject: [PATCH 09/35] can handle random rotations and translations of the dataset --- python/torch_bindings.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/python/torch_bindings.py b/python/torch_bindings.py index 3ab2757..3cb972f 100644 --- a/python/torch_bindings.py +++ b/python/torch_bindings.py @@ -160,6 +160,8 @@ def extra_repr(self): class MolDataset(torch.utils.data.Dataset): '''A pytorch mappable dataset for molgrid training files.''' def __init__(self, *args, + random_translation: float=0.0, + random_rotation: bool=False, **kwargs): '''Initialize mappable MolGridDataset. :param input(s): File name(s) of training example files @@ -174,6 +176,8 @@ def __init__(self, *args, :param ligmolcache: precalculated molcache2 file for ligand; if doesn't exist, will look in data_root ''' + self._random_translation, self._random_rotation = random_translation, random_rotation + print(self._random_translation, self._random_rotation) if 'typers' in kwargs: typers = kwargs.pop('typers') self.examples = mg.ExampleDataset(*typers,**kwargs) @@ -191,6 +195,8 @@ def __getitem__(self, idx): ex = self.examples[idx] center = torch.tensor(list(ex.coord_sets[-1].center())) coordinates = ex.merge_coordinates() + if self._random_translation > 0 or self._random_rotation: + mg.Transform(ex.coord_sets[-1].center(), self._random_translation, self._random_rotation).forward(coordinates, coordinates) if coordinates.has_vector_types() and coordinates.size() > 0: atomtypes = torch.tensor(coordinates.type_vector.tonumpy(),dtype=torch.long).type('torch.FloatTensor') else: @@ -208,10 +214,14 @@ def __getstate__(self): if self.typers is not None: ## This will fail if self.typers is not none, need a way to pickle AtomTypers raise NotImplementedError('MolDataset does not support pickling when not using the default Gnina atom typers, this uses %s'.format(str(self.typers))) keyword_dict['typers'] = self.typers + keyword_dict['random_translation'] = self._random_translation + keyword_dict['random_rotation'] = self._random_rotation return keyword_dict, self.types_files def __setstate__(self,state): kwargs=state[0] + self._random_translation = kwargs.pop('random_translation') + self._random_rotation = kwargs.pop('random_rotation') if 'typers' in kwargs: typers = kwargs.pop('typers') self.examples = mg.ExampleDataset(*typers, **kwargs) @@ -238,3 +248,32 @@ def collateMolDataset(batch): labels = torch.stack(batch_list[5], dim=0) return lengths, centers, coords, types, radii, labels + + # @staticmethod + # def collateMolDataset(batch): + # '''collate_fn for use in torch.utils.data.Dataloader when using the MolDataset. + # Returns lengths, centers, coords, types, radii, labels all padded to fit maximum size of batch''' + # lens = [] + # centers = [] + # lcoords = [] + # ltypes = [] + # lradii = [] + # labels = [] + # for center,coords,types,radii,label in batch: + # lens.append(coords.shape[0]) + # centers.append(center) + # lcoords.append(coords) + # ltypes.append(types) + # lradii.append(radii) + # labels.append(torch.tensor(label)) + + + # lengths = torch.tensor(lens) + # lcoords = torch.nn.utils.rnn.pad_sequence(lcoords, batch_first=True) + # ltypes = torch.nn.utils.rnn.pad_sequence(ltypes, batch_first=True) + # lradii = torch.nn.utils.rnn.pad_sequence(lradii, batch_first=True) + + # centers = torch.stack(centers,dim=0) + # labels = torch.stack(labels,dim=0) + + # return lengths, centers, lcoords, ltypes, lradii, labels From d1f61e47f73990911f987bab5a6d0911270b502f Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Tue, 2 Jul 2024 15:31:33 -0400 Subject: [PATCH 10/35] remove commented code --- python/torch_bindings.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/python/torch_bindings.py b/python/torch_bindings.py index 3cb972f..febad42 100644 --- a/python/torch_bindings.py +++ b/python/torch_bindings.py @@ -248,32 +248,3 @@ def collateMolDataset(batch): labels = torch.stack(batch_list[5], dim=0) return lengths, centers, coords, types, radii, labels - - # @staticmethod - # def collateMolDataset(batch): - # '''collate_fn for use in torch.utils.data.Dataloader when using the MolDataset. - # Returns lengths, centers, coords, types, radii, labels all padded to fit maximum size of batch''' - # lens = [] - # centers = [] - # lcoords = [] - # ltypes = [] - # lradii = [] - # labels = [] - # for center,coords,types,radii,label in batch: - # lens.append(coords.shape[0]) - # centers.append(center) - # lcoords.append(coords) - # ltypes.append(types) - # lradii.append(radii) - # labels.append(torch.tensor(label)) - - - # lengths = torch.tensor(lens) - # lcoords = torch.nn.utils.rnn.pad_sequence(lcoords, batch_first=True) - # ltypes = torch.nn.utils.rnn.pad_sequence(ltypes, batch_first=True) - # lradii = torch.nn.utils.rnn.pad_sequence(lradii, batch_first=True) - - # centers = torch.stack(centers,dim=0) - # labels = torch.stack(labels,dim=0) - - # return lengths, centers, lcoords, ltypes, lradii, labels From 8bc5ec1db4bbcab006ff5f7d1e21a354a26e6ab2 Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Tue, 2 Jul 2024 15:46:17 -0400 Subject: [PATCH 11/35] length provided --- test/test_example_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index d426567..7918d3f 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -366,7 +366,7 @@ def test_pytorch_dataset(): ex = e.next() coordinates = ex.merge_coordinates() - center, coords, types, radii, labels = m[0] + lengths, center, coords, types, radii, labels = m[0] assert list(center.shape) == [3] np.testing.assert_allclose(coords, coordinates.coords.tonumpy()) From 0b10133b22b959cdda81349cf3da16a855fba0f0 Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Tue, 2 Jul 2024 15:56:29 -0400 Subject: [PATCH 12/35] length provided --- test/test_example_provider.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index 7918d3f..2565ee2 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -378,7 +378,7 @@ def test_pytorch_dataset(): np.testing.assert_allclose(labels[1], 6.05) np.testing.assert_allclose(labels[-1], 0.162643) - center, coords, types, radii, labels = m[-1] + lengths, center, coords, types, radii, labels = m[-1] assert labels[0] == 0 np.testing.assert_allclose(labels[1], -10.3) @@ -396,7 +396,7 @@ def test_pytorch_dataset(): assert radii.shape[0] == 8 assert labels.shape[0] == 8 - mcenter, mcoords, mtypes, mradii, mlabels = m[10] + mlengths, mcenter, mcoords, mtypes, mradii, mlabels = m[10] np.testing.assert_allclose(center[2], mcenter) np.testing.assert_allclose(coords[2][:lengths[2]], mcoords) np.testing.assert_allclose(types[2][:lengths[2]], mtypes) From 5cba69aebc0411fc6ba32b72dc3e276ab9ffc4f6 Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 8 Jul 2024 14:23:30 -0400 Subject: [PATCH 13/35] MolDataset -> MolMapDataset and MolIterDataset Created a MolIterDataset which uses the ExampleProvider to provide a dataset with all the ExampleProviderSettings. MolIterDataset is designed to be used with torch.utils.data.Dataloader(batch_size=None) since the ExampleProvider already provides a batch. Similarly to MolMapDataset, it returns a tuple of: lengths, centers, coords, atom_types, radii, and labels. These can be passed directly to gridmaker.forward to voxelize the batch. Translation and rotation of the examples is handled by the Dataset. --- python/torch_bindings.py | 118 ++++++++++++++++++++++++++++++++-- test/test_example_provider.py | 69 +++++++++++++++++++- 2 files changed, 179 insertions(+), 8 deletions(-) diff --git a/python/torch_bindings.py b/python/torch_bindings.py index febad42..275c9c4 100644 --- a/python/torch_bindings.py +++ b/python/torch_bindings.py @@ -1,6 +1,8 @@ import torch import molgrid as mg import types +from itertools import islice + def tensor_as_grid(t): '''Return a Grid view of tensor t''' gname = 'Grid' @@ -157,7 +159,7 @@ def extra_repr(self): self.gmaker.get_resolution(), self.gmaker.get_dimension(), self.center[0], self.center[1], self.center[2]) -class MolDataset(torch.utils.data.Dataset): +class MolMapDataset(torch.utils.data.Dataset): '''A pytorch mappable dataset for molgrid training files.''' def __init__(self, *args, random_translation: float=0.0, @@ -177,7 +179,6 @@ def __init__(self, *args, ''' self._random_translation, self._random_rotation = random_translation, random_rotation - print(self._random_translation, self._random_rotation) if 'typers' in kwargs: typers = kwargs.pop('typers') self.examples = mg.ExampleDataset(*typers,**kwargs) @@ -212,7 +213,7 @@ def __getstate__(self): settings = self.examples.settings() keyword_dict = {sett: getattr(settings, sett) for sett in dir(settings) if not sett.startswith('__')} if self.typers is not None: ## This will fail if self.typers is not none, need a way to pickle AtomTypers - raise NotImplementedError('MolDataset does not support pickling when not using the default Gnina atom typers, this uses %s'.format(str(self.typers))) + raise NotImplementedError('MolMapDataset does not support pickling when not using the default Gnina atom typers, this uses %s'.format(str(self.typers))) keyword_dict['typers'] = self.typers keyword_dict['random_translation'] = self._random_translation keyword_dict['random_rotation'] = self._random_rotation @@ -233,11 +234,9 @@ def __setstate__(self,state): self.examples.populate(self.types_files) - self.num_labels = self.examples.num_labels() - @staticmethod def collateMolDataset(batch): - '''collate_fn for use in torch.utils.data.Dataloader when using the MolDataset. + '''collate_fn for use in torch.utils.data.Dataloader when using the MolMapDataset. Returns lengths, centers, coords, types, radii, labels all padded to fit maximum size of batch''' batch_list = list(zip(*batch)) lengths = torch.tensor(batch_list[0]) @@ -248,3 +247,110 @@ def collateMolDataset(batch): labels = torch.stack(batch_list[5], dim=0) return lengths, centers, coords, types, radii, labels + +class MolIterDataset(torch.utils.data.IterableDataset): + '''A pytorch iterable dataset for molgrid training files. Use with a DataLoader(batch_size=None) for best results.''' + def __init__(self, *args, + random_translation: float=0.0, + random_rotation: bool=False, + **kwargs): + '''Initialize mappable MolGridDataset. + :param input(s): File name(s) of training example files + :param typers: A tuple of AtomTypers to use + :type typers: tuple + :param cache_structs: retain coordinates in memory for faster training + :param add_hydrogens: protonate molecules read using openbabel + :param duplicate_first: clone the first coordinate set to be paired with each of the remaining (receptor-ligand pairs) + :param make_vector_types: convert index types into one-hot encoded vector types + :param data_root: prefix for data files + :param recmolcache: precalculated molcache2 file for receptor (first molecule); if doesn't exist, will look in data _root + :param ligmolcache: precalculated molcache2 file for ligand; if doesn't exist, will look in data_root + ''' + + # molgrid.set_random_seed(kwargs['random_seed']) + self._random_translation, self._random_rotation = random_translation, random_rotation + if 'typers' in kwargs: + typers = kwargs.pop('typers') + self.examples = mg.ExampleProvider(*typers,**kwargs) + self.typers = typers + else: + self.examples = mg.ExampleProvider(**kwargs) + self.typers = None + self.types_files = list(args) + self.examples.populate(self.types_files) + + self._num_labels = self.examples.num_labels() + + def generate(self): + for batch in self.examples: + yield self.batch_to_tensors(batch) + + def batch_to_tensors(self, batch): + batch_lengths = torch.zeros(len(batch), dtype=torch.int64) + batch_centers = torch.zeros((len(batch), 3), dtype=torch.float32) + batch_coords = [] + batch_atomtypes = [] + batch_radii = [] + batch_labels = torch.zeros((len(batch),self._num_labels), dtype=torch.float32) + for idx, ex in enumerate(batch): + length, center, coords, atomtypes, radii, labels = self.example_to_tensor(ex) + batch_lengths[idx] = length + batch_centers[idx,:] = center + batch_coords.append(coords) + batch_atomtypes.append(atomtypes) + batch_radii.append(radii) + batch_labels[idx,:] = labels + pad_coords = torch.nn.utils.rnn.pad_sequence(batch_coords, batch_first=True) + pad_atomtypes = torch.nn.utils.rnn.pad_sequence(batch_atomtypes, batch_first=True) + pad_radii = torch.nn.utils.rnn.pad_sequence(batch_radii, batch_first=True) + return batch_lengths, batch_centers, pad_coords, pad_atomtypes, pad_radii, batch_labels + + + def example_to_tensor(self, ex): + center = torch.tensor(list(ex.coord_sets[-1].center())) + coordinates = ex.merge_coordinates() + if self._random_translation > 0 or self._random_rotation: + mg.Transform(ex.coord_sets[-1].center(), self._random_translation, self._random_rotation).forward(coordinates, coordinates) + if coordinates.has_vector_types() and coordinates.size() > 0: + atomtypes = torch.tensor(coordinates.type_vector.tonumpy(),dtype=torch.long).type('torch.FloatTensor') + else: + atomtypes = torch.tensor(coordinates.type_index.tonumpy(),dtype=torch.long).type('torch.FloatTensor') + coords = torch.tensor(coordinates.coords.tonumpy()) + length = len(coords) + radii = torch.tensor(coordinates.radii.tonumpy()) + labels = torch.tensor(ex.labels) + return length, center, coords, atomtypes, radii, labels + + def __iter__(self): + worker_info = torch.utils.data.get_worker_info() + if worker_info is None: + return self.generate() + dataset = worker_info.dataset + worker_id = worker_info.id + n_workers = worker_info.num_workers + + return islice(self.generate(), worker_id, None, n_workers) + + def __getstate__(self): + settings = self.examples.settings() + keyword_dict = {sett: getattr(settings, sett) for sett in dir(settings) if not sett.startswith('__')} + if self.typers is not None: ## This will fail if self.typers is not none, need a way to pickle AtomTypers + raise NotImplementedError('MolIterDataset does not support pickling when not using the default Gnina atom typers, this uses %s'.format(str(self.typers))) + keyword_dict['typers'] = self.typers + keyword_dict['random_translation'] = self._random_translation + keyword_dict['random_rotation'] = self._random_rotation + return keyword_dict, self.types_files + + def __setstate__(self,state): + kwargs=state[0] + self._random_translation = kwargs.pop('random_translation') + self._random_rotation = kwargs.pop('random_rotation') + if 'typers' in kwargs: + typers = kwargs.pop('typers') + self.examples = mg.ExampleProvider(*typers, **kwargs) + self.typers = typers + else: + self.examples = mg.ExampleProvider(**kwargs) + self.typers = None + self.types_files = list(state[1]) + self.examples.populate(self.types_files) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index 2565ee2..1b6adfc 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -354,12 +354,12 @@ def test_example_provider_iterator_interface(): break -def test_pytorch_dataset(): +def test_pytorch_mapdataset(): fname = datadir + "/small.types" e = molgrid.ExampleProvider(data_root=datadir + "/structs") e.populate(fname) - m = molgrid.MolDataset(fname, data_root=datadir + "/structs") + m = molgrid.MolMapDataset(fname, data_root=datadir + "/structs") assert len(m) == 1000 @@ -422,6 +422,71 @@ def test_pytorch_dataset(): singlegrid = molgrid.MGrid4f(*shape) gmaker.forward(ex, singlegrid.cpu()) np.testing.assert_allclose(mgrid[2].tonumpy(),singlegrid.tonumpy(),atol=1e-5) + +def test_pytorch_iterdataset(): + fname = datadir + "/small.types" + + BSIZE = 25 + e = molgrid.ExampleProvider(data_root=datadir + "/structs", default_batch_size=BSIZE) + e.populate(fname) + m = molgrid.MolIterDataset(fname, data_root=datadir + "/structs", default_batch_size=BSIZE) + m_iter = iter(m) + + ex = e.next() + coordinates = ex[0].merge_coordinates() + + lengths, centers, coords, types, radii, labels = next(m_iter) + + assert list(center.shape) == [3] + np.testing.assert_allclose(coords[0,:lengths[0],:], coordinates.coords.tonumpy()) + np.testing.assert_allclose(types[0,:lengths[0]], coordinates.type_index.tonumpy()) + np.testing.assert_allclose(radii[0,:lengths[0]], coordinates.radii.tonumpy()) + + assert len(labels) == BSIZE + assert len(labels[0]) == 3 + assert labels[0,0] == 1 + np.testing.assert_allclose(labels[0,1], 6.05) + np.testing.assert_allclose(labels[0,-1], 0.162643) + + # ensure it works with more than 1 worker + torch_loader = torch.utils.data.DataLoader( + m, batch_size=None, num_workers=2) + iterator = iter(torch_loader) + next(iterator) + lengths, center, coords, types, radii, labels = next(iterator) + assert len(lengths) == BSIZE + assert center.shape[0] == BSIZE + assert coords.shape[0] == BSIZE + assert types.shape[0] == BSIZE + assert radii.shape[0] == BSIZE + assert radii.shape[0] == BSIZE + assert labels.shape[0] == BSIZE + + ex = e.next() + coordinates = ex[2].merge_coordinates() + np.testing.assert_allclose(center[2], coordinates.coord_sets.center().tonumpy()) + np.testing.assert_allclose(coords[2,:lengths[2]], coordinates.coords.tonumpy()) + np.testing.assert_allclose(types[2,:lengths[2]], coordinates.type_index.tonumpy()) + np.testing.assert_allclose(radii[2,:lengths[2]], coordinates.radii.tonumpy()) + assert len(labels[2]) == e.num_labels() + assert labels[2,0] == ex[2].labels[0] + assert labels[2,1] == ex[2].labels[1] + + gmaker = molgrid.GridMaker() + shape = gmaker.grid_dimensions(e.num_types()) + mgrid = molgrid.MGrid5f(BSIZE,*shape) + + gmaker.forward(center, coords, types, radii, mgrid.cpu()) + + mgridg = molgrid.MGrid5f(BSIZE,*shape) + gmaker.forward(center.cuda(), coords.cuda(), types.cuda(), radii.cuda(), mgridg.gpu()) + + np.testing.assert_allclose(mgrid.tonumpy(),mgridg.tonumpy(),atol=1e-5) + + #compare against standard provider + egrid = molgrid.MGrid5f(BSIZE,*shape) + gmaker.forward(ex, egrid.cpu()) + np.testing.assert_allclose(mgridg.tonumpy(),egrid.tonumpy(),atol=1e-5) def test_duplicated_examples(): From d83b0c900d43f41ac5b706422a2c0e624e00bb73 Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 8 Jul 2024 15:23:28 -0400 Subject: [PATCH 14/35] test debug --- test/test_example_provider.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index 1b6adfc..22d554d 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -384,7 +384,7 @@ def test_pytorch_mapdataset(): '''Testing out the collate_fn when used with torch.utils.data.DataLoader''' torch_loader = torch.utils.data.DataLoader( - m, batch_size=8, collate_fn=molgrid.MolDataset.collateMolDataset) + m, batch_size=8, collate_fn=molgrid.MolMapDataset.collateMolDataset) iterator = iter(torch_loader) next(iterator) lengths, center, coords, types, radii, labels = next(iterator) @@ -462,7 +462,9 @@ def test_pytorch_iterdataset(): assert radii.shape[0] == BSIZE assert labels.shape[0] == BSIZE - ex = e.next() + e.reset() + e.next_batch() + ex = e.next_batch() coordinates = ex[2].merge_coordinates() np.testing.assert_allclose(center[2], coordinates.coord_sets.center().tonumpy()) np.testing.assert_allclose(coords[2,:lengths[2]], coordinates.coords.tonumpy()) From f22428ced0e81b956a24fab67ffc0dc6524fc24f Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 8 Jul 2024 15:33:53 -0400 Subject: [PATCH 15/35] test debug --- test/test_example_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index 22d554d..c73d13e 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -433,7 +433,7 @@ def test_pytorch_iterdataset(): m_iter = iter(m) ex = e.next() - coordinates = ex[0].merge_coordinates() + coordinates = ex.merge_coordinates() lengths, centers, coords, types, radii, labels = next(m_iter) From 2412510e64720bf5204eace04c5863c6dda16fdc Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 8 Jul 2024 16:25:42 -0400 Subject: [PATCH 16/35] test debug --- test/test_example_provider.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index c73d13e..8a8ac46 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -437,7 +437,7 @@ def test_pytorch_iterdataset(): lengths, centers, coords, types, radii, labels = next(m_iter) - assert list(center.shape) == [3] + assert list(centers.shape) == [BSIZE,3] np.testing.assert_allclose(coords[0,:lengths[0],:], coordinates.coords.tonumpy()) np.testing.assert_allclose(types[0,:lengths[0]], coordinates.type_index.tonumpy()) np.testing.assert_allclose(radii[0,:lengths[0]], coordinates.radii.tonumpy()) @@ -459,7 +459,6 @@ def test_pytorch_iterdataset(): assert coords.shape[0] == BSIZE assert types.shape[0] == BSIZE assert radii.shape[0] == BSIZE - assert radii.shape[0] == BSIZE assert labels.shape[0] == BSIZE e.reset() From d9c2e8a4ffd179fb522a6efd4c5644424a54a2bf Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 8 Jul 2024 16:35:00 -0400 Subject: [PATCH 17/35] test debug --- test/test_example_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index 8a8ac46..b6ea062 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -465,7 +465,7 @@ def test_pytorch_iterdataset(): e.next_batch() ex = e.next_batch() coordinates = ex[2].merge_coordinates() - np.testing.assert_allclose(center[2], coordinates.coord_sets.center().tonumpy()) + np.testing.assert_allclose(center[2], ex[2].coord_sets[-1].center().tonumpy()) np.testing.assert_allclose(coords[2,:lengths[2]], coordinates.coords.tonumpy()) np.testing.assert_allclose(types[2,:lengths[2]], coordinates.type_index.tonumpy()) np.testing.assert_allclose(radii[2,:lengths[2]], coordinates.radii.tonumpy()) From da2aa72ccdf1c40b6b86e13cb6ed81401bf67d1e Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 8 Jul 2024 16:46:19 -0400 Subject: [PATCH 18/35] test debug --- test/test_example_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index b6ea062..929bef5 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -465,7 +465,7 @@ def test_pytorch_iterdataset(): e.next_batch() ex = e.next_batch() coordinates = ex[2].merge_coordinates() - np.testing.assert_allclose(center[2], ex[2].coord_sets[-1].center().tonumpy()) + np.testing.assert_allclose(center[2], np.array(list(ex[2].coord_sets[-1].center()))) np.testing.assert_allclose(coords[2,:lengths[2]], coordinates.coords.tonumpy()) np.testing.assert_allclose(types[2,:lengths[2]], coordinates.type_index.tonumpy()) np.testing.assert_allclose(radii[2,:lengths[2]], coordinates.radii.tonumpy()) From c3551457526907e68ae56f5012605611f9f0f194 Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 8 Jul 2024 16:59:13 -0400 Subject: [PATCH 19/35] test debug --- test/test_example_provider.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_example_provider.py b/test/test_example_provider.py index 929bef5..4f7fe02 100644 --- a/test/test_example_provider.py +++ b/test/test_example_provider.py @@ -449,6 +449,7 @@ def test_pytorch_iterdataset(): np.testing.assert_allclose(labels[0,-1], 0.162643) # ensure it works with more than 1 worker + m.examples.reset() torch_loader = torch.utils.data.DataLoader( m, batch_size=None, num_workers=2) iterator = iter(torch_loader) From 4674cfa0835ae777c0eb9a75562bfb16e28d1bbe Mon Sep 17 00:00:00 2001 From: David Koes Date: Fri, 12 Jul 2024 12:31:00 -0400 Subject: [PATCH 20/35] Rearrange thrust includes for CUDA 12.5 compat --- test/test_mgrid.cpp | 40 ---------------------------------------- test/test_mgrid.cu | 38 ++++++++++++++++++++++++++++++++++++++ test/test_transform.cpp | 2 -- 3 files changed, 38 insertions(+), 42 deletions(-) diff --git a/test/test_mgrid.cpp b/test/test_mgrid.cpp index 15a856c..014d892 100644 --- a/test/test_mgrid.cpp +++ b/test/test_mgrid.cpp @@ -7,9 +7,6 @@ #define BOOST_TEST_MODULE grid_test #include -#include -#include - #include "libmolgrid/managed_grid.h" using namespace libmolgrid; @@ -88,43 +85,6 @@ BOOST_AUTO_TEST_CASE( indirect_indexing ) } -BOOST_AUTO_TEST_CASE( grid_conversion ) -{ - MGrid3f g3(7,13,11); - MGrid1f g1(100); - - for(unsigned i = 0; i < 7; i++) - for(unsigned j = 0; j < 13; j++) - for(unsigned k = 0; k < 11; k++) { - g3[i][j][k] = i+j+k; - } - - for(unsigned i = 0; i < 100; i++) { - g1(i) = i; - } - - Grid3f cpu3(g3); - Grid1f cpu1 = g1.cpu(); - - float sum3 = thrust::reduce(thrust::host, cpu3.data(), cpu3.data()+cpu3.size()); - BOOST_CHECK_EQUAL(sum3,14014); - - float sum1 = thrust::reduce(thrust::host, cpu1.data(), cpu1.data()+cpu1.size()); - BOOST_CHECK_EQUAL(sum1,4950); - - MGrid6d g6(3,4,5,2,1,10); - g6[2][2][2][0][0][5] = 3.14; - Grid6d cpu6 = (Grid6d)g6; //cast conversion - BOOST_CHECK_EQUAL(cpu6.size(),1200); - BOOST_CHECK_EQUAL(cpu6(2,2,2,0,0,5), 3.14); - - Grid6dCUDA gpu6 = (Grid6dCUDA)g6; - double *cudaptr = gpu6.address(2,2,2,0,0,5); - double val = 0; - cudaMemcpy(&val, cudaptr, sizeof(double), cudaMemcpyDeviceToHost); - BOOST_CHECK_EQUAL(val, 3.14); - -} BOOST_AUTO_TEST_CASE( blank_mgrid ) diff --git a/test/test_mgrid.cu b/test/test_mgrid.cu index b4b1a42..71764c6 100644 --- a/test/test_mgrid.cu +++ b/test/test_mgrid.cu @@ -60,3 +60,41 @@ BOOST_AUTO_TEST_CASE( grid_conversion ) cudaError_t error = cudaGetLastError(); BOOST_CHECK_EQUAL(error,cudaSuccess); } + +BOOST_AUTO_TEST_CASE( grid_conversion2 ) +{ + MGrid3f g3(7,13,11); + MGrid1f g1(100); + + for(unsigned i = 0; i < 7; i++) + for(unsigned j = 0; j < 13; j++) + for(unsigned k = 0; k < 11; k++) { + g3[i][j][k] = i+j+k; + } + + for(unsigned i = 0; i < 100; i++) { + g1(i) = i; + } + + Grid3f cpu3(g3); + Grid1f cpu1 = g1.cpu(); + + float sum3 = thrust::reduce(thrust::host, cpu3.data(), cpu3.data()+cpu3.size()); + BOOST_CHECK_EQUAL(sum3,14014); + + float sum1 = thrust::reduce(thrust::host, cpu1.data(), cpu1.data()+cpu1.size()); + BOOST_CHECK_EQUAL(sum1,4950); + + MGrid6d g6(3,4,5,2,1,10); + g6[2][2][2][0][0][5] = 3.14; + Grid6d cpu6 = (Grid6d)g6; //cast conversion + BOOST_CHECK_EQUAL(cpu6.size(),1200); + BOOST_CHECK_EQUAL(cpu6(2,2,2,0,0,5), 3.14); + + Grid6dCUDA gpu6 = (Grid6dCUDA)g6; + double *cudaptr = gpu6.address(2,2,2,0,0,5); + double val = 0; + cudaMemcpy(&val, cudaptr, sizeof(double), cudaMemcpyDeviceToHost); + BOOST_CHECK_EQUAL(val, 3.14); + +} diff --git a/test/test_transform.cpp b/test/test_transform.cpp index 88976fd..bc63f64 100644 --- a/test/test_transform.cpp +++ b/test/test_transform.cpp @@ -8,8 +8,6 @@ #define BOOST_TEST_MODULE transform_test #include #include -#include -#include #include "libmolgrid/libmolgrid.h" #include "libmolgrid/transform.h" From 641021085100433b09a2e627cf69162ce674f41f Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 22 Jul 2024 15:51:48 -0400 Subject: [PATCH 21/35] add length and DDP support to MolIterDataset --- python/torch_bindings.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/python/torch_bindings.py b/python/torch_bindings.py index 275c9c4..d99ef1f 100644 --- a/python/torch_bindings.py +++ b/python/torch_bindings.py @@ -1,4 +1,7 @@ +import math import torch +from torch.distributed import get_rank, get_world_size +from torch.utils.data import DistributedSampler import molgrid as mg import types from itertools import islice @@ -323,13 +326,26 @@ def example_to_tensor(self, ex): def __iter__(self): worker_info = torch.utils.data.get_worker_info() - if worker_info is None: - return self.generate() - dataset = worker_info.dataset - worker_id = worker_info.id - n_workers = worker_info.num_workers + worker_id = worker_info.id if worker_info is not None else 0 + n_workers = worker_info.num_workers if worker_info is not None else 1 + + world_size = get_world_size() + if world_size == 1: + return islice(self.generate(), worker_id, None, n_workers) + process_rank = get_rank() + + return islice(self.generate(), process_rank * n_workers + worker_id, None, n_workers * world_size) + + def __len__(self): + settings = self.examples.settings() + batch_size = settings.default_batch_size + if settings.iteration_scheme == mg.IterationScheme.SmallEpoch: + return self.examples.small_epoch_size() // batch_size + elif settings.iteration_scheme == mg.IterationScheme.LargeEpoch: + return math.ceil(self.examples.large_epoch_size() / batch_size) + else: + NotImplementedError('Iteration scheme %s not supported'.format(itr_scheme)) - return islice(self.generate(), worker_id, None, n_workers) def __getstate__(self): settings = self.examples.settings() From 3ef67aeb063c9e153e207c8c145a8c6e333637a2 Mon Sep 17 00:00:00 2001 From: Drew McNutt Date: Mon, 22 Jul 2024 16:00:54 -0400 Subject: [PATCH 22/35] check if distributed used before world check --- python/torch_bindings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/torch_bindings.py b/python/torch_bindings.py index d99ef1f..fd60596 100644 --- a/python/torch_bindings.py +++ b/python/torch_bindings.py @@ -329,7 +329,7 @@ def __iter__(self): worker_id = worker_info.id if worker_info is not None else 0 n_workers = worker_info.num_workers if worker_info is not None else 1 - world_size = get_world_size() + world_size = get_world_size() if torch.distributed.is_initialized() else 1 if world_size == 1: return islice(self.generate(), worker_id, None, n_workers) process_rank = get_rank() From 025b7b2c4539492cc9df67647b4e2f39e1cc44e9 Mon Sep 17 00:00:00 2001 From: David Koes Date: Fri, 4 Oct 2024 10:35:07 -0400 Subject: [PATCH 23/35] Fix gcc13 compiling problems. --- .vscode/settings.json | 5 ++++- CMakeLists.txt | 1 + python/bindings.h | 1 + python/bindings.in.cpp | 8 ++++---- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 59f9872..03f2e1b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -66,7 +66,10 @@ "cinttypes": "cpp", "typeindex": "cpp", "typeinfo": "cpp", - "variant": "cpp" + "variant": "cpp", + "codecvt": "cpp", + "cfenv": "cpp", + "filesystem": "cpp" }, "C_Cpp.files.exclude": { "/usr/local/include/libmolgrid/**": true diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ac2211..c08fbb0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) endif() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES OFF) option(BUILD_SHARED "Build shared library" ON) option(BUILD_STATIC "Build static library" ON) diff --git a/python/bindings.h b/python/bindings.h index 6fe619a..2723d6d 100644 --- a/python/bindings.h +++ b/python/bindings.h @@ -26,6 +26,7 @@ #define TYPEARG(Z, N, T) BOOST_PP_COMMA_IF(N) T #define NTYPES(N, T) BOOST_PP_REPEAT(N, TYPEARG, T) + extern bool python_gpu_enabled; bool init_numpy(); diff --git a/python/bindings.in.cpp b/python/bindings.in.cpp index 00319b3..65158ff 100644 --- a/python/bindings.in.cpp +++ b/python/bindings.in.cpp @@ -190,7 +190,7 @@ std::vector< std::vector > listlist_to_vecvec(list l) { class PythonCallbackIndexTyper: public CallbackIndexTyper { boost::python::object callback; - + using CallbackIndexTyper::get_atom_type_index; public: /// iniitalize callbacktyper, if names are not provided, numerical names will be generated @@ -217,7 +217,7 @@ class PythonCallbackIndexTyper: public CallbackIndexTyper { class PythonCallbackVectorTyper: public CallbackVectorTyper { boost::python::object callback; - + using CallbackVectorTyper::get_atom_type_vector; public: /// iniitalize callbacktyper, if names are not provided, numerical names will be generated @@ -555,7 +555,7 @@ MAKE_ALL_GRIDS() init( (arg("func"), arg("num_types"), arg("names") = list() ) )) .def("num_types", &PythonCallbackIndexTyper::num_types) - .def("get_atom_type_index", &PythonCallbackIndexTyper::get_atom_type_index) + .def (PythonCallbackIndexTyper::*)(object a) const>("get_atom_type_index", &PythonCallbackIndexTyper::get_atom_type_index) .def("get_type_names",&PythonCallbackIndexTyper::get_type_names); implicitly_convertible, std::shared_ptr >(); @@ -574,7 +574,7 @@ MAKE_ALL_GRIDS() init( (arg("func"), arg("num_types"), arg("names") = list() ) )) .def("num_types", &PythonCallbackVectorTyper::num_types) - .def("get_atom_type_vector", &PythonCallbackVectorTyper::get_atom_type_vector) + .def< tuple (PythonCallbackVectorTyper::*)(object a) const >("get_atom_type_vector", &PythonCallbackVectorTyper::get_atom_type_vector) .def("get_type_names",&PythonCallbackVectorTyper::get_type_names); implicitly_convertible, std::shared_ptr >(); From 4d3936df6026f212dd7c8ccaf8224c866939fcbf Mon Sep 17 00:00:00 2001 From: Ryan Swart Date: Mon, 3 Jul 2023 10:32:00 +0800 Subject: [PATCH 24/35] fix: lib-only build --- CMakeLists.txt | 8 ++++---- default.nix | 44 ++++++++++++++++++++++++++++++++++++++++++++ flake.lock | 25 +++++++++++++++++++++++++ flake.nix | 9 +++++++++ result | 1 + 5 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 default.nix create mode 100644 flake.lock create mode 100644 flake.nix create mode 120000 result diff --git a/CMakeLists.txt b/CMakeLists.txt index c08fbb0..63d055c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,11 +83,11 @@ include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) include_directories(SYSTEM ${Boost_INCLUDE_DIR}) add_subdirectory(src) -add_subdirectory(python) +# add_subdirectory(python) add_subdirectory(docs) # enable testing -include(CTest) -enable_testing() +#include(CTest) +#enable_testing() # define tests -add_subdirectory(test) +#add_subdirectory(test) diff --git a/default.nix b/default.nix new file mode 100644 index 0000000..33476d2 --- /dev/null +++ b/default.nix @@ -0,0 +1,44 @@ +{ lib, stdenv, fetchFromGitHub, cmake, cudatoolkit, openbabel, zlib, boost +, python310Packages, python310 }: + +stdenv.mkDerivation rec { + pname = "libmolgrid"; + version = "0.5.3"; + + src = ./.; + # fetchFromGitHub { + # owner = "gnina"; + # repo = "libmolgrid"; + # rev = "v${version}"; + # hash = "sha256-YdEjXfrTf9hw0nMbC2JWZ7Gf/psZ4RQ6v6GUrx5yIoA="; + # }; + + nativeBuildInputs = [ cmake ]; + + buildInputs = [ + cudatoolkit + openbabel + zlib + boost.dev + python310Packages.boost + python310Packages.pytest + python310Packages.numpy + python310Packages.pyquaternion + python310Packages.openbabel-bindings + python310 + ]; + + doCheck = false; + + OPENBABEL3_INCLUDE_DIR = "${openbabel}/include/openbabel3"; + + cmakeFlags = [ "-DOPENBABEL3_INCLUDE_DIR=${OPENBABEL3_INCLUDE_DIR}" ]; + + meta = with lib; { + description = + "Comprehensive library for fast, GPU accelerated molecular gridding for deep learning workflows"; + homepage = "https://github.com/gnina/libmolgrid"; + license = licenses.asl20; + maintainers = with maintainers; [ ]; + }; +} diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..699e544 --- /dev/null +++ b/flake.lock @@ -0,0 +1,25 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1687977148, + "narHash": "sha256-gUcXiU2GgjYIc65GOIemdBJZ+lkQxuyIh7OkR9j0gCo=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "60a783e00517fce85c42c8c53fe0ed05ded5b2a4", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "type": "indirect" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..f8403a0 --- /dev/null +++ b/flake.nix @@ -0,0 +1,9 @@ +{ + description = "A very basic flake"; + + outputs = { self, nixpkgs }: { + + packages.x86_64-linux.default = nixpkgs.legacyPackages.x86_64-linux.callPackage (import ./default.nix) { }; + + }; +} diff --git a/result b/result new file mode 120000 index 0000000..a153142 --- /dev/null +++ b/result @@ -0,0 +1 @@ +/nix/store/jgz224k2yfw0rpv9p8ykswm7ks35vbir-libmolgrid-0.5.3 \ No newline at end of file From 7e1da1c2c3b9684e46a79bde1a268bca989b7193 Mon Sep 17 00:00:00 2001 From: Ryan Swart Date: Tue, 4 Jul 2023 16:54:45 +0800 Subject: [PATCH 25/35] fix: add the tests back in --- CMakeLists.txt | 6 +++--- default.nix | 31 ++++++++++++++++++++++++++----- flake.lock | 12 +++++++----- flake.nix | 1 + result | 2 +- 5 files changed, 38 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 63d055c..6891a5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,7 @@ add_subdirectory(src) add_subdirectory(docs) # enable testing -#include(CTest) -#enable_testing() +include(CTest) +enable_testing() # define tests -#add_subdirectory(test) +add_subdirectory(test) diff --git a/default.nix b/default.nix index 33476d2..1d504be 100644 --- a/default.nix +++ b/default.nix @@ -1,5 +1,19 @@ -{ lib, stdenv, fetchFromGitHub, cmake, cudatoolkit, openbabel, zlib, boost -, python310Packages, python310 }: +{ lib +, stdenv +, fetchFromGitHub +, cmake +, cudatoolkit +, openbabel +, zlib +, boost +, python310Packages +, python310 +, pkgconfig +, llvmPackages_15 +, libcxx +, icu72 +, gcc +}: stdenv.mkDerivation rec { pname = "libmolgrid"; @@ -13,11 +27,20 @@ stdenv.mkDerivation rec { # hash = "sha256-YdEjXfrTf9hw0nMbC2JWZ7Gf/psZ4RQ6v6GUrx5yIoA="; # }; - nativeBuildInputs = [ cmake ]; + makeFlags = [ "-stdlib=libstdc++" ]; + + nativeBuildInputs = [ + cmake + pkgconfig + libcxx + ]; buildInputs = [ + gcc cudatoolkit + libcxx openbabel + #stdenv.cc.cc.lib zlib boost.dev python310Packages.boost @@ -28,8 +51,6 @@ stdenv.mkDerivation rec { python310 ]; - doCheck = false; - OPENBABEL3_INCLUDE_DIR = "${openbabel}/include/openbabel3"; cmakeFlags = [ "-DOPENBABEL3_INCLUDE_DIR=${OPENBABEL3_INCLUDE_DIR}" ]; diff --git a/flake.lock b/flake.lock index 699e544..642bc22 100644 --- a/flake.lock +++ b/flake.lock @@ -2,16 +2,18 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1687977148, - "narHash": "sha256-gUcXiU2GgjYIc65GOIemdBJZ+lkQxuyIh7OkR9j0gCo=", + "lastModified": 1685566663, + "narHash": "sha256-btHN1czJ6rzteeCuE/PNrdssqYD2nIA4w48miQAFloM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "60a783e00517fce85c42c8c53fe0ed05ded5b2a4", + "rev": "4ecab3273592f27479a583fb6d975d4aba3486fe", "type": "github" }, "original": { - "id": "nixpkgs", - "type": "indirect" + "owner": "NixOS", + "ref": "23.05", + "repo": "nixpkgs", + "type": "github" } }, "root": { diff --git a/flake.nix b/flake.nix index f8403a0..91da284 100644 --- a/flake.nix +++ b/flake.nix @@ -1,5 +1,6 @@ { description = "A very basic flake"; + inputs.nixpkgs.url = "github:NixOS/nixpkgs/23.05"; outputs = { self, nixpkgs }: { diff --git a/result b/result index a153142..e57d00f 120000 --- a/result +++ b/result @@ -1 +1 @@ -/nix/store/jgz224k2yfw0rpv9p8ykswm7ks35vbir-libmolgrid-0.5.3 \ No newline at end of file +/nix/store/s7vph6rhb977b8zj7cf46k6zqlqh6jns-libmolgrid-0.5.3 \ No newline at end of file From 321914ad642d0d761a571bd45e980d576ab99b1e Mon Sep 17 00:00:00 2001 From: Ryan Swart Date: Tue, 4 Jul 2023 17:05:49 +0800 Subject: [PATCH 26/35] fix: libstdc++ --- default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/default.nix b/default.nix index 1d504be..4d3d816 100644 --- a/default.nix +++ b/default.nix @@ -27,7 +27,7 @@ stdenv.mkDerivation rec { # hash = "sha256-YdEjXfrTf9hw0nMbC2JWZ7Gf/psZ4RQ6v6GUrx5yIoA="; # }; - makeFlags = [ "-stdlib=libstdc++" ]; + buildFlags = [ "-stdlib=libstdc++" ]; nativeBuildInputs = [ cmake From b3e45eac3ccbef90884dfd84c1a42a5c1d0caafd Mon Sep 17 00:00:00 2001 From: Ryan Swart Date: Wed, 5 Jul 2023 12:16:48 +0800 Subject: [PATCH 27/35] feat: add pkgconfig --- CMakeLists.txt | 9 ++++++--- libmolgrid.pc.in | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 libmolgrid.pc.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 6891a5b..415b870 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,9 +33,9 @@ include(cmake/safeguards.cmake) # place binaries and libraries according to GNU standards include(GNUInstallDirs) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}) +# set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) +# set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) +# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}) #dependencies find_package(CUDA REQUIRED) @@ -91,3 +91,6 @@ include(CTest) enable_testing() # define tests add_subdirectory(test) + +configure_file(libmolgrid.pc.in ${CMAKE_INSTALL_LIBDIR}/pkgconfig/libmolgrid.pc @ONLY) +#install(FILES ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/pkgconfig/libmolgrid.pc DESTINATION lib/pkgconfig) diff --git a/libmolgrid.pc.in b/libmolgrid.pc.in new file mode 100644 index 0000000..21c4552 --- /dev/null +++ b/libmolgrid.pc.in @@ -0,0 +1,21 @@ +# this template is filled-in by CMake `configure_file(... @ONLY)` +# the `@....@` are filled in by CMake configure_file(), +# from variables set in your CMakeLists.txt or by CMake itself +# +# Good tutoral for understanding .pc files: +# https://people.freedesktop.org/~dbn/pkg-config-guide.html + +prefix="@CMAKE_INSTALL_PREFIX@" +exec_prefix="${prefix}" +libdir="${prefix}/lib" +includedir="${prefix}/include" + +Name: @PROJECT_NAME@ +Description: @CMAKE_PROJECT_DESCRIPTION@ +URL: @CMAKE_PROJECT_HOMEPAGE_URL@ +Version: @PROJECT_VERSION@ +Requires: @pc_req_public@ +Requires.private: @pc_req_private@ +Cflags: -I"${includedir}" +Libs: -L"${libdir}" -l@target1@ -l@target2@ +Libs.private: -L"${libdir}" -l@target1@ -l@target2@ @pc_libs_private@ From c175f02cbf12c81cec52f589a52f9f6674c77e19 Mon Sep 17 00:00:00 2001 From: Ryan Swart Date: Thu, 6 Jul 2023 13:35:16 +0800 Subject: [PATCH 28/35] fix: don't build tests --- CMakeLists.txt | 13 +++++++------ default.nix | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 415b870..b271064 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,16 +13,17 @@ endif() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES OFF) +include(GNUInstallDirs) option(BUILD_SHARED "Build shared library" ON) option(BUILD_STATIC "Build static library" ON) option(BUILD_COVERAGE "Build with code coverage" OFF) -if(${BUILD_CONTAINED_PYTHON}) - #if make a mostly statically linked python module, use static boost - set(BUILD_STATIC ON) - set(Boost_USE_STATIC_LIBS ON) -endif() +#if(${BUILD_CONTAINED_PYTHON}) +# #if make a mostly statically linked python module, use static boost +# set(BUILD_STATIC ON) +# set(Boost_USE_STATIC_LIBS ON) +#endif() list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules;${PROJECT_SOURCE_DIR}/cmake") # get git hash @@ -90,7 +91,7 @@ add_subdirectory(docs) include(CTest) enable_testing() # define tests -add_subdirectory(test) +#add_subdirectory(test) configure_file(libmolgrid.pc.in ${CMAKE_INSTALL_LIBDIR}/pkgconfig/libmolgrid.pc @ONLY) #install(FILES ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/pkgconfig/libmolgrid.pc DESTINATION lib/pkgconfig) diff --git a/default.nix b/default.nix index 4d3d816..6d8b02b 100644 --- a/default.nix +++ b/default.nix @@ -27,7 +27,7 @@ stdenv.mkDerivation rec { # hash = "sha256-YdEjXfrTf9hw0nMbC2JWZ7Gf/psZ4RQ6v6GUrx5yIoA="; # }; - buildFlags = [ "-stdlib=libstdc++" ]; + #buildFlags = [ "-stdlib=libstdc++" ]; nativeBuildInputs = [ cmake From c7e7875db0da05fcdc82bf9c8fee3d90c9092360 Mon Sep 17 00:00:00 2001 From: Ryan Swart Date: Thu, 13 Jul 2023 15:11:47 +0800 Subject: [PATCH 29/35] fix: compile without impure --- CMakeLists.txt | 2 +- default.nix | 12 +++--------- flake.lock | 32 +++++++++++++++++++++++++++----- flake.nix | 7 +++++-- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b271064..7b55ef5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,7 +91,7 @@ add_subdirectory(docs) include(CTest) enable_testing() # define tests -#add_subdirectory(test) +add_subdirectory(test) configure_file(libmolgrid.pc.in ${CMAKE_INSTALL_LIBDIR}/pkgconfig/libmolgrid.pc @ONLY) #install(FILES ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/pkgconfig/libmolgrid.pc DESTINATION lib/pkgconfig) diff --git a/default.nix b/default.nix index 6d8b02b..b06b223 100644 --- a/default.nix +++ b/default.nix @@ -8,11 +8,7 @@ , boost , python310Packages , python310 -, pkgconfig -, llvmPackages_15 -, libcxx -, icu72 -, gcc +, pkg-config }: stdenv.mkDerivation rec { @@ -31,14 +27,12 @@ stdenv.mkDerivation rec { nativeBuildInputs = [ cmake - pkgconfig - libcxx + pkg-config ]; buildInputs = [ - gcc + #gcc cudatoolkit - libcxx openbabel #stdenv.cc.cc.lib zlib diff --git a/flake.lock b/flake.lock index 642bc22..e875405 100644 --- a/flake.lock +++ b/flake.lock @@ -1,24 +1,46 @@ { "nodes": { "nixpkgs": { + "inputs": { + "nixpkgs": [ + "nixpkgs-base" + ] + }, + "locked": { + "lastModified": 1673400599, + "narHash": "sha256-EnDVDsQ/uoCdO1UMPTSHelJLjJTR794yp370a89n3y4=", + "owner": "numtide", + "repo": "nixpkgs-unfree", + "rev": "7331a9526557393edc2ff86d04ecd74b107f1b81", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "nixpkgs-unfree", + "rev": "7331a9526557393edc2ff86d04ecd74b107f1b81", + "type": "github" + } + }, + "nixpkgs-base": { "locked": { - "lastModified": 1685566663, - "narHash": "sha256-btHN1czJ6rzteeCuE/PNrdssqYD2nIA4w48miQAFloM=", + "lastModified": 1669833724, + "narHash": "sha256-/HEZNyGbnQecrgJnfE8d0WC5c1xuPSD2LUpB6YXlg4c=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "4ecab3273592f27479a583fb6d975d4aba3486fe", + "rev": "4d2b37a84fad1091b9de401eb450aae66f1a741e", "type": "github" }, "original": { "owner": "NixOS", - "ref": "23.05", + "ref": "22.11", "repo": "nixpkgs", "type": "github" } }, "root": { "inputs": { - "nixpkgs": "nixpkgs" + "nixpkgs": "nixpkgs", + "nixpkgs-base": "nixpkgs-base" } } }, diff --git a/flake.nix b/flake.nix index 91da284..9f7c1cc 100644 --- a/flake.nix +++ b/flake.nix @@ -1,8 +1,11 @@ { description = "A very basic flake"; - inputs.nixpkgs.url = "github:NixOS/nixpkgs/23.05"; + #inputs.nixpkgs.url = "github:NixOS/nixpkgs/22.11"; + inputs.nixpkgs-base.url = "github:NixOS/nixpkgs/22.11"; + inputs.nixpkgs.url = "github:numtide/nixpkgs-unfree/7331a9526557393edc2ff86d04ecd74b107f1b81"; + inputs.nixpkgs.inputs.nixpkgs.follows = "nixpkgs-base"; - outputs = { self, nixpkgs }: { + outputs = { self, nixpkgs, nixpkgs-base }: { packages.x86_64-linux.default = nixpkgs.legacyPackages.x86_64-linux.callPackage (import ./default.nix) { }; From e14a400f5c5f12d5a26ec78fa1d409bc31daf801 Mon Sep 17 00:00:00 2001 From: Ryan Swart Date: Thu, 13 Jul 2023 15:12:38 +0800 Subject: [PATCH 30/35] fix: remove result file --- result | 1 - 1 file changed, 1 deletion(-) delete mode 120000 result diff --git a/result b/result deleted file mode 120000 index e57d00f..0000000 --- a/result +++ /dev/null @@ -1 +0,0 @@ -/nix/store/s7vph6rhb977b8zj7cf46k6zqlqh6jns-libmolgrid-0.5.3 \ No newline at end of file From a576b47e2ccbfb889dfa22926d9b097465b7695b Mon Sep 17 00:00:00 2001 From: Sean L Date: Fri, 13 Jun 2025 00:53:16 +0000 Subject: [PATCH 31/35] feat: update to nixos 25.05 & rebase onto tip of upstream --- CMakeLists.txt | 4 ++-- default.nix | 31 ++++++++++++++++++------------- flake.lock | 32 +++++--------------------------- flake.nix | 23 +++++++++++++++-------- 4 files changed, 40 insertions(+), 50 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b55ef5..aecd790 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,7 +56,7 @@ set(CMAKE_CXX_STANDARD 14) set(CMAKE_CUDA_STANDARD 14) if(CMAKE_CXX_COMPILER_ID MATCHES GNU) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas -Werror") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas -Werror -Wno-error=template-id-cdtor") set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3") if (BUILD_COVERAGE) set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -fprofile-arcs -ftest-coverage") @@ -69,7 +69,7 @@ if(CUDA_VERSION_MAJOR LESS 11) #compile for all major architectures >= 35 set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70 75) else() - set(CMAKE_CUDA_ARCHITECTURES 52 60 70 75 80) + set(CMAKE_CUDA_ARCHITECTURES 70 80 86) endif() set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -g -lineinfo") diff --git a/default.nix b/default.nix index b06b223..9333d82 100644 --- a/default.nix +++ b/default.nix @@ -2,20 +2,20 @@ , stdenv , fetchFromGitHub , cmake -, cudatoolkit +, cudaPackages , openbabel , zlib , boost -, python310Packages -, python310 +, python3 +, python3Packages , pkg-config }: stdenv.mkDerivation rec { pname = "libmolgrid"; - version = "0.5.3"; - + version = "master"; src = ./.; + # fetchFromGitHub { # owner = "gnina"; # repo = "libmolgrid"; @@ -32,22 +32,27 @@ stdenv.mkDerivation rec { buildInputs = [ #gcc - cudatoolkit + cudaPackages.cuda_cccl + cudaPackages.cuda_cudart + cudaPackages.cuda_nvcc openbabel #stdenv.cc.cc.lib zlib boost.dev - python310Packages.boost - python310Packages.pytest - python310Packages.numpy - python310Packages.pyquaternion - python310Packages.openbabel-bindings - python310 + python3 + python3Packages.boost + python3Packages.numpy + python3Packages.openbabel-bindings + python3Packages.pyquaternion + python3Packages.pytest ]; OPENBABEL3_INCLUDE_DIR = "${openbabel}/include/openbabel3"; - cmakeFlags = [ "-DOPENBABEL3_INCLUDE_DIR=${OPENBABEL3_INCLUDE_DIR}" ]; + cmakeFlags = [ + "-DOPENBABEL3_INCLUDE_DIR=${OPENBABEL3_INCLUDE_DIR}" + "-DCMAKE_CUDA_ARCHITECTURES=70;80;86" + ]; meta = with lib; { description = diff --git a/flake.lock b/flake.lock index e875405..e23651e 100644 --- a/flake.lock +++ b/flake.lock @@ -1,46 +1,24 @@ { "nodes": { "nixpkgs": { - "inputs": { - "nixpkgs": [ - "nixpkgs-base" - ] - }, - "locked": { - "lastModified": 1673400599, - "narHash": "sha256-EnDVDsQ/uoCdO1UMPTSHelJLjJTR794yp370a89n3y4=", - "owner": "numtide", - "repo": "nixpkgs-unfree", - "rev": "7331a9526557393edc2ff86d04ecd74b107f1b81", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "nixpkgs-unfree", - "rev": "7331a9526557393edc2ff86d04ecd74b107f1b81", - "type": "github" - } - }, - "nixpkgs-base": { "locked": { - "lastModified": 1669833724, - "narHash": "sha256-/HEZNyGbnQecrgJnfE8d0WC5c1xuPSD2LUpB6YXlg4c=", + "lastModified": 1749727998, + "narHash": "sha256-mHv/yeUbmL91/TvV95p+mBVahm9mdQMJoqaTVTALaFw=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "4d2b37a84fad1091b9de401eb450aae66f1a741e", + "rev": "fd487183437963a59ba763c0cc4f27e3447dd6dd", "type": "github" }, "original": { "owner": "NixOS", - "ref": "22.11", + "ref": "nixos-25.05", "repo": "nixpkgs", "type": "github" } }, "root": { "inputs": { - "nixpkgs": "nixpkgs", - "nixpkgs-base": "nixpkgs-base" + "nixpkgs": "nixpkgs" } } }, diff --git a/flake.nix b/flake.nix index 9f7c1cc..c9111b1 100644 --- a/flake.nix +++ b/flake.nix @@ -1,13 +1,20 @@ { - description = "A very basic flake"; - #inputs.nixpkgs.url = "github:NixOS/nixpkgs/22.11"; - inputs.nixpkgs-base.url = "github:NixOS/nixpkgs/22.11"; - inputs.nixpkgs.url = "github:numtide/nixpkgs-unfree/7331a9526557393edc2ff86d04ecd74b107f1b81"; - inputs.nixpkgs.inputs.nixpkgs.follows = "nixpkgs-base"; + description = "A flake for libmolgrid."; - outputs = { self, nixpkgs, nixpkgs-base }: { - - packages.x86_64-linux.default = nixpkgs.legacyPackages.x86_64-linux.callPackage (import ./default.nix) { }; + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; + }; + outputs = { self, nixpkgs }: + let + pkgs = import nixpkgs { + system = "x86_64-linux"; + config.allowUnfree = true; + config.cudaSupport = true; + config.cudaCapabilities = ["7.0" "8.0" "8.6"]; + }; + in + { + packages.x86_64-linux.default = pkgs.callPackage (import ./default.nix) { }; }; } From 8c5c5f154d171d6b6b6292379fa82093edcb1b4b Mon Sep 17 00:00:00 2001 From: Sean L Date: Fri, 13 Jun 2025 01:18:33 -0500 Subject: [PATCH 32/35] chore: format and clean up flake.nix & default.nix --- default.nix | 44 ++++++++++++++++---------------------------- flake.nix | 28 +++++++++++++++++----------- 2 files changed, 33 insertions(+), 39 deletions(-) diff --git a/default.nix b/default.nix index 9333d82..cce0eef 100644 --- a/default.nix +++ b/default.nix @@ -1,14 +1,14 @@ -{ lib -, stdenv -, fetchFromGitHub -, cmake -, cudaPackages -, openbabel -, zlib -, boost -, python3 -, python3Packages -, pkg-config +{ + lib, + stdenv, + boost, + cmake, + cudaPackages, + openbabel, + pkg-config, + python3, + python3Packages, + zlib, }: stdenv.mkDerivation rec { @@ -16,35 +16,24 @@ stdenv.mkDerivation rec { version = "master"; src = ./.; - # fetchFromGitHub { - # owner = "gnina"; - # repo = "libmolgrid"; - # rev = "v${version}"; - # hash = "sha256-YdEjXfrTf9hw0nMbC2JWZ7Gf/psZ4RQ6v6GUrx5yIoA="; - # }; - - #buildFlags = [ "-stdlib=libstdc++" ]; - nativeBuildInputs = [ cmake pkg-config ]; buildInputs = [ - #gcc + boost.dev cudaPackages.cuda_cccl cudaPackages.cuda_cudart cudaPackages.cuda_nvcc - openbabel - #stdenv.cc.cc.lib - zlib - boost.dev python3 python3Packages.boost python3Packages.numpy python3Packages.openbabel-bindings python3Packages.pyquaternion python3Packages.pytest + openbabel + zlib ]; OPENBABEL3_INCLUDE_DIR = "${openbabel}/include/openbabel3"; @@ -55,10 +44,9 @@ stdenv.mkDerivation rec { ]; meta = with lib; { - description = - "Comprehensive library for fast, GPU accelerated molecular gridding for deep learning workflows"; + description = "Comprehensive library for fast, GPU accelerated molecular gridding for deep learning workflows"; homepage = "https://github.com/gnina/libmolgrid"; license = licenses.asl20; - maintainers = with maintainers; [ ]; + maintainers = [ ]; }; } diff --git a/flake.nix b/flake.nix index c9111b1..cdc2424 100644 --- a/flake.nix +++ b/flake.nix @@ -5,16 +5,22 @@ nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; }; - outputs = { self, nixpkgs }: - let - pkgs = import nixpkgs { - system = "x86_64-linux"; - config.allowUnfree = true; - config.cudaSupport = true; - config.cudaCapabilities = ["7.0" "8.0" "8.6"]; + outputs = + { self, nixpkgs }: + let + pkgs = import nixpkgs { + system = "x86_64-linux"; + config.allowUnfree = true; + config.cudaSupport = true; + config.cudaCapabilities = [ + "7.0" + "8.0" + "8.6" + ]; + }; + in + { + packages.x86_64-linux.libmolgrid = pkgs.callPackage (import ./default.nix) { }; + packages.x86_64-linux.default = self.packages.x86_64-linux.libmolgrid; }; - in - { - packages.x86_64-linux.default = pkgs.callPackage (import ./default.nix) { }; - }; } From 7f52463cafd17849b21786ce30aab1cde791d3ed Mon Sep 17 00:00:00 2001 From: Sean L Date: Fri, 3 Oct 2025 12:44:04 -0500 Subject: [PATCH 33/35] fix: for build with cudaPackages_12_4 --- CMakeLists.txt | 18 +++++++++--------- default.nix | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aecd790..53c7e9a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,20 +56,20 @@ set(CMAKE_CXX_STANDARD 14) set(CMAKE_CUDA_STANDARD 14) if(CMAKE_CXX_COMPILER_ID MATCHES GNU) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas -Werror -Wno-error=template-id-cdtor") - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3") - if (BUILD_COVERAGE) - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -fprofile-arcs -ftest-coverage") - else() - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") - endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas -Werror") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3") + if (BUILD_COVERAGE) + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -fprofile-arcs -ftest-coverage") + else() + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") + endif() endif() if(CUDA_VERSION_MAJOR LESS 11) #compile for all major architectures >= 35 - set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70 75) + set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70 75) else() - set(CMAKE_CUDA_ARCHITECTURES 70 80 86) + set(CMAKE_CUDA_ARCHITECTURES 70 80 86) endif() set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -g -lineinfo") diff --git a/default.nix b/default.nix index cce0eef..54f23f5 100644 --- a/default.nix +++ b/default.nix @@ -1,13 +1,13 @@ { lib, stdenv, - boost, cmake, - cudaPackages, - openbabel, pkg-config, + boost, + cudaPackages, python3, python3Packages, + openbabel, zlib, }: From ca269440ae523ddd544cc9931489ac99f9dd043a Mon Sep 17 00:00:00 2001 From: Sean L Date: Fri, 3 Oct 2025 13:04:30 -0500 Subject: [PATCH 34/35] chore: use cuda 12.4, correct stdenv, and pinned nixpkgs --- default.nix | 17 ++++++----------- flake.lock | 8 ++++---- flake.nix | 14 ++++++++++---- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/default.nix b/default.nix index 54f23f5..0c47087 100644 --- a/default.nix +++ b/default.nix @@ -1,46 +1,41 @@ { lib, - stdenv, cmake, - pkg-config, boost, - cudaPackages, python3, python3Packages, + cudaPackages, openbabel, zlib, }: -stdenv.mkDerivation rec { +cudaPackages.backendStdenv.mkDerivation { pname = "libmolgrid"; version = "master"; src = ./.; nativeBuildInputs = [ cmake - pkg-config + cudaPackages.cuda_nvcc ]; buildInputs = [ boost.dev - cudaPackages.cuda_cccl - cudaPackages.cuda_cudart - cudaPackages.cuda_nvcc python3 python3Packages.boost python3Packages.numpy python3Packages.openbabel-bindings python3Packages.pyquaternion python3Packages.pytest + cudaPackages.cuda_cudart + cudaPackages.cuda_cccl openbabel zlib ]; - OPENBABEL3_INCLUDE_DIR = "${openbabel}/include/openbabel3"; - cmakeFlags = [ - "-DOPENBABEL3_INCLUDE_DIR=${OPENBABEL3_INCLUDE_DIR}" "-DCMAKE_CUDA_ARCHITECTURES=70;80;86" + "-DOPENBABEL3_INCLUDE_DIR=${openbabel}/include/openbabel3" ]; meta = with lib; { diff --git a/flake.lock b/flake.lock index e23651e..d35a24c 100644 --- a/flake.lock +++ b/flake.lock @@ -2,17 +2,17 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1749727998, - "narHash": "sha256-mHv/yeUbmL91/TvV95p+mBVahm9mdQMJoqaTVTALaFw=", + "lastModified": 1752620740, + "narHash": "sha256-f3pO+9lg66mV7IMmmIqG4PL3223TYMlnlw+pnpelbss=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "fd487183437963a59ba763c0cc4f27e3447dd6dd", + "rev": "32a4e87942101f1c9f9865e04dc3ddb175f5f32e", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-25.05", "repo": "nixpkgs", + "rev": "32a4e87942101f1c9f9865e04dc3ddb175f5f32e", "type": "github" } }, diff --git a/flake.nix b/flake.nix index cdc2424..b2979b5 100644 --- a/flake.nix +++ b/flake.nix @@ -2,14 +2,15 @@ description = "A flake for libmolgrid."; inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; + nixpkgs.url = "github:NixOS/nixpkgs/32a4e87942101f1c9f9865e04dc3ddb175f5f32e"; }; outputs = { self, nixpkgs }: let + system = "x86_64-linux"; pkgs = import nixpkgs { - system = "x86_64-linux"; + inherit system; config.allowUnfree = true; config.cudaSupport = true; config.cudaCapabilities = [ @@ -17,10 +18,15 @@ "8.0" "8.6" ]; + cudaForwardCompat = false; }; in { - packages.x86_64-linux.libmolgrid = pkgs.callPackage (import ./default.nix) { }; - packages.x86_64-linux.default = self.packages.x86_64-linux.libmolgrid; + packages.${system} = { + libmolgrid = pkgs.callPackage (import ./default.nix) { + cudaPackages = pkgs.cudaPackages_12_4; + }; + default = self.packages.${system}.libmolgrid; + }; }; } From 4d2d68301e9d6bca6099ba4c41babab6cf992d44 Mon Sep 17 00:00:00 2001 From: Sean L Date: Fri, 3 Oct 2025 14:39:17 -0500 Subject: [PATCH 35/35] feat: provide both regular and bullet builds --- CMakeLists.txt | 6 +++++- flake.nix | 42 ++++++++++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 53c7e9a..903503b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,6 @@ include(GNUInstallDirs) # set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}) #dependencies -find_package(CUDA REQUIRED) find_package(Boost REQUIRED COMPONENTS regex unit_test_framework program_options system filesystem iostreams serialization) find_package(OpenBabel3 REQUIRED) include_directories(SYSTEM ${OPENBABEL3_INCLUDE_DIR}) @@ -57,6 +56,11 @@ set(CMAKE_CUDA_STANDARD 14) if(CMAKE_CXX_COMPILER_ID MATCHES GNU) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas -Werror") + include(CheckCCompilerFlag) + check_c_compiler_flag(-Wno-error=template-id-cdtor HAS_TEMPLATE_ID_CDTOR) + if (HAS_TEMPLATE_ID_CDTOR) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=template-id-cdtor") + endif() set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3") if (BUILD_COVERAGE) set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -fprofile-arcs -ftest-coverage") diff --git a/flake.nix b/flake.nix index b2979b5..c0e2679 100644 --- a/flake.nix +++ b/flake.nix @@ -9,23 +9,37 @@ { self, nixpkgs }: let system = "x86_64-linux"; - pkgs = import nixpkgs { - inherit system; - config.allowUnfree = true; - config.cudaSupport = true; - config.cudaCapabilities = [ - "7.0" - "8.0" - "8.6" - ]; - cudaForwardCompat = false; - }; in { packages.${system} = { - libmolgrid = pkgs.callPackage (import ./default.nix) { - cudaPackages = pkgs.cudaPackages_12_4; - }; + libmolgrid = + let + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + config.cudaSupport = true; + }; + in + pkgs.callPackage ./default.nix { }; + libmolgrid_bullet = + let + system = "x86_64-linux"; + pkgs = import nixpkgs { + inherit system; + overlays = [ + (final: prev: { cudaPackages = prev.cudaPackages_12_4; }) + ]; + config.allowUnfree = true; + config.cudaSupport = true; + config.cudaCapabilities = [ + "7.0" + "8.0" + "8.6" + ]; + cudaForwardCompat = false; + }; + in + pkgs.callPackage ./default.nix { }; default = self.packages.${system}.libmolgrid; }; };