Bio-Acc · Liaukx · Mar 6, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -3,9 +3,22 @@ cmake_minimum_required(VERSION 3.5)
 set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda)
 project(BLASTP LANGUAGES CXX C CUDA)
 
-SET(CMAKE_BUILD_TYPE "Release")
+if(NOT CMAKE_BUILD_TYPE)  # keep a -DCMAKE_BUILD_TYPE=<config> given on the command line
+    set(CMAKE_BUILD_TYPE DEBUG)  # default used only when no build type was requested
+endif()
+# Smith-Waterman backend switches: GLF_GPU_SW defaults to ON, USE_GPU_SW to OFF.
+option(GLF_GPU_SW "Use GLF-GPU Smith-waterman" ON)
+option(USE_GPU_SW "Use GPU Smith-waterman" OFF)
+# Each enabled switch is passed to the compiler as a same-named -D definition.
+foreach(sw_switch GLF_GPU_SW USE_GPU_SW)
+    if(${sw_switch})
+        add_definitions(-D${sw_switch})
+    endif()
+endforeach()
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread")
+# Same language level for CUDA sources; -Xcompiler hands -pthread to the host compiler.
+string(APPEND CMAKE_CUDA_FLAGS " -std=c++17 -Xcompiler -pthread")
 if (NOT ("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8"))
     message(SEND_ERROR "require 64 bit system")
 endif()
@@ -50,14 +63,24 @@ target_compile_options (util
 		${OpenMP_CXX_FLAGS}
 )
 
+# GPU Smith-Waterman sources; gpu-sw/CMakeLists.txt defines the sw-lib target.
+add_subdirectory(gpu-sw)
+
 add_executable(query src/main.cpp ${SOURCES_SEARCH})
-target_link_libraries(query util)
+
+# query links two in-tree libraries:
+#   util   - the library target configured earlier in this file
+#   sw-lib - the static library defined in gpu-sw/CMakeLists.txt
+# PRIVATE is enough here: query is an executable, so its link dependencies
+# never need to propagate to another target.
+target_link_libraries(query PRIVATE util sw-lib)
 
 add_executable(createDB src/createDB.cpp)
 
 target_include_directories(query
     PRIVATE 
         ${PROJECT_SOURCE_DIR}/include
+        ${PROJECT_SOURCE_DIR}/gpu-sw  # directory holding the gpu-sw headers (e.g. core.hh)
 )
 
 target_include_directories(createDB

diff --git a/gpu-sw/CMakeLists.txt b/gpu-sw/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Static library holding the GPU Smith-Waterman code (one CUDA source plus C++ sources).
+# Sources are listed in add_library() itself: the source-less form needs CMake >= 3.11.
+add_library(sw-lib STATIC sw.cu cuda-utils.cc mats.cc seqs.cc sw-lib.cc)
+# Only override CUDA_ARCHITECTURES when GPU_ARCHS is provided; an empty value would
+# otherwise replace whatever CMAKE_CUDA_ARCHITECTURES initialised the property with.
+if(NOT "${GPU_ARCHS}" STREQUAL "")
+	set_property(TARGET sw-lib PROPERTY CUDA_ARCHITECTURES "${GPU_ARCHS}")
+endif()
+# PUBLIC: targets linking sw-lib inherit the CUDA libraries and these include paths.
+target_link_libraries(sw-lib PUBLIC "${CUDA_LIBRARIES}")
+target_include_directories(sw-lib PUBLIC "${CUDA_INCLUDE_DIRS}" ".")
diff --git a/gpu-sw/core.hh b/gpu-sw/core.hh
@@ -0,0 +1,165 @@
+#ifndef __ECCL_CORE_HH__
+#define __ECCL_CORE_HH__
+
+#include <memory>
+#include <string>
+#include <future>
+#include <cstring>
+#include <ostream>
+#include <cassert>
+
+/*! core types/definitions/functions, cheap to include */
+
+/*! CUDA_HOST/CUDA_DEVICE expand to __host__/__device__ when __CUDACC__ is defined, to nothing otherwise */
+#ifdef __CUDACC__
+#define CUDA_HOST __host__
+#define CUDA_DEVICE __device__
+#else
+#define CUDA_HOST
+#define CUDA_DEVICE
+#endif
+
+/*! 10 nucleotides per unsigned int (instead of 8) */
+#define ECCL_COMPACT_CODE
+
+namespace eccl {
+/*! kind of sequence; template parameter of code, pair, padded_len and get_code */
+enum class seq_type {
+	xna, /*! DNA or RNA */
+	//dna,
+	//rna,
+	prot, /*! used by the amino_acid alias */
+};
+/*! one sequence symbol in an unsigned char; width = bits per symbol, n_per_word = symbols per unsigned int */
+template<seq_type Type>
+struct code {
+	unsigned char value{0};
+	constexpr static unsigned int width=(Type==seq_type::xna?3:5);
+#ifndef ECCL_COMPACT_CODE
+	constexpr static unsigned int n_per_word=(Type==seq_type::xna?8:4);
+#else
+	constexpr static unsigned int n_per_word=(Type==seq_type::xna?10:6);
+#endif
+	/*! complement nucleotide: inverts the low three bits; rejected at compile time for non-xna codes */
+	CUDA_HOST CUDA_DEVICE code<Type> operator~() const noexcept {
+		static_assert(Type==seq_type::xna, "operator~ is only defined for nucleotide codes");
+		return code<Type>{static_cast<unsigned char>((~value)&0b111)};
+	}
+};
+using nucleotide=code<seq_type::xna>;
+using amino_acid=code<seq_type::prot>;
+/*! two codes combined into one integer: a shifted left by the code width, plus b */
+template<seq_type Type>
+struct pair {
+	unsigned int value{0};
+	CUDA_HOST CUDA_DEVICE constexpr pair(code<Type> a, code<Type> b) noexcept:
+		value{(static_cast<unsigned int>(a.value)<<code<Type>::width)+b.value} { }
+};
+/*! round len up to the next multiple of the padding unit selected by Type and ECCL_COMPACT_CODE */
+template<seq_type Type>
+inline unsigned int padded_len(unsigned int len) noexcept {
+#ifndef ECCL_COMPACT_CODE
+	constexpr unsigned int unit=(Type==seq_type::xna?8:4);
+#else
+	constexpr unsigned int unit=(Type==seq_type::xna?40:24);
+#endif
+	return (len+unit-1)/unit*unit;
+}
+/*! read the idx-th packed code from buf; within a word, lower indices occupy higher bits */
+template<eccl::seq_type Type>
+inline CUDA_DEVICE code<Type> get_code(const unsigned int* buf, unsigned long idx);
+/*! xna: 8 codes per word in 4-bit slots, or 10 per word in 3-bit slots with ECCL_COMPACT_CODE */
+template<>
+inline CUDA_DEVICE code<eccl::seq_type::xna> get_code(const unsigned int* buf, unsigned long idx) {
+#ifndef ECCL_COMPACT_CODE
+	constexpr unsigned int per_word=8, slot=4, mask=0x0f;
+#else
+	constexpr unsigned int per_word=10, slot=3, mask=0b0111;
+#endif
+	return {static_cast<unsigned char>((buf[idx/per_word]>>((per_word-1-idx%per_word)*slot))&mask)};
+}
+/*! prot: 4 codes per word in 8-bit slots (low 5 bits kept), or 6 per word in 5-bit slots with ECCL_COMPACT_CODE */
+template<>
+inline CUDA_DEVICE code<eccl::seq_type::prot> get_code(const unsigned int* buf, unsigned long idx) {
+#ifndef ECCL_COMPACT_CODE
+	constexpr unsigned int per_word=4, slot=8, mask=0x1f;
+#else
+	constexpr unsigned int per_word=6, slot=5, mask=0b011111;
+#endif
+	return {static_cast<unsigned char>((buf[idx/per_word]>>((per_word-1-idx%per_word)*slot))&mask)};
+}
+/*! owning array of T with a logical size and an eof flag */
+template<typename T>
+class chunk {
+public:
+	/*! empty chunk: no storage, size 0, eof clear */
+	constexpr chunk() noexcept: _p{}, _s{0} { }
+	/*! allocate size value-initialized elements; eof clear */
+	explicit chunk(std::size_t size): _p{std::make_unique<T[]>(size)}, _s{size*2} { }
+	/*! true when storage is attached, regardless of the logical size */
+	explicit operator bool() const noexcept { return _p!=nullptr; }
+	std::size_t size() const noexcept { return _s/2; }
+	bool eof() const noexcept { return (_s&1)!=0; }
+	T& operator[](std::size_t i) noexcept { return _p.get()[i]; }
+	const T& operator[](std::size_t i) const noexcept { return _p.get()[i]; }
+	/*! set a new logical size (asserted not to exceed the current one) and overwrite the eof flag */
+	void shrink(std::size_t size, bool eof=false) noexcept {
+		assert(size<=this->size());
+		_s=size*2+(eof?1u:0u);
+	}
+private:
+	std::unique_ptr<T[]> _p;
+	std::size_t _s; /*! (logical size << 1) | eof bit */
+};
+/*! base class for chunk producers: one chunk is always pending in a future */
+template<typename T>
+class source {
+public:
+	/*! wait for the pending chunk, request the next one via prepare(), return the finished chunk */
+	chunk<T> get() {
+		chunk<T> ready=_fut.get();
+		_fut=prepare();
+		return ready;
+	}
+protected:
+	constexpr source() noexcept { }
+	/*! requests the first chunk; protected, so only the class hierarchy itself can call it */
+	void post_ctor() { _fut=prepare(); }
+	virtual ~source() { }
+	/*! implemented by subclasses: returns the future that will hold the next chunk */
+	virtual std::future<chunk<T>> prepare() =0;
+private:
+	/*! future of the chunk that the next get() will return */
+	std::future<chunk<T>> _fut;
+};
+
+
+}
+
+#if __cplusplus < 201703L
+/*! minimal std::string_view stand-in for pre-C++17 builds: construction, size, [], ==, != and << only */
+namespace std {
+class string_view {
+public:
+	constexpr string_view(const char* p, std::size_t s) noexcept: _p{p}, _s{s} { }
+	string_view(const char* p): _p{p}, _s{std::strlen(p)} { } /*! length taken with std::strlen */
+	std::size_t size() const noexcept { return _s; }
+	const char& operator[](std::size_t i) const noexcept { return *(_p+i); }
+private:
+	const char* _p;
+	std::size_t _s;
+};
+inline bool operator==(const std::string& a, std::string_view b) noexcept {
+	return a.compare(0, a.size(), &b[0], b.size())==0;
+}
+inline bool operator!=(const std::string& a, std::string_view b) noexcept {
+	return !(a==b);
+}
+/*! writes exactly b.size() characters through ostream::write */
+inline std::ostream& operator<<(std::ostream& oss, std::string_view b) {
+	return oss.write(&b[0], b.size());
+}
+}
+#endif
+
+#endif
diff --git a/gpu-sw/cuda-utils.cc b/gpu-sw/cuda-utils.cc
@@ -0,0 +1,29 @@
+#include "cuda-utils.hh"
+
+#include <cstdio>
+#include <iostream>
+/*! print a property summary of CUDA device `device` to stdout; if the query fails, report on stderr and print nothing */
+void eccl::dump_device_info(int device) {
+	struct cudaDeviceProp prop;
+	if(const cudaError_t err=cudaGetDeviceProperties(&prop, device)) { /* non-zero: prop was not filled in */
+		fprintf(stderr, "error: cudaGetDeviceProperties(%d): %s: %s\n", device, cudaGetErrorName(err), cudaGetErrorString(err));
+		return;
+	}
+	printf("%s\n", prop.name);
+	printf("Major revision number:         %d\n" "Minor revision number:         %d\n", prop.major, prop.minor);
+	printf("Total global memory:           %zu" " bytes\n", prop.totalGlobalMem);
+	printf("Number of multiprocessors:     %d\n", prop.multiProcessorCount);
+	printf("Total amount of shared memory per block: %zu\n",prop.sharedMemPerBlock);
+	printf("Total registers per block:     %d\n" "Warp size:                     %d\n", prop.regsPerBlock, prop.warpSize);
+	printf("Maximum memory pitch:          %zu\n", prop.memPitch);
+	printf("Total amount of constant memory:         %zu\n",   prop.totalConstMem);
+}
+namespace eccl {
+/*! comma-operator overload: does nothing when error is zero; otherwise prints checker._msg plus the CUDA error name and description to stderr and exits with EXIT_FAILURE */
+void operator,(cudaError_t error, eccl::check_cuda checker) {
+	if(error) {
+		std::cerr<<"error: "<<checker._msg<<": "<<cudaGetErrorName(error)<<": "<<cudaGetErrorString(error)<<"\n";
+		std::exit(EXIT_FAILURE);
+	}
+}
+}