Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,22 @@ cmake_minimum_required(VERSION 3.5)
# NOTE(review): the toolkit location is hard-coded here - confirm it is correct
# for every machine this is expected to build on.
set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda)
# C++, C and CUDA are all enabled as project languages.
project(BLASTP LANGUAGES CXX C CUDA)

# Default to a DEBUG build, but only when no build type has been chosen yet
# (e.g. via -DCMAKE_BUILD_TYPE=Release on the command line).  Setting the
# variable unconditionally would silently override that choice.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE DEBUG)
endif()

# Build switch, ON by default: when enabled, GLF_GPU_SW is passed to the
# compiler as a preprocessor definition.
option(GLF_GPU_SW "Use GLF-GPU Smith-waterman" ON)
if(GLF_GPU_SW)
add_definitions(-DGLF_GPU_SW)
endif()

# Build switch, OFF by default: when enabled, USE_GPU_SW is passed to the
# compiler as a preprocessor definition.
option(USE_GPU_SW "Use GPU Smith-waterman" OFF)
if(USE_GPU_SW)
add_definitions(-DUSE_GPU_SW)
endif()

# Append C++17 and pthread flags for both C++ and CUDA sources; for CUDA,
# -pthread is handed over with -Xcompiler.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++17 -Xcompiler -pthread")

# Report a configure error unless pointers are 8 bytes wide.
if (NOT ("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8"))
message(SEND_ERROR "require 64 bit system")
endif()
Expand Down Expand Up @@ -50,14 +63,24 @@ target_compile_options (util
${OpenMP_CXX_FLAGS}
)

# gpu_sw: the subdirectory defines the sw-lib static library target.
add_subdirectory(./gpu-sw)

add_executable(query src/main.cpp ${SOURCES_SEARCH})

# Link query exactly once, using the keyword signature.  CMake refuses a target
# that mixes the plain form (target_link_libraries(query util)) with the
# keyword form, and util is already listed here.  sw-lib is an in-tree target,
# so it is linked by name rather than looked up with find_library().
target_link_libraries(query PRIVATE util sw-lib)

add_executable(createDB src/createDB.cpp)

# Headers for query: the project include directory and the gpu-sw sources.
target_include_directories(query
  PRIVATE
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_SOURCE_DIR}/gpu-sw
)

target_include_directories(createDB
Expand Down
11 changes: 11 additions & 0 deletions gpu-sw/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Static library target; its sources are attached with target_sources() below.
add_library(sw-lib STATIC)
# NOTE(review): GPU_ARCHS is not set anywhere in this file - confirm that the
# parent scope or the command line provides it.
set_property(TARGET sw-lib PROPERTY CUDA_ARCHITECTURES "${GPU_ARCHS}")
target_sources(sw-lib PRIVATE
"sw.cu"
"cuda-utils.cc"
"mats.cc"
"seqs.cc"
"sw-lib.cc"
)
# PUBLIC: targets that link sw-lib inherit these libraries and include paths,
# including this directory (".").
# NOTE(review): CUDA_LIBRARIES and CUDA_INCLUDE_DIRS are not set in this file -
# presumably they come from a CUDA find module run elsewhere; confirm.
target_link_libraries(sw-lib PUBLIC "${CUDA_LIBRARIES}")
target_include_directories(sw-lib PUBLIC "${CUDA_INCLUDE_DIRS}" ".")
165 changes: 165 additions & 0 deletions gpu-sw/core.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#ifndef __ECCL_CORE_HH__
#define __ECCL_CORE_HH__

#include <memory>
#include <string>
#include <future>
#include <cstring>
#include <ostream>
#include <cassert>

/*! core types/definitions/functions, cheap to include */


/*! Function qualifiers that expand to __host__ / __device__ when __CUDACC__
 *  is defined and to nothing otherwise, so declarations using them can also
 *  be compiled as plain C++. */
#ifdef __CUDACC__
#define CUDA_HOST __host__
#define CUDA_DEVICE __device__
#else
#define CUDA_HOST
#define CUDA_DEVICE
#endif

/*! 10 nucleotides per unsigned int (instead of 8) */
#define ECCL_COMPACT_CODE

namespace eccl {

/*! Alphabet of a sequence; used as the template argument of code<> and the
 *  helpers built on it. */
enum class seq_type {
    /*! DNA or RNA */
    xna,
    //dna,
    //rna,
    /*! amino-acid alphabet (see the amino_acid alias) */
    prot,
};

/*! One symbol of the alphabet selected by Type, together with the constants
 *  that describe how such symbols are packed into an unsigned int. */
template<seq_type Type>
struct code {
    /*! raw symbol value */
    unsigned char value{0};

    /*! bits per symbol: 3 for xna, 5 otherwise */
    static constexpr unsigned int width{Type==seq_type::xna ? 3u : 5u};

    /*! symbols stored in one unsigned int */
#ifdef ECCL_COMPACT_CODE
    static constexpr unsigned int n_per_word{Type==seq_type::xna ? 10u : 6u};
#else
    static constexpr unsigned int n_per_word{Type==seq_type::xna ? 8u : 4u};
#endif

    /*! complement nucleotide: inverts the low three bits of value
     *  (only available for seq_type::xna) */
    code<Type> operator~() const noexcept {
        static_assert(Type==seq_type::xna);
        const auto low3=static_cast<unsigned char>(~value & 0b111);
        return {low3};
    }
};

/*! symbol of an xna sequence */
using nucleotide=code<seq_type::xna>;
/*! symbol of a prot sequence */
using amino_acid=code<seq_type::prot>;

/*! Two symbols folded into a single unsigned value: the first one shifted
 *  left by the symbol width, the second one added on top. */
template<seq_type Type>
struct pair {
    unsigned int value{0};

    CUDA_HOST CUDA_DEVICE constexpr pair(code<Type> a, code<Type> b) noexcept:
        value{(static_cast<unsigned int>(a.value) << code<Type>::width) + b.value} { }
};

/*! Rounds len up to the next multiple of the padding unit for Type.
 *  Compact coding pads to 40 (xna) or 24 (other) symbols, i.e. four words of
 *  10 or 6 symbols; otherwise to 8 (xna) or 4 (other) symbols. */
template<seq_type Type>
inline unsigned int padded_len(unsigned int len) noexcept {
#ifdef ECCL_COMPACT_CODE
    //padding to a single word would be (len+9)/10*10
    constexpr unsigned int unit=(Type==seq_type::xna ? 40u : 24u);
#else
    constexpr unsigned int unit=(Type==seq_type::xna ? 8u : 4u);
#endif
    return (len+(unit-1))/unit*unit;
}

/*! Reads the idx-th symbol out of a buffer of packed words.  Within a word
 *  the symbol with the lowest index sits in the highest-order bits used. */
template<eccl::seq_type Type>
inline CUDA_DEVICE code<Type> get_code(const unsigned int* buf, unsigned long idx);

/*! xna: 10 symbols of 3 bits per word when compact, else 8 symbols in 4-bit
 *  slots. */
template<>
inline CUDA_DEVICE code<eccl::seq_type::xna> get_code(const unsigned int* buf, unsigned long idx) {
#ifdef ECCL_COMPACT_CODE
    const unsigned int word=buf[idx/10];
    const auto shift=(9-idx%10)*3;
    return {static_cast<unsigned char>((word>>shift)&0b0111)};
#else
    const unsigned int word=buf[idx/8];
    const auto shift=(7-idx%8)*4;
    return {static_cast<unsigned char>((word>>shift)&0x0f)};
#endif
}

/*! prot: 6 symbols of 5 bits per word when compact, else 4 symbols in 8-bit
 *  slots (low 5 bits used). */
template<>
inline CUDA_DEVICE code<eccl::seq_type::prot> get_code(const unsigned int* buf, unsigned long idx) {
#ifdef ECCL_COMPACT_CODE
    const unsigned int word=buf[idx/6];
    const auto shift=(5-idx%6)*5;
    return {static_cast<unsigned char>((word>>shift)&0b011111)};
#else
    const unsigned int word=buf[idx/4];
    const auto shift=(3-idx%4)*8;
    return {static_cast<unsigned char>((word>>shift)&0x1f)};
#endif
}

/*! Owning array of T that also records a logical size and an end-of-input
 *  flag.  Size and flag share one word: the size in the upper bits, the flag
 *  in bit 0. */
template<typename T>
class chunk {
public:
    /*! empty chunk: no storage, size 0, eof clear */
    constexpr chunk() noexcept: _buf{nullptr}, _packed{0} { }
    /*! allocates size elements; eof clear */
    explicit chunk(std::size_t size):
        _buf{std::make_unique<T[]>(size)}, _packed{size<<1} { }

    /*! true when the chunk holds storage */
    explicit operator bool() const noexcept { return _buf!=nullptr; }
    /*! current logical size */
    std::size_t size() const noexcept { return _packed>>1; }
    /*! unchecked element access */
    T& operator[](std::size_t i) noexcept { return _buf[i]; }
    const T& operator[](std::size_t i) const noexcept { return _buf[i]; }
    /*! state of the end-of-input flag */
    bool eof() const noexcept { return (_packed&1)!=0; }

    /*! Lowers the logical size to size (must not exceed the current size) and
     *  replaces the eof flag; the storage itself is left untouched. */
    void shrink(std::size_t size, bool eof=false) noexcept {
        assert(size<=(_packed>>1));
        _packed=(size<<1)|static_cast<std::size_t>(eof);
    }

private:
    std::unique_ptr<T[]> _buf;
    /*! size shifted left by one; last bit is eof */
    std::size_t _packed;
};

template<typename T>
class source {
public:
chunk<T> get() {
auto chk=_fut.get();
_fut=prepare();
return chk;
}

protected:
constexpr source() noexcept { }
void post_ctor() {
_fut=prepare();
}
virtual ~source() { }

virtual std::future<chunk<T>> prepare() =0;
private:
std::future<chunk<T>> _fut;
};


}

#if __cplusplus < 201703L
/*! Minimal stand-in for std::string_view, compiled only before C++17.  It
 *  offers construction from pointer+length or from a C string, size(),
 *  indexing, comparison against std::string and stream output. */
namespace std {
class string_view {
public:
    constexpr string_view(const char* p, std::size_t s) noexcept:
        _ptr{p}, _len{s} { }
    string_view(const char* p):
        _ptr{p}, _len{std::strlen(p)} { }
    std::size_t size() const noexcept { return _len; }
    const char& operator[](std::size_t i) const noexcept { return _ptr[i]; }
private:
    const char* _ptr;
    std::size_t _len;
};
/*! equal when a's whole contents match the viewed characters */
inline bool operator==(const std::string& a, std::string_view b) noexcept {
    return a.compare(0, a.size(), &b[0], b.size())==0;
}
inline bool operator!=(const std::string& a, std::string_view b) noexcept {
    return !(a==b);
}
/*! writes exactly b.size() characters, unformatted */
inline std::ostream& operator<<(std::ostream& oss, std::string_view b) {
    oss.write(&b[0], b.size());
    return oss;
}
}
#endif

#endif
29 changes: 29 additions & 0 deletions gpu-sw/cuda-utils.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "cuda-utils.hh"

#include <cstdio>
#include <iostream>

void eccl::dump_device_info(int device) {
struct cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, device);
printf("%s\n", prop.name);
printf("Major revision number: %d\n", prop.major);
printf("Minor revision number: %d\n", prop.minor);
printf("Total global memory: %zu", prop.totalGlobalMem);
printf(" bytes\n");
printf("Number of multiprocessors: %d\n", prop.multiProcessorCount);
printf("Total amount of shared memory per block: %zu\n",prop.sharedMemPerBlock);
printf("Total registers per block: %d\n", prop.regsPerBlock);
printf("Warp size: %d\n", prop.warpSize);
printf("Maximum memory pitch: %zu\n", prop.memPitch);
printf("Total amount of constant memory: %zu\n", prop.totalConstMem);
}

namespace eccl {
/*! Comma-operator error check for a CUDA status code.  A zero status is a
 *  no-op; any other status prints the checker's message with the CUDA error
 *  name and description to stderr and terminates with EXIT_FAILURE. */
void operator,(cudaError_t error, eccl::check_cuda checker) {
    if(error) {
        std::cerr<<"error: "<<checker._msg<<": "
                 <<cudaGetErrorName(error)<<": "
                 <<cudaGetErrorString(error)<<"\n";
        std::exit(EXIT_FAILURE);
    }
}
}
Loading