diff --git a/README.md b/README.md index 1d6d329f..54d7cc5c 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,35 @@ At the moment, GLIMPSE2 performs imputation only from a reference panel of sampl To build the source code, please refer to the [step-by-step guide on the website](https://odelaneau.github.io/GLIMPSE/docs/installation). +#### Building on macOS + +GLIMPSE2 supports macOS builds, including Apple Silicon (ARM64) devices. The build system automatically detects dependencies installed via Homebrew. + +**Prerequisites:** +```bash +# Install dependencies via Homebrew +brew install boost htslib openssl@3 libdeflate +``` + +**Building:** +```bash +# Build all tools +make + +# Or build individual tools +make phase +make chunk +make ligate +make split_reference +make concordance +``` + +**Platform-specific notes:** + +- **⚠️ Performance Note (WIP):** On Apple Silicon and other ARM platforms, GLIMPSE2 currently uses scalar fallback implementations instead of AVX2 SIMD instructions. This means **significantly slower performance** compared to x86_64 with AVX2. Native ARM NEON optimizations are planned for future releases. 
+- On x86_64 platforms, native AVX2/FMA instructions are used for optimal performance
+- The build system automatically detects the architecture and applies appropriate compiler flags
+
 #### Docker images
 
 ##### Dockerhub
diff --git a/chunk/makefile b/chunk/makefile
index f6fafcb0..4aeba3dd 100644
--- a/chunk/makefile
+++ b/chunk/makefile
@@ -15,6 +15,7 @@ LDFLAG=-O3
 
 #DYNAMIC LIBRARIES
 DYN_LIBS=-lz -lpthread -lbz2 -llzma -lcurl -lcrypto -ldeflate
+BASE_DYN_LIBS := $(DYN_LIBS)
 
 HFILE=$(shell find src -name *.h)
 CFILE=$(shell find src -name *.cpp)
@@ -25,6 +26,34 @@ NAME=$(shell basename $(CURDIR))
 BFILE=bin/GLIMPSE2_$(NAME)
 EXEFILE=bin/GLIMPSE2_$(NAME)_static
 
+BREW_AVAILABLE := $(shell command -v brew 2>/dev/null)
+BOOST_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix boost 2>/dev/null),)
+HTSLIB_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix htslib 2>/dev/null),)
+
+BOOST_INC_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/include,)
+BOOST_LIB_DIR_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/lib,)
+BOOST_LIB_IO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.a $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.dylib $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.so))
+BOOST_LIB_PO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_program_options.a $(BOOST_LIB_DIR_AUTO)/libboost_program_options.dylib $(BOOST_LIB_DIR_AUTO)/libboost_program_options.so))
+BOOST_LIB_SE_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_serialization.a $(BOOST_LIB_DIR_AUTO)/libboost_serialization.dylib $(BOOST_LIB_DIR_AUTO)/libboost_serialization.so))
+
+HTSLIB_INC_AUTO := $(if $(HTSLIB_PREFIX_AUTO),$(HTSLIB_PREFIX_AUTO)/include,)
+HTSLIB_LIB_AUTO := $(firstword $(wildcard $(HTSLIB_PREFIX_AUTO)/lib/libhts.a $(HTSLIB_PREFIX_AUTO)/lib/libhts.dylib $(HTSLIB_PREFIX_AUTO)/lib/libhts.so))
+
+OPENSSL_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix openssl@3 2>/dev/null),)
+LIBDEFLATE_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix libdeflate 2>/dev/null),)
+
+EXTRA_LDFLAGS_AUTO := $(strip $(if $(BOOST_LIB_DIR_AUTO),-L$(BOOST_LIB_DIR_AUTO)) $(if $(HTSLIB_PREFIX_AUTO),-L$(HTSLIB_PREFIX_AUTO)/lib) $(if $(OPENSSL_PREFIX_AUTO),-L$(OPENSSL_PREFIX_AUTO)/lib) $(if $(LIBDEFLATE_PREFIX_AUTO),-L$(LIBDEFLATE_PREFIX_AUTO)/lib))
+
+DEFAULT_BOOST_INC := $(or $(BOOST_INC_AUTO),/usr/include)
+DEFAULT_BOOST_LIB_IO := $(or $(BOOST_LIB_IO_AUTO),-lboost_iostreams)
+DEFAULT_BOOST_LIB_PO := $(or $(BOOST_LIB_PO_AUTO),-lboost_program_options)
+DEFAULT_BOOST_LIB_SE := $(or $(BOOST_LIB_SE_AUTO),-lboost_serialization)
+
+DEFAULT_HTSLIB_INC := $(or $(HTSLIB_INC_AUTO),/home/srubinac/git/htslib-1.17)
+DEFAULT_HTSLIB_LIB := $(or $(HTSLIB_LIB_AUTO),/home/srubinac/git/htslib-1.17/libhts.a)
+
+.DEFAULT_GOAL := auto
+
 #COMMIT_VERS=$(shell git rev-parse --short HEAD)
 #COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
 #CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
@@ -147,10 +176,23 @@ static_exe: BOOST_LIB_SE=/usr/local/lib/libboost_serialization.a
 static_exe: $(EXEFILE)
 
 #COMPILATION RULES
-all: desktop
+all: auto
+
+# 'auto' builds $(BFILE) with the DEFAULT_* include/library locations computed
+# above (taken from Homebrew when brew is found, otherwise the listed fallbacks).
+auto: BOOST_INC=$(DEFAULT_BOOST_INC)
+auto: BOOST_LIB_IO=$(DEFAULT_BOOST_LIB_IO)
+auto: BOOST_LIB_PO=$(DEFAULT_BOOST_LIB_PO)
+auto: BOOST_LIB_SE=$(DEFAULT_BOOST_LIB_SE)
+auto: HTSLIB_INC=$(DEFAULT_HTSLIB_INC)
+auto: HTSLIB_LIB=$(DEFAULT_HTSLIB_LIB)
+auto: AUTO_LDFLAG=$(strip $(LDFLAG) $(EXTRA_LDFLAGS_AUTO))
+auto: $(BFILE)
 
+# AUTO_LDFLAG is only defined for the 'auto' goal; when $(BFILE) is built through
+# any other goal it expands to nothing, so fall back to plain $(LDFLAG).
 $(BFILE): $(OFILE)
-	$(CXX) $(LDFLAG) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
+	$(CXX) $(or $(AUTO_LDFLAG),$(LDFLAG)) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
 
 $(EXEFILE): $(OFILE)
 	$(CXX) $(LDFLAG) -static -static-libgcc -static-libstdc++ -pthread -o $(EXEFILE) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) -Wl,-Bstatic $(DYN_LIBS)
diff --git a/concordance/makefile b/concordance/makefile
index f6fafcb0..4aeba3dd 100644
---
a/concordance/makefile
+++ b/concordance/makefile
@@ -15,6 +15,7 @@ LDFLAG=-O3
 
 #DYNAMIC LIBRARIES
 DYN_LIBS=-lz -lpthread -lbz2 -llzma -lcurl -lcrypto -ldeflate
+BASE_DYN_LIBS := $(DYN_LIBS)
 
 HFILE=$(shell find src -name *.h)
 CFILE=$(shell find src -name *.cpp)
@@ -25,6 +26,34 @@ NAME=$(shell basename $(CURDIR))
 BFILE=bin/GLIMPSE2_$(NAME)
 EXEFILE=bin/GLIMPSE2_$(NAME)_static
 
+BREW_AVAILABLE := $(shell command -v brew 2>/dev/null)
+BOOST_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix boost 2>/dev/null),)
+HTSLIB_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix htslib 2>/dev/null),)
+
+BOOST_INC_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/include,)
+BOOST_LIB_DIR_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/lib,)
+BOOST_LIB_IO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.a $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.dylib $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.so))
+BOOST_LIB_PO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_program_options.a $(BOOST_LIB_DIR_AUTO)/libboost_program_options.dylib $(BOOST_LIB_DIR_AUTO)/libboost_program_options.so))
+BOOST_LIB_SE_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_serialization.a $(BOOST_LIB_DIR_AUTO)/libboost_serialization.dylib $(BOOST_LIB_DIR_AUTO)/libboost_serialization.so))
+
+HTSLIB_INC_AUTO := $(if $(HTSLIB_PREFIX_AUTO),$(HTSLIB_PREFIX_AUTO)/include,)
+HTSLIB_LIB_AUTO := $(firstword $(wildcard $(HTSLIB_PREFIX_AUTO)/lib/libhts.a $(HTSLIB_PREFIX_AUTO)/lib/libhts.dylib $(HTSLIB_PREFIX_AUTO)/lib/libhts.so))
+
+OPENSSL_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix openssl@3 2>/dev/null),)
+LIBDEFLATE_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix libdeflate 2>/dev/null),)
+
+EXTRA_LDFLAGS_AUTO := $(strip $(if $(BOOST_LIB_DIR_AUTO),-L$(BOOST_LIB_DIR_AUTO)) $(if $(HTSLIB_PREFIX_AUTO),-L$(HTSLIB_PREFIX_AUTO)/lib) $(if $(OPENSSL_PREFIX_AUTO),-L$(OPENSSL_PREFIX_AUTO)/lib) $(if $(LIBDEFLATE_PREFIX_AUTO),-L$(LIBDEFLATE_PREFIX_AUTO)/lib))
+
+DEFAULT_BOOST_INC := $(or $(BOOST_INC_AUTO),/usr/include)
+DEFAULT_BOOST_LIB_IO := $(or $(BOOST_LIB_IO_AUTO),-lboost_iostreams)
+DEFAULT_BOOST_LIB_PO := $(or $(BOOST_LIB_PO_AUTO),-lboost_program_options)
+DEFAULT_BOOST_LIB_SE := $(or $(BOOST_LIB_SE_AUTO),-lboost_serialization)
+
+DEFAULT_HTSLIB_INC := $(or $(HTSLIB_INC_AUTO),/home/srubinac/git/htslib-1.17)
+DEFAULT_HTSLIB_LIB := $(or $(HTSLIB_LIB_AUTO),/home/srubinac/git/htslib-1.17/libhts.a)
+
+.DEFAULT_GOAL := auto
+
 #COMMIT_VERS=$(shell git rev-parse --short HEAD)
 #COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
 #CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
@@ -147,10 +176,23 @@ static_exe: BOOST_LIB_SE=/usr/local/lib/libboost_serialization.a
 static_exe: $(EXEFILE)
 
 #COMPILATION RULES
-all: desktop
+all: auto
+
+# 'auto' builds $(BFILE) with the DEFAULT_* include/library locations computed
+# above (taken from Homebrew when brew is found, otherwise the listed fallbacks).
+auto: BOOST_INC=$(DEFAULT_BOOST_INC)
+auto: BOOST_LIB_IO=$(DEFAULT_BOOST_LIB_IO)
+auto: BOOST_LIB_PO=$(DEFAULT_BOOST_LIB_PO)
+auto: BOOST_LIB_SE=$(DEFAULT_BOOST_LIB_SE)
+auto: HTSLIB_INC=$(DEFAULT_HTSLIB_INC)
+auto: HTSLIB_LIB=$(DEFAULT_HTSLIB_LIB)
+auto: AUTO_LDFLAG=$(strip $(LDFLAG) $(EXTRA_LDFLAGS_AUTO))
+auto: $(BFILE)
 
+# AUTO_LDFLAG is only defined for the 'auto' goal; when $(BFILE) is built through
+# any other goal it expands to nothing, so fall back to plain $(LDFLAG).
 $(BFILE): $(OFILE)
-	$(CXX) $(LDFLAG) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
+	$(CXX) $(or $(AUTO_LDFLAG),$(LDFLAG)) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
 
 $(EXEFILE): $(OFILE)
 	$(CXX) $(LDFLAG) -static -static-libgcc -static-libstdc++ -pthread -o $(EXEFILE) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) -Wl,-Bstatic $(DYN_LIBS)
diff --git a/ligate/makefile b/ligate/makefile
index f6fafcb0..4aeba3dd 100644
--- a/ligate/makefile
+++ b/ligate/makefile
@@ -15,6 +15,7 @@ LDFLAG=-O3
 
 #DYNAMIC LIBRARIES
 DYN_LIBS=-lz -lpthread -lbz2 -llzma -lcurl -lcrypto -ldeflate
+BASE_DYN_LIBS := $(DYN_LIBS)
 
 HFILE=$(shell find src -name *.h)
 CFILE=$(shell find src -name *.cpp)
@@ -25,6 +26,34 @@ NAME=$(shell basename
$(CURDIR))
 BFILE=bin/GLIMPSE2_$(NAME)
 EXEFILE=bin/GLIMPSE2_$(NAME)_static
 
+BREW_AVAILABLE := $(shell command -v brew 2>/dev/null)
+BOOST_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix boost 2>/dev/null),)
+HTSLIB_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix htslib 2>/dev/null),)
+
+BOOST_INC_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/include,)
+BOOST_LIB_DIR_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/lib,)
+BOOST_LIB_IO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.a $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.dylib $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.so))
+BOOST_LIB_PO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_program_options.a $(BOOST_LIB_DIR_AUTO)/libboost_program_options.dylib $(BOOST_LIB_DIR_AUTO)/libboost_program_options.so))
+BOOST_LIB_SE_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_serialization.a $(BOOST_LIB_DIR_AUTO)/libboost_serialization.dylib $(BOOST_LIB_DIR_AUTO)/libboost_serialization.so))
+
+HTSLIB_INC_AUTO := $(if $(HTSLIB_PREFIX_AUTO),$(HTSLIB_PREFIX_AUTO)/include,)
+HTSLIB_LIB_AUTO := $(firstword $(wildcard $(HTSLIB_PREFIX_AUTO)/lib/libhts.a $(HTSLIB_PREFIX_AUTO)/lib/libhts.dylib $(HTSLIB_PREFIX_AUTO)/lib/libhts.so))
+
+OPENSSL_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix openssl@3 2>/dev/null),)
+LIBDEFLATE_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix libdeflate 2>/dev/null),)
+
+EXTRA_LDFLAGS_AUTO := $(strip $(if $(BOOST_LIB_DIR_AUTO),-L$(BOOST_LIB_DIR_AUTO)) $(if $(HTSLIB_PREFIX_AUTO),-L$(HTSLIB_PREFIX_AUTO)/lib) $(if $(OPENSSL_PREFIX_AUTO),-L$(OPENSSL_PREFIX_AUTO)/lib) $(if $(LIBDEFLATE_PREFIX_AUTO),-L$(LIBDEFLATE_PREFIX_AUTO)/lib))
+
+DEFAULT_BOOST_INC := $(or $(BOOST_INC_AUTO),/usr/include)
+DEFAULT_BOOST_LIB_IO := $(or $(BOOST_LIB_IO_AUTO),-lboost_iostreams)
+DEFAULT_BOOST_LIB_PO := $(or $(BOOST_LIB_PO_AUTO),-lboost_program_options)
+DEFAULT_BOOST_LIB_SE := $(or $(BOOST_LIB_SE_AUTO),-lboost_serialization)
+
+DEFAULT_HTSLIB_INC := $(or $(HTSLIB_INC_AUTO),/home/srubinac/git/htslib-1.17)
+DEFAULT_HTSLIB_LIB := $(or $(HTSLIB_LIB_AUTO),/home/srubinac/git/htslib-1.17/libhts.a)
+
+.DEFAULT_GOAL := auto
+
 #COMMIT_VERS=$(shell git rev-parse --short HEAD)
 #COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
 #CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
@@ -147,10 +176,23 @@ static_exe: BOOST_LIB_SE=/usr/local/lib/libboost_serialization.a
 static_exe: $(EXEFILE)
 
 #COMPILATION RULES
-all: desktop
+all: auto
+
+# 'auto' builds $(BFILE) with the DEFAULT_* include/library locations computed
+# above (taken from Homebrew when brew is found, otherwise the listed fallbacks).
+auto: BOOST_INC=$(DEFAULT_BOOST_INC)
+auto: BOOST_LIB_IO=$(DEFAULT_BOOST_LIB_IO)
+auto: BOOST_LIB_PO=$(DEFAULT_BOOST_LIB_PO)
+auto: BOOST_LIB_SE=$(DEFAULT_BOOST_LIB_SE)
+auto: HTSLIB_INC=$(DEFAULT_HTSLIB_INC)
+auto: HTSLIB_LIB=$(DEFAULT_HTSLIB_LIB)
+auto: AUTO_LDFLAG=$(strip $(LDFLAG) $(EXTRA_LDFLAGS_AUTO))
+auto: $(BFILE)
 
+# AUTO_LDFLAG is only defined for the 'auto' goal; when $(BFILE) is built through
+# any other goal it expands to nothing, so fall back to plain $(LDFLAG).
 $(BFILE): $(OFILE)
-	$(CXX) $(LDFLAG) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
+	$(CXX) $(or $(AUTO_LDFLAG),$(LDFLAG)) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
 
 $(EXEFILE): $(OFILE)
 	$(CXX) $(LDFLAG) -static -static-libgcc -static-libstdc++ -pthread -o $(EXEFILE) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) -Wl,-Bstatic $(DYN_LIBS)
diff --git a/makefile b/makefile
index 5896d32c..d8b5e5d7 100644
--- a/makefile
+++ b/makefile
@@ -1,14 +1,29 @@
 projects = chunk concordance ligate phase split_reference
 
-.PHONY: all $(projects)
+# Locate Homebrew's Boost only when brew exists; BREW_BOOST_PREFIX stays empty
+# otherwise, so no bogus "/include" or "/lib" search path is generated.
+BREW_BOOST_PREFIX := $(shell command -v brew >/dev/null 2>&1 && brew --prefix boost 2>/dev/null)
+BOOST_INC := $(if $(BREW_BOOST_PREFIX),$(BREW_BOOST_PREFIX)/include)
+BOOST_LIB := $(if $(BREW_BOOST_PREFIX),$(BREW_BOOST_PREFIX)/lib)
+
+CXXFLAGS += $(if $(BOOST_INC),-I$(BOOST_INC))
+LDFLAGS += $(if $(BOOST_LIB),-L$(BOOST_LIB))
+LDLIBS += -lboost_program_options
+
+.PHONY: all $(projects) clean
 
 all: $(projects)
 
+# Build each tool in its own directory, forwarding the flags assembled above.
+# NOTE(review): the per-tool recipes in this patch link with LDFLAG/DYN_LIBS;
+# confirm the sub-makefiles actually consume CXXFLAGS/LDFLAGS/LDLIBS.
 $(projects):
-	$(MAKE) -C $@ $(COMPILATION_ENV)
+	$(MAKE) -C $@ $(COMPILATION_ENV) \
+		CXXFLAGS="$(CXXFLAGS)" \
+		LDFLAGS="$(LDFLAGS)" \
+		LDLIBS="$(LDLIBS)"
 
 clean:
 	for dir in $(projects); do \
-	$(MAKE) $@ -C $$dir; \
+		$(MAKE) -C $$dir
clean; \
 	done
-
diff --git a/phase/makefile b/phase/makefile
index f6fafcb0..eee9ced3 100644
--- a/phase/makefile
+++ b/phase/makefile
@@ -9,12 +9,14 @@ dummy_build_folder_obj := $(shell mkdir -p obj)
 #COMPILER & LINKER FLAGS
 CXXFLAG=-O3 -Wno-ignored-attributes
 LDFLAG=-O3
+HOST_ARCH := $(shell uname -m)
 
 #CXXFLAG=-O0 -g -Wno-ignored-attributes
 #LDFLAG=-O0
 
 #DYNAMIC LIBRARIES
 DYN_LIBS=-lz -lpthread -lbz2 -llzma -lcurl -lcrypto -ldeflate
+BASE_DYN_LIBS := $(DYN_LIBS)
 
 HFILE=$(shell find src -name *.h)
 CFILE=$(shell find src -name *.cpp)
@@ -25,13 +27,45 @@ NAME=$(shell basename $(CURDIR))
 BFILE=bin/GLIMPSE2_$(NAME)
 EXEFILE=bin/GLIMPSE2_$(NAME)_static
 
+BREW_AVAILABLE := $(shell command -v brew 2>/dev/null)
+BOOST_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix boost 2>/dev/null),)
+HTSLIB_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix htslib 2>/dev/null),)
+
+BOOST_INC_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/include,)
+BOOST_LIB_DIR_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/lib,)
+BOOST_LIB_IO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.a $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.dylib $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.so))
+BOOST_LIB_PO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_program_options.a $(BOOST_LIB_DIR_AUTO)/libboost_program_options.dylib $(BOOST_LIB_DIR_AUTO)/libboost_program_options.so))
+BOOST_LIB_SE_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_serialization.a $(BOOST_LIB_DIR_AUTO)/libboost_serialization.dylib $(BOOST_LIB_DIR_AUTO)/libboost_serialization.so))
+
+HTSLIB_INC_AUTO := $(if $(HTSLIB_PREFIX_AUTO),$(HTSLIB_PREFIX_AUTO)/include,)
+HTSLIB_LIB_AUTO := $(firstword $(wildcard $(HTSLIB_PREFIX_AUTO)/lib/libhts.a $(HTSLIB_PREFIX_AUTO)/lib/libhts.dylib $(HTSLIB_PREFIX_AUTO)/lib/libhts.so))
+
+OPENSSL_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix openssl@3 2>/dev/null),)
+LIBDEFLATE_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix libdeflate 2>/dev/null),)
+
+EXTRA_LDFLAGS_AUTO := $(strip $(if $(BOOST_LIB_DIR_AUTO),-L$(BOOST_LIB_DIR_AUTO)) $(if $(HTSLIB_PREFIX_AUTO),-L$(HTSLIB_PREFIX_AUTO)/lib) $(if $(OPENSSL_PREFIX_AUTO),-L$(OPENSSL_PREFIX_AUTO)/lib) $(if $(LIBDEFLATE_PREFIX_AUTO),-L$(LIBDEFLATE_PREFIX_AUTO)/lib))
+
+DEFAULT_BOOST_INC := $(or $(BOOST_INC_AUTO),/usr/include)
+DEFAULT_BOOST_LIB_IO := $(or $(BOOST_LIB_IO_AUTO),-lboost_iostreams)
+DEFAULT_BOOST_LIB_PO := $(or $(BOOST_LIB_PO_AUTO),-lboost_program_options)
+DEFAULT_BOOST_LIB_SE := $(or $(BOOST_LIB_SE_AUTO),-lboost_serialization)
+
+DEFAULT_HTSLIB_INC := $(or $(HTSLIB_INC_AUTO),/home/srubinac/git/htslib-1.17)
+DEFAULT_HTSLIB_LIB := $(or $(HTSLIB_LIB_AUTO),/home/srubinac/git/htslib-1.17/libhts.a)
+
+.DEFAULT_GOAL := auto
+
 #COMMIT_VERS=$(shell git rev-parse --short HEAD)
 #COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
 #CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
 #CXXFLAG+= -D__COMMIT_DATE__=\"$(COMMIT_DATE)\"
 
 ifeq ($(NAME),phase)
-	CXXFLAG+=-mavx2 -mfma
+  ifeq ($(HOST_ARCH),x86_64)
+    CXXFLAG+=-mavx2 -mfma
+  else
+    $(info Skipping AVX2/FMA flags for architecture $(HOST_ARCH))
+  endif
 endif
 
 COMMIT_VERS=3bed6d9
@@ -147,10 +181,23 @@ static_exe: BOOST_LIB_SE=/usr/local/lib/libboost_serialization.a
 static_exe: $(EXEFILE)
 
 #COMPILATION RULES
-all: desktop
+all: auto
+
+# 'auto' builds $(BFILE) with the DEFAULT_* include/library locations computed
+# above (taken from Homebrew when brew is found, otherwise the listed fallbacks).
+auto: BOOST_INC=$(DEFAULT_BOOST_INC)
+auto: BOOST_LIB_IO=$(DEFAULT_BOOST_LIB_IO)
+auto: BOOST_LIB_PO=$(DEFAULT_BOOST_LIB_PO)
+auto: BOOST_LIB_SE=$(DEFAULT_BOOST_LIB_SE)
+auto: HTSLIB_INC=$(DEFAULT_HTSLIB_INC)
+auto: HTSLIB_LIB=$(DEFAULT_HTSLIB_LIB)
+auto: AUTO_LDFLAG=$(strip $(LDFLAG) $(EXTRA_LDFLAGS_AUTO))
+auto: $(BFILE)
 
+# AUTO_LDFLAG is only defined for the 'auto' goal; when $(BFILE) is built through
+# any other goal it expands to nothing, so fall back to plain $(LDFLAG).
 $(BFILE): $(OFILE)
-	$(CXX) $(LDFLAG) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
+	$(CXX) $(or $(AUTO_LDFLAG),$(LDFLAG)) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
 
 $(EXEFILE): $(OFILE)
 	$(CXX) $(LDFLAG) -static -static-libgcc -static-libstdc++ -pthread -o $(EXEFILE) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) -Wl,-Bstatic $(DYN_LIBS)
diff --git a/phase/src/models/imputation_hmm.h b/phase/src/models/imputation_hmm.h
index 1bda64cb..deadfd1c 100644
--- a/phase/src/models/imputation_hmm.h
+++ b/phase/src/models/imputation_hmm.h
@@ -28,7 +28,7 @@
 #include
 #include
 
-#include
+#include "simd_compat.h"
 #include
 
 template
diff --git a/phase/src/models/phasing_hmm.h b/phase/src/models/phasing_hmm.h
index 26172473..82f5273a 100644
--- a/phase/src/models/phasing_hmm.h
+++ b/phase/src/models/phasing_hmm.h
@@ -28,7 +28,7 @@
 #include
 #include
 
-#include
+#include "simd_compat.h"
 #include
 
 template
diff --git a/phase/src/models/simd_compat.h b/phase/src/models/simd_compat.h
new file mode 100644
index 00000000..34a0f3e6
--- /dev/null
+++ b/phase/src/models/simd_compat.h
@@ -0,0 +1,233 @@
+#pragma once
+
+// SIMD compatibility layer to allow building on platforms without AVX2
+// support (for example, Apple Silicon). When AVX is available we defer to
+// the native intrinsics, otherwise we provide a scalar fallback that mimics
+// the API used by the GLIMPSE models.
+//
+// ⚠️ PERFORMANCE NOTE (WIP):
+// The scalar fallback implementation is SIGNIFICANTLY SLOWER than native AVX2.
+// This is a functional implementation to enable compilation on ARM platforms,
+// but performance optimization using ARM NEON intrinsics is planned for future
+// releases. Users on Apple Silicon should expect reduced performance compared
+// to x86_64 systems with AVX2 support.
+
+#if defined(__AVX2__) || defined(__AVX__)
+  #include <immintrin.h>
+  #define GLIMPSE_SIMD_HAS_AVX 1
+#else
+  #define GLIMPSE_SIMD_HAS_AVX 0
+  #include <algorithm>  // NOTE(review): the four header names below were inferred from what the fallback uses -- confirm
+  #include <cmath>
+  #include <cstdint>
+  #include <cstring>
+
+  namespace glimpse::simd
+  {
+    struct alignas(32) Vec256f {  // scalar stand-in for __m256: eight float lanes
+      float data[8];
+    };
+
+    struct alignas(32) Vec256i {  // scalar stand-in for __m256i: eight 32-bit lanes
+      std::uint32_t data[8];
+    };
+
+    struct alignas(16) Vec128f {  // scalar stand-in for __m128: four float lanes
+      float data[4];
+    };
+
+    template <typename T>
+    inline T bit_cast_copy(const void* src)  // reads sizeof(T) raw bytes from src into a new T
+    {
+      T dst;
+      std::memcpy(&dst, src, sizeof(T));
+      return dst;
+    }
+
+    template <typename T>
+    inline void bit_copy_to(const T& src, void* dst)  // writes the raw bytes of src to dst
+    {
+      std::memcpy(dst, &src, sizeof(T));
+    }
+  }
+
+  using __m256 = glimpse::simd::Vec256f;
+  using __m256i = glimpse::simd::Vec256i;
+  using __m128 = glimpse::simd::Vec128f;
+
+  // Floating point helpers -------------------------------------------------
+  inline __m256 _mm256_set1_ps(float value)
+  {
+    __m256 result;
+    for (int i = 0; i < 8; ++i) result.data[i] = value;
+    return result;
+  }
+
+  inline __m256 _mm256_load_ps(const float* ptr)
+  {
+    return glimpse::simd::bit_cast_copy<__m256>(ptr);
+  }
+
+  inline void _mm256_store_ps(float* ptr, const __m256& value)
+  {
+    glimpse::simd::bit_copy_to(value, ptr);
+  }
+
+  inline __m256 _mm256_add_ps(const __m256& a, const __m256& b)
+  {
+    __m256 result;
+    for (int i = 0; i < 8; ++i) result.data[i] = a.data[i] + b.data[i];
+    return result;
+  }
+
+  inline __m256 _mm256_mul_ps(const __m256& a, const __m256& b)
+  {
+    __m256 result;
+    for (int i = 0; i < 8; ++i) result.data[i] = a.data[i] * b.data[i];
+    return result;
+  }
+
+  inline __m256 _mm256_div_ps(const __m256& a, const __m256& b)
+  {
+    __m256 result;
+    for (int i = 0; i < 8; ++i) result.data[i] = a.data[i] / b.data[i];
+    return result;
+  }
+
+  inline __m256 _mm256_max_ps(const __m256& a, const __m256& b)
+  {
+    __m256 result;  // like VMAXPS, the second operand wins when a lane is NaN or both lanes compare equal
+    for (int i = 0; i < 8; ++i) result.data[i] = (a.data[i] > b.data[i]) ? a.data[i] : b.data[i];
+    return result;
+  }
+
+  inline __m256 _mm256_min_ps(const __m256& a, const __m256& b)
+  {
+    __m256 result;  // like VMINPS, the second operand wins when a lane is NaN or both lanes compare equal
+    for (int i = 0; i < 8; ++i) result.data[i] = (a.data[i] < b.data[i]) ? a.data[i] : b.data[i];
+    return result;
+  }
+
+  inline __m256 _mm256_fmadd_ps(const __m256& a, const __m256& b, const __m256& c)
+  {
+    return _mm256_add_ps(_mm256_mul_ps(a, b), c);  // a*b + c per lane, as a separate multiply and add
+  }
+
+  inline __m256 _mm256_setzero_ps()
+  {
+    return _mm256_set1_ps(0.0f);
+  }
+
+  // Integer helpers --------------------------------------------------------
+  inline __m256i _mm256_set1_epi32(int value)
+  {
+    __m256i result;
+    std::uint32_t converted = static_cast<std::uint32_t>(value);
+    for (int i = 0; i < 8; ++i) result.data[i] = converted;
+    return result;
+  }
+
+  inline __m256i _mm256_set_epi32(int e7, int e6, int e5, int e4, int e3, int e2, int e1, int e0)
+  {
+    __m256i result;  // arguments arrive highest lane first; e0 is stored in lane 0
+    result.data[0] = static_cast<std::uint32_t>(e0);
+    result.data[1] = static_cast<std::uint32_t>(e1);
+    result.data[2] = static_cast<std::uint32_t>(e2);
+    result.data[3] = static_cast<std::uint32_t>(e3);
+    result.data[4] = static_cast<std::uint32_t>(e4);
+    result.data[5] = static_cast<std::uint32_t>(e5);
+    result.data[6] = static_cast<std::uint32_t>(e6);
+    result.data[7] = static_cast<std::uint32_t>(e7);
+    return result;
+  }
+
+  inline __m256i _mm256_sllv_epi32(const __m256i& values, const __m256i& counts)
+  {
+    __m256i result;
+    for (int i = 0; i < 8; ++i)
+    {
+      const std::uint32_t shift = counts.data[i];  // VPSLLVD zeroes a lane whose count exceeds 31 (no wrap-around)
+      result.data[i] = (shift < 32u) ? (values.data[i] << shift) : 0u;
+    }
+    return result;
+  }
+
+  inline __m256 _mm256_castsi256_ps(const __m256i& value)
+  {
+    return glimpse::simd::bit_cast_copy<__m256>(value.data);
+  }
+
+  inline __m256i _mm256_castps_si256(const __m256& value)
+  {
+    return glimpse::simd::bit_cast_copy<__m256i>(value.data);
+  }
+
+  inline __m256 _mm256_blendv_ps(const __m256& a, const __m256& b, const __m256& mask)
+  {
+    __m256 result;
+    for (int i = 0; i < 8; ++i)
+    {
+      std::uint32_t bits = glimpse::simd::bit_cast_copy<std::uint32_t>(&mask.data[i]);
+      result.data[i] = (bits & 0x80000000u) ? b.data[i] : a.data[i];  // the mask lane's sign bit selects b
+    }
+    return result;
+  }
+
+  // Mixed helpers ----------------------------------------------------------
+  inline __m128 _mm256_castps256_ps128(const __m256& value)
+  {
+    __m128 result;
+    for (int i = 0; i < 4; ++i) result.data[i] = value.data[i];
+    return result;
+  }
+
+  inline __m128 _mm256_extractf128_ps(const __m256& value, const int index)
+  {
+    __m128 result;
+    const int offset = (index & 1) ? 4 : 0;  // an odd index selects the upper four lanes
+    for (int i = 0; i < 4; ++i) result.data[i] = value.data[offset + i];
+    return result;
+  }
+
+  // SSE fallbacks ----------------------------------------------------------
+  inline __m128 _mm_add_ps(const __m128& a, const __m128& b)
+  {
+    __m128 result;
+    for (int i = 0; i < 4; ++i) result.data[i] = a.data[i] + b.data[i];
+    return result;
+  }
+
+  inline __m128 _mm_movehdup_ps(const __m128& a)
+  {
+    __m128 result;  // duplicates the odd-indexed lanes: {a1, a1, a3, a3}
+    result.data[0] = a.data[1];
+    result.data[1] = a.data[1];
+    result.data[2] = a.data[3];
+    result.data[3] = a.data[3];
+    return result;
+  }
+
+  inline __m128 _mm_movehl_ps(const __m128& a, const __m128& b)
+  {
+    __m128 result;  // low half takes b's upper lanes, high half keeps a's upper lanes
+    result.data[0] = b.data[2];
+    result.data[1] = b.data[3];
+    result.data[2] = a.data[2];
+    result.data[3] = a.data[3];
+    return result;
+  }
+
+  inline __m128 _mm_add_ss(const __m128& a, const __m128& b)
+  {
+    __m128 result = a;  // only lane 0 is summed; lanes 1-3 are copied from a
+    result.data[0] = a.data[0] + b.data[0];
+    return result;
+  }
+
+  inline float _mm_cvtss_f32(const __m128& a)
+  {
+    return a.data[0];
+  }
+
+  // Convenience helpers ----------------------------------------------------
+#endif // GLIMPSE_SIMD_HAS_AVX
diff --git a/split_reference/makefile b/split_reference/makefile
index 4f8b0e36..1be33c3f 100644
--- a/split_reference/makefile
+++ b/split_reference/makefile
@@ -15,6 +15,7 @@ LDFLAG=-O3
 
 #DYNAMIC LIBRARIES
 DYN_LIBS=-lz -lpthread -lbz2 -llzma -lcurl -lcrypto -ldeflate
+BASE_DYN_LIBS := $(DYN_LIBS)
 
 HFILE=$(shell find src -name *.h)
 CFILE=$(shell find src -name *.cpp)
@@ -25,6 +26,34 @@ NAME=$(shell basename $(CURDIR))
BFILE=bin/GLIMPSE2_$(NAME)
 EXEFILE=bin/GLIMPSE2_$(NAME)_static
 
+BREW_AVAILABLE := $(shell command -v brew 2>/dev/null)
+BOOST_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix boost 2>/dev/null),)
+HTSLIB_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix htslib 2>/dev/null),)
+
+BOOST_INC_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/include,)
+BOOST_LIB_DIR_AUTO := $(if $(BOOST_PREFIX_AUTO),$(BOOST_PREFIX_AUTO)/lib,)
+BOOST_LIB_IO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.a $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.dylib $(BOOST_LIB_DIR_AUTO)/libboost_iostreams.so))
+BOOST_LIB_PO_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_program_options.a $(BOOST_LIB_DIR_AUTO)/libboost_program_options.dylib $(BOOST_LIB_DIR_AUTO)/libboost_program_options.so))
+BOOST_LIB_SE_AUTO := $(firstword $(wildcard $(BOOST_LIB_DIR_AUTO)/libboost_serialization.a $(BOOST_LIB_DIR_AUTO)/libboost_serialization.dylib $(BOOST_LIB_DIR_AUTO)/libboost_serialization.so))
+
+HTSLIB_INC_AUTO := $(if $(HTSLIB_PREFIX_AUTO),$(HTSLIB_PREFIX_AUTO)/include,)
+HTSLIB_LIB_AUTO := $(firstword $(wildcard $(HTSLIB_PREFIX_AUTO)/lib/libhts.a $(HTSLIB_PREFIX_AUTO)/lib/libhts.dylib $(HTSLIB_PREFIX_AUTO)/lib/libhts.so))
+
+OPENSSL_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix openssl@3 2>/dev/null),)
+LIBDEFLATE_PREFIX_AUTO := $(if $(BREW_AVAILABLE),$(shell brew --prefix libdeflate 2>/dev/null),)
+
+EXTRA_LDFLAGS_AUTO := $(strip $(if $(BOOST_LIB_DIR_AUTO),-L$(BOOST_LIB_DIR_AUTO)) $(if $(HTSLIB_PREFIX_AUTO),-L$(HTSLIB_PREFIX_AUTO)/lib) $(if $(OPENSSL_PREFIX_AUTO),-L$(OPENSSL_PREFIX_AUTO)/lib) $(if $(LIBDEFLATE_PREFIX_AUTO),-L$(LIBDEFLATE_PREFIX_AUTO)/lib))
+
+DEFAULT_BOOST_INC := $(or $(BOOST_INC_AUTO),/usr/include)
+DEFAULT_BOOST_LIB_IO := $(or $(BOOST_LIB_IO_AUTO),-lboost_iostreams)
+DEFAULT_BOOST_LIB_PO := $(or $(BOOST_LIB_PO_AUTO),-lboost_program_options)
+DEFAULT_BOOST_LIB_SE := $(or $(BOOST_LIB_SE_AUTO),-lboost_serialization)
+
+DEFAULT_HTSLIB_INC := $(or $(HTSLIB_INC_AUTO),/home/srubinac/git/htslib)
+DEFAULT_HTSLIB_LIB := $(or $(HTSLIB_LIB_AUTO),/home/srubinac/git/htslib/libhts.a)
+
+.DEFAULT_GOAL := auto
+
 #COMMIT_VERS=$(shell git rev-parse --short HEAD)
 #COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
 #CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
@@ -147,10 +176,23 @@ static_exe: BOOST_LIB_SE=/usr/local/lib/libboost_serialization.a
 static_exe: $(EXEFILE)
 
 #COMPILATION RULES
-all: desktop
+all: auto
+
+# 'auto' builds $(BFILE) with the DEFAULT_* include/library locations computed
+# above (taken from Homebrew when brew is found, otherwise the listed fallbacks).
+auto: BOOST_INC=$(DEFAULT_BOOST_INC)
+auto: BOOST_LIB_IO=$(DEFAULT_BOOST_LIB_IO)
+auto: BOOST_LIB_PO=$(DEFAULT_BOOST_LIB_PO)
+auto: BOOST_LIB_SE=$(DEFAULT_BOOST_LIB_SE)
+auto: HTSLIB_INC=$(DEFAULT_HTSLIB_INC)
+auto: HTSLIB_LIB=$(DEFAULT_HTSLIB_LIB)
+auto: AUTO_LDFLAG=$(strip $(LDFLAG) $(EXTRA_LDFLAGS_AUTO))
+auto: $(BFILE)
 
+# AUTO_LDFLAG is only defined for the 'auto' goal; when $(BFILE) is built through
+# any other goal it expands to nothing, so fall back to plain $(LDFLAG).
 $(BFILE): $(OFILE)
-	$(CXX) $(LDFLAG) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
+	$(CXX) $(or $(AUTO_LDFLAG),$(LDFLAG)) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) $(BGEN_LIB) -o $@ $(DYN_LIBS)
 
 $(EXEFILE): $(OFILE)
 	$(CXX) $(LDFLAG) -static -static-libgcc -static-libstdc++ -pthread -o $(EXEFILE) $^ $(HTSLIB_LIB) $(BOOST_LIB_IO) $(BOOST_LIB_PO) $(BOOST_LIB_SE) -Wl,-Bstatic $(DYN_LIBS)