From 8cd1fda753840009a2ffbeeb499c1603c7318cc4 Mon Sep 17 00:00:00 2001 From: Espen Hagen <2492641+espenhgn@users.noreply.github.com> Date: Fri, 9 Jan 2026 18:02:38 +0100 Subject: [PATCH 1/3] testing Dockerfile.gwama --- scripts/run_gwama/Dockerfile.gwama | 40 ++++++++ scripts/run_gwama/README.md | 127 +++++++++++++++++++++--- scripts/run_gwama/entrypoint.gwama.sh | 135 ++++++++++++++++++++++++++ scripts/run_gwama/regenie_to_gwama.py | 2 +- 4 files changed, 291 insertions(+), 13 deletions(-) create mode 100644 scripts/run_gwama/Dockerfile.gwama create mode 100644 scripts/run_gwama/entrypoint.gwama.sh diff --git a/scripts/run_gwama/Dockerfile.gwama b/scripts/run_gwama/Dockerfile.gwama new file mode 100644 index 0000000..3e2942e --- /dev/null +++ b/scripts/run_gwama/Dockerfile.gwama @@ -0,0 +1,40 @@ +FROM ubuntu:24.04 + +# install system dependencies (bare package names only: apt has no "pkg>=x" syntax and the shell would treat ">=x" as an output redirection) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + python3 \ + python3-pandas \ + python3-scipy \ + wget \ + unzip \ + zlib1g-dev \ + build-essential \ + && \ + apt-get clean \ + && \ + rm -rf /var/lib/apt/lists/* + +# install GWAMA (TLS certificate is verified using the ca-certificates package installed above) +WORKDIR /tmp +RUN wget https://www.geenivaramu.ee/tools/GWAMA_v2.2.2.zip && \ + unzip -d GWAMA GWAMA_v2.2.2.zip && \ + cd GWAMA && \ + make && \ + chmod +x GWAMA && \ + mv GWAMA /usr/local/bin/ && \ + cd .. && \ + rm -rf GWAMA GWAMA_v2.2.2.zip + +# copy conversion script and entrypoint +WORKDIR /tools +COPY regenie_to_gwama.py . +COPY entrypoint.gwama.sh .
+RUN chmod +x regenie_to_gwama.py entrypoint.gwama.sh + +# Set entrypoint +ENTRYPOINT ["/tools/entrypoint.gwama.sh"] +CMD ["--help"] + +WORKDIR /home diff --git a/scripts/run_gwama/README.md b/scripts/run_gwama/README.md index 53c9451..1a23e4d 100644 --- a/scripts/run_gwama/README.md +++ b/scripts/run_gwama/README.md @@ -1,10 +1,117 @@ -# Run GWAMA meta-analysis +# GWAMA Meta-Analysis Docker Container -## Download and build GWAMA +This container automates GWAS meta-analysis using GWAMA, converting REGENIE output to the required GWAMA format and performing meta-analysis across multiple sites. -The `GWAMA` binary need to exist on the server site where the meta-analysis is run. +## Quick Start -```{bash} +### Prerequisites + +- Docker installed +- REGENIE output files (.regenie format) from each site + +### Basic Usage + +```bash +docker run --platform=linux/amd64 \ + -v /path/to/data:/data \ + ghcr.io/collaborativebioinformatics/gwama \ + [site2_regenie] ... +``` + +docker run --platform=linux/amd64 -v /Users/espehage/Repositories/Fed_learning_infrastructure/resources/site1_gwas_results:/data -it ghcr.io/collaborativebioinformatics/gwama or gwama_meta /data/regenie_step2_Phen1.regenie + +**Arguments:** +- `mode`: `or` for binary traits or `qt` for quantitative traits +- `output_prefix`: Prefix for output files +- `regenie_files`: Paths to REGENIE output files (use absolute paths) + +### Example: Binary Trait (Case-Control) + +```bash +docker run --platform=linux/amd64 \ + -v /Users/espehage/resources:/data \ + -v /Users/espehage/output:/output \ + ghcr.io/collaborativebioinformatics/gwama \ + or meta_analysis /data/site1.regenie /data/site2.regenie /data/site3.regenie +``` + +### Example: Quantitative Trait + +```bash +docker run --platform=linux/amd64 \ + -v /Users/espehage/resources:/data \ + ghcr.io/collaborativebioinformatics/gwama \ + qt meta_results /data/site1.regenie /data/site2.regenie +``` + +## Important: Volume Mounting + +The container **cannot access 
relative paths** from the host. You must: + +1. Use **absolute paths** to data files +2. Mount directories with `-v /host/absolute/path:/container/path` +3. Reference files by their mounted paths inside the container + +**❌ This will fail (relative paths don't work in containers):** +```bash +docker run ... gwama or meta ../../resources/site1.regenie +``` + +**✅ This works (absolute path or mounted volume):** +```bash +docker run -v /Users/espehage/resources:/data ... gwama or meta /data/site1.regenie +``` + +## Output Files + +Results are saved in the container's `/work/gwama_analysis/` directory: + +- `.out` - Main GWAMA meta-analysis results +- `.err.out` - Error log from GWAMA +- `.in` - GWAMA input file list +- `_site*.txt` - Converted GWAMA format files per site + +To access results from your host, mount an output volume: + +```bash +docker run --platform=linux/amd64 \ + -v /data/gwas:/input \ + -v /data/output:/output \ + ghcr.io/collaborativebioinformatics/gwama \ + or meta /input/site1.regenie /input/site2.regenie +``` + +Then retrieve results from `/output/`. + +## Help + +View the help message: + +```bash +docker run ghcr.io/collaborativebioinformatics/gwama --help +``` + +## Building Locally + +```bash +cd scripts/run_gwama +docker build --platform=linux/amd64 -t gwama:local -f Dockerfile.gwama . +docker run gwama:local or meta /data/site1.regenie +``` + +## Pipeline Steps + +The entrypoint automatically: + +1. **Converts** each site's REGENIE output to GWAMA format using `regenie_to_gwama.py` +2. **Creates** GWAMA input file list +3. **Runs** GWAMA meta-analysis with appropriate parameters for the trait type + +## Manual Setup (if not using Docker) + +### Download and build GWAMA + +```bash wget https://www.geenivaramu.ee/tools/GWAMA_v2.2.2.zip unzip -d GWAMA GWAMA_v2.2.2.zip cd GWAMA @@ -13,26 +120,22 @@ chmod +x GWAMA cd .. 
``` -## Run GWAMA meta-analysis (Regenie data format) - ### Convert Regenie output to GWAMA input format -```{bash} +```bash export SITE=1 export DATA_PATH="../../resources/site${SITE}_gwas_results" export FILEPREFIX="regenie_step2_Phen1.regenie" -python3 regenie_to_gwama.py \ +python3 regenie_to_gwama.py \ "${DATA_PATH}/${FILEPREFIX}" \ "site${SITE}_for_gwama.txt" \ "or" ``` -### create input file list for GWAMA +### Create input file list for GWAMA -Should contain data from all 10 sites, here shown for site 1 only for brevity. - -```{bash} +```bash echo site1_for_gwama.txt > gwama.in ``` diff --git a/scripts/run_gwama/entrypoint.gwama.sh b/scripts/run_gwama/entrypoint.gwama.sh new file mode 100644 index 0000000..79c196e --- /dev/null +++ b/scripts/run_gwama/entrypoint.gwama.sh @@ -0,0 +1,135 @@ +#!/bin/bash + +set -euo pipefail + +# GWAMA meta-analysis entrypoint +# Converts REGENIE results to GWAMA format and runs meta-analysis +# +# Usage: +# entrypoint.gwama.sh [site2_regenie_file] ... +# +# Arguments: +# mode: 'or' for odds ratio (case-control) or 'qt' for quantitative trait +# output_prefix: prefix for GWAMA output files +# regenie_files: paths to .regenie output files (one per site) +# +# Example: +# entrypoint.gwama.sh or gwama_results /data/site1.regenie /data/site2.regenie + +# Show help +if [ $# -eq 0 ] || [[ "$1" == "--help" || "$1" == "-h" ]]; then + cat << 'EOF' +GWAMA Meta-Analysis Pipeline + +Usage: + entrypoint.gwama.sh [site2_regenie_file] ... + +Arguments: + mode: 'or' for odds ratio (case-control) or 'qt' for quantitative trait + output_prefix: prefix for GWAMA output files + regenie_files: paths to .regenie output files (one per site, use absolute paths or mount volumes) + +Example: + docker run -v /path/to/data:/data ghcr.io/collaborativebioinformatics/gwama \ + or gwama_results /data/site1.regenie /data/site2.regenie + +Note: + Use absolute paths or mount input data with -v flag. Relative paths from host do not work in containers. 
+EOF + exit 0 +fi + +if [ $# -lt 3 ]; then + echo "Error: Insufficient arguments" + echo "Usage: $0 [site2_regenie_file] ..." + echo "Run with --help for more information" + exit 1 +fi + +mode="$1" +output_prefix="$2" +shift 2 +regenie_files=("$@") + +# Validate mode +if [[ ! "$mode" =~ ^(or|qt)$ ]]; then + echo "Error: mode must be 'or' or 'qt', got '$mode'" + exit 1 +fi + +echo "======================================" +echo "GWAMA Meta-Analysis Pipeline" +echo "======================================" +echo "Mode: $mode" +echo "Output prefix: $output_prefix" +echo "Number of sites: ${#regenie_files[@]}" +echo "" + +# Create working directory +WORKDIR="/work/gwama_analysis" +mkdir -p "$WORKDIR" +cd "$WORKDIR" + +# Convert each site's REGENIE output to GWAMA format +echo "Step 1: Converting REGENIE outputs to GWAMA format..." +gwama_input_files=() + +for i in "${!regenie_files[@]}"; do + site_num=$((i + 1)) + input_file="${regenie_files[$i]}" + output_file="${output_prefix}_site${site_num}.txt" + + if [ ! -f "$input_file" ]; then + echo "Error: Input file not found: $input_file" + echo "" + echo "Troubleshooting:" + echo " - Ensure file exists at the specified path" + echo " - Use absolute paths inside the container" + echo " - Mount data directory with: docker run -v /host/path:/data ..." + echo " - Then reference files as: /data/filename" + exit 1 + fi + + echo " Site $site_num: Converting $input_file..." + python3 /tools/regenie_to_gwama.py "$input_file" "$output_file" "$mode" + gwama_input_files+=("$output_file") +done + +echo "" +echo "Step 2: Creating GWAMA input file list..." +gwama_input_list="${output_prefix}.in" +for file in "${gwama_input_files[@]}"; do + echo "$file" +done > "$gwama_input_list" # truncate instead of append: a re-run in the same directory must not list each site twice +echo "Created: $gwama_input_list" +echo "" + +echo "Step 3: Running GWAMA meta-analysis..."
+if [ "$mode" == "or" ]; then + GWAMA \ + -i "$gwama_input_list" \ + --output "$output_prefix" \ + --name_marker MARKERNAME \ + --name_ea EA \ + --name_nea NEA \ + --name_or OR \ + --name_or_95l OR_95L \ + --name_or_95u OR_95U +else + # Quantitative trait mode (no OR fields): --quantitative switches GWAMA from its default binary-trait (OR) input to BETA/SE columns + GWAMA --quantitative \ + -i "$gwama_input_list" \ + --output "$output_prefix" \ + --name_marker MARKERNAME \ + --name_ea EA \ + --name_nea NEA +fi + +echo "" +echo "======================================" +echo "GWAMA meta-analysis completed!" +echo "======================================" +echo "Results saved to: $WORKDIR" +echo " - Main output: ${output_prefix}.out" +echo " - Error log: ${output_prefix}.err.out" +echo "" diff --git a/scripts/run_gwama/regenie_to_gwama.py b/scripts/run_gwama/regenie_to_gwama.py index f872eed..31b4404 100644 --- a/scripts/run_gwama/regenie_to_gwama.py +++ b/scripts/run_gwama/regenie_to_gwama.py @@ -38,7 +38,7 @@ def parse_args(): def main(): args = parse_args() - gwas = pd.read_csv(args.input, sep="\s+") + gwas = pd.read_csv(args.input, sep="\\s+") if args.mode == 'or': z_score = norm.ppf(0.975) # 97.5 percentile gwas['OR'] = np.exp(gwas['BETA']) From 814e42ee2d7e3a19580e771109fb146837762eb7 Mon Sep 17 00:00:00 2001 From: Espen Hagen <2492641+espenhgn@users.noreply.github.com> Date: Fri, 9 Jan 2026 18:15:50 +0100 Subject: [PATCH 2/3] entrypoint.gwama.sh works --- scripts/run_gwama/README.md | 16 ++++--- scripts/run_gwama/entrypoint.gwama.sh | 63 +++++++++++++++++++++------ 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/scripts/run_gwama/README.md b/scripts/run_gwama/README.md index 1a23e4d..7712274 100644 --- a/scripts/run_gwama/README.md +++ b/scripts/run_gwama/README.md @@ -14,8 +14,9 @@ This container automates GWAS meta-analysis using GWAMA, converting REGENIE outp ```bash docker run --platform=linux/amd64 \ -v /path/to/data:/data \ + -v /path/to/output:/out \ ghcr.io/collaborativebioinformatics/gwama \ - [site2_regenie] ...
+ -o /out [site2_regenie] ... ``` docker run --platform=linux/amd64 -v /Users/espehage/Repositories/Fed_learning_infrastructure/resources/site1_gwas_results:/data -it ghcr.io/collaborativebioinformatics/gwama or gwama_meta /data/regenie_step2_Phen1.regenie @@ -30,9 +31,9 @@ docker run --platform=linux/amd64 -v /Users/espehage/Repositories/Fed_learning_i ```bash docker run --platform=linux/amd64 \ -v /Users/espehage/resources:/data \ - -v /Users/espehage/output:/output \ + -v /Users/espehage/output:/out \ ghcr.io/collaborativebioinformatics/gwama \ - or meta_analysis /data/site1.regenie /data/site2.regenie /data/site3.regenie + -o /out or meta_analysis /data/site1.regenie /data/site2.regenie /data/site3.regenie ``` ### Example: Quantitative Trait @@ -40,8 +41,9 @@ docker run --platform=linux/amd64 \ ```bash docker run --platform=linux/amd64 \ -v /Users/espehage/resources:/data \ + -v /Users/espehage/output:/out \ ghcr.io/collaborativebioinformatics/gwama \ - qt meta_results /data/site1.regenie /data/site2.regenie + -o /out qt meta_results /data/site1.regenie /data/site2.regenie ``` ## Important: Volume Mounting @@ -64,7 +66,7 @@ docker run -v /Users/espehage/resources:/data ... gwama or meta /data/site1.rege ## Output Files -Results are saved in the container's `/work/gwama_analysis/` directory: +Results are saved in the container's `/home/` directory by default, or in the directory specified with `-o/--outdir`. - `.out` - Main GWAMA meta-analysis results - `.err.out` - Error log from GWAMA @@ -76,9 +78,9 @@ To access results from your host, mount an output volume: ```bash docker run --platform=linux/amd64 \ -v /data/gwas:/input \ - -v /data/output:/output \ + -v /data/output:/out \ ghcr.io/collaborativebioinformatics/gwama \ - or meta /input/site1.regenie /input/site2.regenie + -o /out or meta /input/site1.regenie /input/site2.regenie ``` Then retrieve results from `/output/`. 
diff --git a/scripts/run_gwama/entrypoint.gwama.sh b/scripts/run_gwama/entrypoint.gwama.sh index 79c196e..4495c8d 100644 --- a/scripts/run_gwama/entrypoint.gwama.sh +++ b/scripts/run_gwama/entrypoint.gwama.sh @@ -6,7 +6,7 @@ set -euo pipefail # Converts REGENIE results to GWAMA format and runs meta-analysis # # Usage: -# entrypoint.gwama.sh [site2_regenie_file] ... +# entrypoint.gwama.sh [--outdir DIR|-o DIR] [site2_regenie_file] ... # # Arguments: # mode: 'or' for odds ratio (case-control) or 'qt' for quantitative trait @@ -14,7 +14,7 @@ set -euo pipefail # regenie_files: paths to .regenie output files (one per site) # # Example: -# entrypoint.gwama.sh or gwama_results /data/site1.regenie /data/site2.regenie +# entrypoint.gwama.sh -o /output or gwama_results /data/site1.regenie /data/site2.regenie # Show help if [ $# -eq 0 ] || [[ "$1" == "--help" || "$1" == "-h" ]]; then @@ -22,16 +22,17 @@ if [ $# -eq 0 ] || [[ "$1" == "--help" || "$1" == "-h" ]]; then GWAMA Meta-Analysis Pipeline Usage: - entrypoint.gwama.sh [site2_regenie_file] ... + entrypoint.gwama.sh [--outdir DIR|-o DIR] [site2_regenie_file] ... Arguments: mode: 'or' for odds ratio (case-control) or 'qt' for quantitative trait output_prefix: prefix for GWAMA output files regenie_files: paths to .regenie output files (one per site, use absolute paths or mount volumes) + --outdir, -o: output directory inside container (default: /home) Example: - docker run -v /path/to/data:/data ghcr.io/collaborativebioinformatics/gwama \ - or gwama_results /data/site1.regenie /data/site2.regenie + docker run -v /path/to/data:/data -v /path/to/output:/out ghcr.io/collaborativebioinformatics/gwama \ + -o /out or gwama_results /data/site1.regenie /data/site2.regenie Note: Use absolute paths or mount input data with -v flag. Relative paths from host do not work in containers. 
@@ -39,17 +40,53 @@ EOF exit 0 fi -if [ $# -lt 3 ]; then +# Defaults +OUTDIR="/home" + +# Collect positional args after consuming options (options can appear anywhere) +POSITIONAL=() +while [[ $# -gt 0 ]]; do + case "$1" in + -o|--outdir) + if [[ $# -lt 2 ]]; then + echo "Error: --outdir requires a directory path" + exit 1 + fi + OUTDIR="$2" + shift 2 + ;; + -h|--help) + exec "$0" --help + ;; + --) + shift + while [[ $# -gt 0 ]]; do + POSITIONAL+=("$1") + shift + done + ;; + -*) + echo "Error: Unknown option: $1" + echo "Run with --help for usage." + exit 1 + ;; + *) + POSITIONAL+=("$1") + shift + ;; + esac +done + +if [ ${#POSITIONAL[@]} -lt 3 ]; then echo "Error: Insufficient arguments" - echo "Usage: $0 [site2_regenie_file] ..." + echo "Usage: $0 [--outdir DIR|-o DIR] [site2_regenie_file] ..." echo "Run with --help for more information" exit 1 fi -mode="$1" -output_prefix="$2" -shift 2 -regenie_files=("$@") +mode="${POSITIONAL[0]}" +output_prefix="${POSITIONAL[1]}" +regenie_files=("${POSITIONAL[@]:2}") # Validate mode if [[ ! 
"$mode" =~ ^(or|qt)$ ]]; then @@ -65,8 +102,8 @@ echo "Output prefix: $output_prefix" echo "Number of sites: ${#regenie_files[@]}" echo "" -# Create working directory -WORKDIR="/work/gwama_analysis" +# Create/ensure output directory and work there +WORKDIR="$OUTDIR" mkdir -p "$WORKDIR" cd "$WORKDIR" From ea82c913aa1460e3516d752cd6b5abf27c72060c Mon Sep 17 00:00:00 2001 From: Espen Hagen <2492641+espenhgn@users.noreply.github.com> Date: Fri, 9 Jan 2026 18:32:33 +0100 Subject: [PATCH 3/3] not so elegant but it works --- scripts/run_gwama/README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/scripts/run_gwama/README.md b/scripts/run_gwama/README.md index 7712274..e513e34 100644 --- a/scripts/run_gwama/README.md +++ b/scripts/run_gwama/README.md @@ -9,6 +9,7 @@ This container automates GWAS meta-analysis using GWAMA, converting REGENIE outp - Docker installed - REGENIE output files (.regenie format) from each site + ### Basic Usage ```bash @@ -19,7 +20,25 @@ docker run --platform=linux/amd64 \ -o /out [site2_regenie] ... ``` -docker run --platform=linux/amd64 -v /Users/espehage/Repositories/Fed_learning_infrastructure/resources/site1_gwas_results:/data -it ghcr.io/collaborativebioinformatics/gwama or gwama_meta /data/regenie_step2_Phen1.regenie +**Example usage:** (using binary trait GWAS in this repository) + +```{bash} +# build the Docker image locally +cd scripts/run_gwama +docker build --platform=linux/amd64 -t ghcr.io/collaborativebioinformatics/gwama -f Dockerfile.gwama . + +# run (help function) +docker run --platform=linux/amd64 \ + -it ghcr.io/collaborativebioinformatics/gwama --help +... 
+ +# run (example) - should put output in working directory +docker run --platform=linux/amd64 \ + -v /resources/site1_gwas_results:/data \ + -v /scripts/run_gwama/:/out \ + -it ghcr.io/collaborativebioinformatics/gwama -o /out or gwama_meta /data/regenie_step2_Phen1.regenie +``` + **Arguments:** - `mode`: `or` for binary traits or `qt` for quantitative traits