diff --git a/scripts/run_gwama/Dockerfile.gwama b/scripts/run_gwama/Dockerfile.gwama
new file mode 100644
index 0000000..3e2942e
--- /dev/null
+++ b/scripts/run_gwama/Dockerfile.gwama
@@ -0,0 +1,44 @@
+FROM ubuntu:24.04
+
+# Install system dependencies.
+# Versions are intentionally not constrained with ">=": apt-get has no such
+# syntax, and inside RUN the shell parses `pkg>=1.0` as "redirect stdout to a
+# file named '=1.0'", which hides apt's output and litters the image root.
+# ca-certificates lets wget verify the TLS certificate of the download below.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        python3 \
+        python3-pandas \
+        python3-scipy \
+        unzip \
+        wget \
+        zlib1g-dev \
+    && \
+    apt-get clean \
+    && \
+    rm -rf /var/lib/apt/lists/*
+
+# Download, build and install GWAMA; sources are removed in the same layer so
+# they do not end up in the image.
+WORKDIR /tmp
+RUN wget https://www.geenivaramu.ee/tools/GWAMA_v2.2.2.zip && \
+    unzip -d GWAMA GWAMA_v2.2.2.zip && \
+    make -C GWAMA && \
+    install -m 0755 GWAMA/GWAMA /usr/local/bin/GWAMA && \
+    rm -rf GWAMA GWAMA_v2.2.2.zip
+
+# Copy conversion script and entrypoint
+WORKDIR /tools
+COPY regenie_to_gwama.py .
+COPY entrypoint.gwama.sh .
+RUN chmod +x regenie_to_gwama.py entrypoint.gwama.sh
+
+# The entrypoint runs the whole pipeline; the default CMD prints its help text.
+ENTRYPOINT ["/tools/entrypoint.gwama.sh"]
+CMD ["--help"]
+
+# Default working directory; the entrypoint also uses /home as its default
+# output directory.
+WORKDIR /home
diff --git a/scripts/run_gwama/README.md b/scripts/run_gwama/README.md
index 53c9451..e513e34 100644
--- a/scripts/run_gwama/README.md
+++ b/scripts/run_gwama/README.md
@@ -1,10 +1,138 @@
-# Run GWAMA meta-analysis
+# GWAMA Meta-Analysis Docker Container
 
-## Download and build GWAMA
+This container automates GWAS meta-analysis using GWAMA, converting REGENIE output to the required GWAMA format and performing meta-analysis across multiple sites.
 
-The `GWAMA` binary need to exist on the server site where the meta-analysis is run.
+## Quick Start
+
+### Prerequisites
+
+- Docker installed
+- REGENIE output files (.regenie format) from each site
+
+
+### Basic Usage
+
+```bash
+docker run --platform=linux/amd64 \
+  -v /path/to/data:/data \
+  -v /path/to/output:/out \
+  ghcr.io/collaborativebioinformatics/gwama \
+  -o /out <mode> <output_prefix> <site1_regenie> [site2_regenie] ...
+```
+
+**Example usage:** (using binary trait GWAS in this repository)
 
 ```{bash}
+# build the Docker image locally
+cd scripts/run_gwama
+docker build --platform=linux/amd64 -t ghcr.io/collaborativebioinformatics/gwama -f Dockerfile.gwama .
+
+# run (help function)
+docker run --platform=linux/amd64 \
+  -it ghcr.io/collaborativebioinformatics/gwama --help
+# ...
+
+# run (example) - writes output to the current working directory (scripts/run_gwama)
+docker run --platform=linux/amd64 \
+  -v "$(cd ../.. && pwd)/resources/site1_gwas_results":/data \
+  -v "$(pwd)":/out \
+  -it ghcr.io/collaborativebioinformatics/gwama -o /out or gwama_meta /data/regenie_step2_Phen1.regenie
+```
+
+
+**Arguments:**
+- `mode`: `or` for binary traits or `qt` for quantitative traits
+- `output_prefix`: Prefix for output files
+- `regenie_files`: Paths to REGENIE output files (use absolute paths)
+
+### Example: Binary Trait (Case-Control)
+
+```bash
+docker run --platform=linux/amd64 \
+  -v /path/to/data:/data \
+  -v /path/to/output:/out \
+  ghcr.io/collaborativebioinformatics/gwama \
+  -o /out or meta_analysis /data/site1.regenie /data/site2.regenie /data/site3.regenie
+```
+
+### Example: Quantitative Trait
+
+```bash
+docker run --platform=linux/amd64 \
+  -v /path/to/data:/data \
+  -v /path/to/output:/out \
+  ghcr.io/collaborativebioinformatics/gwama \
+  -o /out qt meta_results /data/site1.regenie /data/site2.regenie
+```
+
+## Important: Volume Mounting
+
+The container **cannot access relative paths** from the host. You must:
+
+1. Use **absolute paths** to data files
+2. Mount directories with `-v /host/absolute/path:/container/path`
+3. Reference files by their mounted paths inside the container
+
+**❌ This will fail (relative host paths are not visible inside the container):**
+```bash
+docker run ... gwama or meta ../../resources/site1.regenie
+```
+
+**✅ This works (host directory mounted as a volume, file referenced by its container path):**
+```bash
+docker run -v /path/to/data:/data ... gwama or meta /data/site1.regenie
+```
+
+## Output Files
+
+Results are saved in the container's `/home/` directory by default, or in the directory specified with `-o/--outdir`.
+
+- `<output_prefix>.out` - Main GWAMA meta-analysis results
+- `<output_prefix>.err.out` - Error log from GWAMA
+- `<output_prefix>.in` - GWAMA input file list
+- `<output_prefix>_site*.txt` - Converted GWAMA format files per site
+
+To access results from your host, mount an output volume:
+
+```bash
+docker run --platform=linux/amd64 \
+  -v /data/gwas:/input \
+  -v /data/output:/out \
+  ghcr.io/collaborativebioinformatics/gwama \
+  -o /out or meta /input/site1.regenie /input/site2.regenie
+```
+
+Then retrieve the results from `/data/output/` on the host.
+
+## Help
+
+View the help message:
+
+```bash
+docker run ghcr.io/collaborativebioinformatics/gwama --help
+```
+
+## Building Locally
+
+```bash
+cd scripts/run_gwama
+docker build --platform=linux/amd64 -t gwama:local -f Dockerfile.gwama .
+docker run -v /path/to/data:/data gwama:local or meta /data/site1.regenie
+```
+
+## Pipeline Steps
+
+The entrypoint automatically:
+
+1. **Converts** each site's REGENIE output to GWAMA format using `regenie_to_gwama.py`
+2. **Creates** GWAMA input file list
+3. **Runs** GWAMA meta-analysis with appropriate parameters for the trait type
+
+## Manual Setup (if not using Docker)
+
+### Download and build GWAMA
+
+```bash
 wget https://www.geenivaramu.ee/tools/GWAMA_v2.2.2.zip
 unzip -d GWAMA GWAMA_v2.2.2.zip
 cd GWAMA
@@ -13,26 +141,22 @@ chmod +x GWAMA
 cd ..
 ```
 
-## Run GWAMA meta-analysis (Regenie data format)
-
 ### Convert Regenie output to GWAMA input format
 
-```{bash}
+```bash
 export SITE=1
 export DATA_PATH="../../resources/site${SITE}_gwas_results"
 export FILEPREFIX="regenie_step2_Phen1.regenie"
 
-python3 regenie_to_gwama.py  \
+python3 regenie_to_gwama.py \
     "${DATA_PATH}/${FILEPREFIX}" \
     "site${SITE}_for_gwama.txt" \
     "or"
 ```
 
-### create input file list for GWAMA
-
-Should contain data from all 10 sites, here shown for site 1 only for brevity.
+### Create input file list for GWAMA
+
 
-```{bash}
+```bash
 echo site1_for_gwama.txt > gwama.in
 ```
diff --git a/scripts/run_gwama/entrypoint.gwama.sh b/scripts/run_gwama/entrypoint.gwama.sh
new file mode 100644
index 0000000..4495c8d
--- /dev/null
+++ b/scripts/run_gwama/entrypoint.gwama.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# GWAMA meta-analysis entrypoint
+# Converts REGENIE results to GWAMA format and runs meta-analysis
+#
+# Usage:
+#   entrypoint.gwama.sh [--outdir DIR|-o DIR] <mode> <output_prefix> <site1_regenie_file> [site2_regenie_file] ...
+#
+# Arguments:
+#   mode: 'or' for odds ratio (case-control) or 'qt' for quantitative trait
+#   output_prefix: prefix for GWAMA output files
+#   regenie_files: paths to .regenie output files (one per site)
+#
+# Example:
+#   entrypoint.gwama.sh -o /output or gwama_results /data/site1.regenie /data/site2.regenie
+
+# Print the help text to stdout
+usage() {
+    cat << 'EOF'
+GWAMA Meta-Analysis Pipeline
+
+Usage:
+  entrypoint.gwama.sh [--outdir DIR|-o DIR] <mode> <output_prefix> <site1_regenie_file> [site2_regenie_file] ...
+
+Arguments:
+  mode: 'or' for odds ratio (case-control) or 'qt' for quantitative trait
+  output_prefix: prefix for GWAMA output files
+  regenie_files: paths to .regenie output files (one per site, use absolute paths or mount volumes)
+  --outdir, -o: output directory inside container (default: /home)
+
+Example:
+  docker run -v /path/to/data:/data -v /path/to/output:/out ghcr.io/collaborativebioinformatics/gwama \
+    -o /out or gwama_results /data/site1.regenie /data/site2.regenie
+
+Note:
+  Use absolute paths or mount input data with -v flag. Relative paths from host do not work in containers.
+EOF
+}
+
+# Show help when run without arguments or with a leading help flag
+if [ $# -eq 0 ] || [[ "$1" == "--help" || "$1" == "-h" ]]; then
+    usage
+    exit 0
+fi
+
+# Defaults
+OUTDIR="/home"
+
+# Collect positional args after consuming options (options can appear anywhere)
+POSITIONAL=()
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -o|--outdir)
+            if [[ $# -lt 2 ]]; then
+                echo "Error: --outdir requires a directory path" >&2
+                exit 1
+            fi
+            OUTDIR="$2"
+            shift 2
+            ;;
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        --)
+            # Everything after "--" is positional, even if it starts with "-"
+            shift
+            POSITIONAL+=("$@")
+            break
+            ;;
+        -*)
+            echo "Error: Unknown option: $1" >&2
+            echo "Run with --help for usage." >&2
+            exit 1
+            ;;
+        *)
+            POSITIONAL+=("$1")
+            shift
+            ;;
+    esac
+done
+
+if [ ${#POSITIONAL[@]} -lt 3 ]; then
+    echo "Error: Insufficient arguments" >&2
+    echo "Usage: $0 [--outdir DIR|-o DIR] <mode> <output_prefix> <site1_regenie_file> [site2_regenie_file] ..." >&2
+    echo "Run with --help for more information" >&2
+    exit 1
+fi
+
+mode="${POSITIONAL[0]}"
+output_prefix="${POSITIONAL[1]}"
+regenie_files=("${POSITIONAL[@]:2}")
+
+# Validate mode
+if [[ ! "$mode" =~ ^(or|qt)$ ]]; then
+    echo "Error: mode must be 'or' or 'qt', got '$mode'" >&2
+    exit 1
+fi
+
+echo "======================================"
+echo "GWAMA Meta-Analysis Pipeline"
+echo "======================================"
+echo "Mode: $mode"
+echo "Output prefix: $output_prefix"
+echo "Number of sites: ${#regenie_files[@]}"
+echo ""
+
+# Resolve relative input paths against the invocation directory before the
+# cd below; otherwise they would be looked up inside the output directory.
+for i in "${!regenie_files[@]}"; do
+    if [[ "${regenie_files[$i]}" != /* ]]; then
+        regenie_files[$i]="$PWD/${regenie_files[$i]}"
+    fi
+done
+
+# Create/ensure output directory and work there
+WORKDIR="$OUTDIR"
+mkdir -p "$WORKDIR"
+cd "$WORKDIR"
+
+# Convert each site's REGENIE output to GWAMA format
+echo "Step 1: Converting REGENIE outputs to GWAMA format..."
+gwama_input_files=()
+
+for i in "${!regenie_files[@]}"; do
+    site_num=$((i + 1))
+    input_file="${regenie_files[$i]}"
+    output_file="${output_prefix}_site${site_num}.txt"
+
+    if [ ! -f "$input_file" ]; then
+        {
+            echo "Error: Input file not found: $input_file"
+            echo ""
+            echo "Troubleshooting:"
+            echo " - Ensure file exists at the specified path"
+            echo " - Use absolute paths inside the container"
+            echo " - Mount data directory with: docker run -v /host/path:/data ..."
+            echo " - Then reference files as: /data/filename"
+        } >&2
+        exit 1
+    fi
+
+    echo " Site $site_num: Converting $input_file..."
+    python3 /tools/regenie_to_gwama.py "$input_file" "$output_file" "$mode"
+    gwama_input_files+=("$output_file")
+done
+
+echo ""
+echo "Step 2: Creating GWAMA input file list..."
+gwama_input_list="${output_prefix}.in"
+# Overwrite (not append) so re-running in the same output directory does not
+# leave stale or duplicated entries from a previous run in the list.
+printf '%s\n' "${gwama_input_files[@]}" > "$gwama_input_list"
+echo "Created: $gwama_input_list"
+echo ""
+
+echo "Step 3: Running GWAMA meta-analysis..."
+# Options shared by both analysis modes
+gwama_args=(
+    -i "$gwama_input_list"
+    --output "$output_prefix"
+    --name_marker MARKERNAME
+    --name_ea EA
+    --name_nea NEA
+)
+if [ "$mode" == "or" ]; then
+    gwama_args+=(--name_or OR --name_or_95l OR_95L --name_or_95u OR_95U)
+else
+    # GWAMA analyses odds ratios unless told otherwise, so quantitative
+    # traits have to be requested explicitly.
+    # NOTE(review): relies on GWAMA's default BETA/SE column names; confirm
+    # that regenie_to_gwama.py writes those columns in qt mode.
+    gwama_args+=(--quantitative)
+fi
+GWAMA "${gwama_args[@]}"
+
+echo ""
+echo "======================================"
+echo "GWAMA meta-analysis completed!"
+echo "======================================"
+echo "Results saved to: $WORKDIR"
+echo " - Main output: ${output_prefix}.out"
+echo " - Error log: ${output_prefix}.err.out"
+echo ""
diff --git a/scripts/run_gwama/regenie_to_gwama.py b/scripts/run_gwama/regenie_to_gwama.py
index f872eed..31b4404 100644
--- a/scripts/run_gwama/regenie_to_gwama.py
+++ b/scripts/run_gwama/regenie_to_gwama.py
@@ -38,7 +38,7 @@ def parse_args():
 
 def main():
     args = parse_args()
-    gwas = pd.read_csv(args.input, sep="\s+")
+    gwas = pd.read_csv(args.input, sep="\\s+")
     if args.mode == 'or':
         z_score = norm.ppf(0.975)  # 97.5 percentile
         gwas['OR'] = np.exp(gwas['BETA'])