easyconfigs/s/starfish/starfish-1.1.0_added-utility-script-for-concatenating-annotate-outp.patch

-Original file line number
+Diff line change
@@ -0,0 +1,122 @@
+    diff --git a/aux/process_annotate_output.sh b/aux/process_annotate_output.sh
+    new file mode 100644
+    index 0000000..a339d4e
+    --- a/dev/null
+    +++ b/aux/process_annotate_output.sh
+    @@ -0,0 +1,112 @@
+    +#!/bin/bash
+    +
+    +# Utility script for combining filt and filt_intersect output from starfish annotate
+    +# across multiple independent runs
+    +
+    +# Function to display help menu
+    +display_help() {
+    +    echo "Usage: $0 [OPTIONS]"
+    +    echo ""
+    +    echo "Utility script for combining filt and filt_intersect output from starfish annotate"
+    +    echo ""
+    +    echo "Options:"
+    +    echo "  -i, --input FILE        Input TSV file with genome codes in the first field (required)"
+    +    echo "  -a, --analysis PREFIX   Path to root analysis directory containing an output directory for each genome (required)"
+    +    echo "  -o, --output PREFIX     Path and Prefix for output files (required)"
+    +    echo "  -h, --help              Display this help menu"
+    +    echo ""
+    +    echo "Example:"
+    +    echo "  $0 -i ome2assembly.txt -a starfish_run1 -o all_annotations"
+    +}
+    +
+    +# Initialize variables
+    +INPUT_FILE=""
+    +ANALYSIS_PREFIX=""
+    +OUTPUT_PREFIX=""
+    +HELP=false
+    +
+    +# Parse command-line arguments
+    +while [[ $# -gt 0 ]]; do
+    +    key="$1"
+    +    case $key in
+    +        -i|--input)
+    +            INPUT_FILE="$2"
+    +            shift 2
+    +            ;;
+    +        -a|--analysis)
+    +            ANALYSIS_PREFIX="$2"
+    +            shift 2
+    +            ;;
+    +        -o|--output)
+    +            OUTPUT_PREFIX="$2"
+    +            shift 2
+    +            ;;
+    +        -h|--help)
+    +            HELP=true
+    +            shift
+    +            ;;
+    +        *)
+    +            echo "Unknown option: $1"
+    +            display_help
+    +            exit 1
+    +            ;;
+    +    esac
+    +done
+    +
+    +# Display help or validate required arguments
+    +if [[ "$HELP" = true ]] || [ -z "$INPUT_FILE" ] || [ -z "$ANALYSIS_PREFIX" ] || [ -z "$OUTPUT_PREFIX" ]; then
+    +    display_help
+    +    exit 1
+    +fi
+    +
+    +output_gff=${OUTPUT_PREFIX}.gff
+    +output_ids=${OUTPUT_PREFIX}.ids
+    +output_fas=${OUTPUT_PREFIX}.fas
+    +
+    +# Check if the input file exists
+    +if [ ! -f "$INPUT_FILE" ]; then
+    +    echo "Error: Input file $INPUT_FILE does not exist."
+    +    exit 1
+    +fi
+    +
+    +# Initialize counters
+    +count_filt_intersect=0
+    +count_filt=0
+    +count_none=0
+    +
+    +# Clear the output file or create it if it doesn't exist
+    +> "$output_gff"
+    +> "$output_ids"
+    +> "$output_fas"
+    +
+    +# Read each genome code from the input file
+    +while IFS=$'\t' read -r genome_code rest_of_line || [ -n "$genome_code" ]; do
+    +
+    +    # Trim any potential whitespace from the genome code
+    +    genome_code=$(echo "$genome_code" | xargs)
+    +
+    +    # Define the directory name based on the genome code
+    +    dir_name="${ANALYSIS_PREFIX}/$genome_code"
+    +
+    +    # Check for 'filt_intersect' file first
+    +    if [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" ]; then
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" >> "$output_ids"
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.fas" >> "$output_fas"
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.gff" >> "$output_gff"
+    +        ((count_filt_intersect++))
+    +    elif [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" ]; then
+    +        # If 'filt_intersect' doesn't exist, use 'filt' file
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" >> "$output_ids"
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.fas" >> "$output_fas"
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.gff" >> "$output_gff"
+    +        ((count_filt++))
+    +    else
+    +        echo "No 'filt_intersect' or 'filt' file found for genome code: $genome_code"
+    +        ((count_none++))
+    +    fi
+    +done < "$INPUT_FILE"
+    +
+    +echo "Total genomes with 'filt_intersect' files: $count_filt_intersect"
+    +echo "Total genomes with 'filt' files: $count_filt"
+    +echo "Total genomes with no files: $count_none"
+    +echo "Processing complete"
+    \ No newline at end of file
+    --
+    2.43.7

easyconfigs/s/starfish/starfish-1.1.0_bug-fix.patch

-Original file line number
+Diff line change
@@ -0,0 +1,34 @@
+    diff --git a/aux/process_annotate_output.sh b/aux/process_annotate_output.sh
+    old mode 100644
+    new mode 100755
+    index a339d4e..6375eba
+    --- a/aux/process_annotate_output.sh
+    +++ b/aux/process_annotate_output.sh
+    @@ -89,16 +89,16 @@ while IFS=$'\t' read -r genome_code rest_of_line || [ -n "$genome_code" ]; do
+         dir_name="${ANALYSIS_PREFIX}/$genome_code"
+         # Check for 'filt_intersect' file first
+    -    if [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" ]; then
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" >> "$output_ids"
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.fas" >> "$output_fas"
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.gff" >> "$output_gff"
+    +	if ls "${dir_name}/${genome_code}"*".filt_intersect.ids" &> /dev/null; then
+    +        cat "${dir_name}/${genome_code}"*"filt_intersect.ids" >> "$output_ids"
+    +        cat "${dir_name}/${genome_code}"*"filt_intersect.fas" >> "$output_fas"
+    +        cat "${dir_name}/${genome_code}"*"filt_intersect.gff" >> "$output_gff"
+             ((count_filt_intersect++))
+    -    elif [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" ]; then
+    +    elif ls "${dir_name}/${genome_code}"*".filt.ids" &> /dev/null; then
+             # If 'filt_intersect' doesn't exist, use 'filt' file
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" >> "$output_ids"
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.fas" >> "$output_fas"
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.gff" >> "$output_gff"
+    +        cat "${dir_name}/${genome_code}"*"filt.ids" >> "$output_ids"
+    +        cat "${dir_name}/${genome_code}"*"filt.fas" >> "$output_fas"
+    +        cat "${dir_name}/${genome_code}"*"filt.gff" >> "$output_gff"
+             ((count_filt++))
+         else
+             echo "No 'filt_intersect' or 'filt' file found for genome code: $genome_code"
+    --
+    2.43.7

fix(starfish110): missing patch #770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

branfosj merged 2 commits into main from starfish110

Jan 28, 2026

-Original file line number
+Diff line change
@@ -0,0 +1,122 @@
+    diff --git a/aux/process_annotate_output.sh b/aux/process_annotate_output.sh
+    new file mode 100644
+    index 0000000..a339d4e
+    --- a/dev/null
+    +++ b/aux/process_annotate_output.sh
+    @@ -0,0 +1,112 @@
+    +#!/bin/bash
+    +
+    +# Utility script for combining filt and filt_intersect output from starfish annotate
+    +# across multiple independent runs
+    +
+    +# Function to display help menu
+    +display_help() {
+    +    echo "Usage: $0 [OPTIONS]"
+    +    echo ""
+    +    echo "Utility script for combining filt and filt_intersect output from starfish annotate"
+    +    echo ""
+    +    echo "Options:"
+    +    echo "  -i, --input FILE        Input TSV file with genome codes in the first field (required)"
+    +    echo "  -a, --analysis PREFIX   Path to root analysis directory containing an output directory for each genome (required)"
+    +    echo "  -o, --output PREFIX     Path and Prefix for output files (required)"
+    +    echo "  -h, --help              Display this help menu"
+    +    echo ""
+    +    echo "Example:"
+    +    echo "  $0 -i ome2assembly.txt -a starfish_run1 -o all_annotations"
+    +}
+    +
+    +# Initialize variables
+    +INPUT_FILE=""
+    +ANALYSIS_PREFIX=""
+    +OUTPUT_PREFIX=""
+    +HELP=false
+    +
+    +# Parse command-line arguments
+    +while [[ $# -gt 0 ]]; do
+    +    key="$1"
+    +    case $key in
+    +        -i|--input)
+    +            INPUT_FILE="$2"
+    +            shift 2
+    +            ;;
+    +        -a|--analysis)
+    +            ANALYSIS_PREFIX="$2"
+    +            shift 2
+    +            ;;
+    +        -o|--output)
+    +            OUTPUT_PREFIX="$2"
+    +            shift 2
+    +            ;;
+    +        -h|--help)
+    +            HELP=true
+    +            shift
+    +            ;;
+    +        *)
+    +            echo "Unknown option: $1"
+    +            display_help
+    +            exit 1
+    +            ;;
+    +    esac
+    +done
+    +
+    +# Display help or validate required arguments
+    +if [[ "$HELP" = true ]] || [ -z "$INPUT_FILE" ] || [ -z "$ANALYSIS_PREFIX" ] || [ -z "$OUTPUT_PREFIX" ]; then
+    +    display_help
+    +    exit 1
+    +fi
+    +
+    +output_gff=${OUTPUT_PREFIX}.gff
+    +output_ids=${OUTPUT_PREFIX}.ids
+    +output_fas=${OUTPUT_PREFIX}.fas
+    +
+    +# Check if the input file exists
+    +if [ ! -f "$INPUT_FILE" ]; then
+    +    echo "Error: Input file $INPUT_FILE does not exist."
+    +    exit 1
+    +fi
+    +
+    +# Initialize counters
+    +count_filt_intersect=0
+    +count_filt=0
+    +count_none=0
+    +
+    +# Clear the output file or create it if it doesn't exist
+    +> "$output_gff"
+    +> "$output_ids"
+    +> "$output_fas"
+    +
+    +# Read each genome code from the input file
+    +while IFS=$'\t' read -r genome_code rest_of_line || [ -n "$genome_code" ]; do
+    +
+    +    # Trim any potential whitespace from the genome code
+    +    genome_code=$(echo "$genome_code" | xargs)
+    +
+    +    # Define the directory name based on the genome code
+    +    dir_name="${ANALYSIS_PREFIX}/$genome_code"
+    +
+    +    # Check for 'filt_intersect' file first
+    +    if [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" ]; then
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" >> "$output_ids"
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.fas" >> "$output_fas"
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.gff" >> "$output_gff"
+    +        ((count_filt_intersect++))
+    +    elif [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" ]; then
+    +        # If 'filt_intersect' doesn't exist, use 'filt' file
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" >> "$output_ids"
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.fas" >> "$output_fas"
+    +        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.gff" >> "$output_gff"
+    +        ((count_filt++))
+    +    else
+    +        echo "No 'filt_intersect' or 'filt' file found for genome code: $genome_code"
+    +        ((count_none++))
+    +    fi
+    +done < "$INPUT_FILE"
+    +
+    +echo "Total genomes with 'filt_intersect' files: $count_filt_intersect"
+    +echo "Total genomes with 'filt' files: $count_filt"
+    +echo "Total genomes with no files: $count_none"
+    +echo "Processing complete"
+    \ No newline at end of file
+    --
+    2.43.7

-Original file line number
+Diff line change
@@ -0,0 +1,34 @@
+    diff --git a/aux/process_annotate_output.sh b/aux/process_annotate_output.sh
+    old mode 100644
+    new mode 100755
+    index a339d4e..6375eba
+    --- a/aux/process_annotate_output.sh
+    +++ b/aux/process_annotate_output.sh
+    @@ -89,16 +89,16 @@ while IFS=$'\t' read -r genome_code rest_of_line || [ -n "$genome_code" ]; do
+         dir_name="${ANALYSIS_PREFIX}/$genome_code"
+         # Check for 'filt_intersect' file first
+    -    if [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" ]; then
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" >> "$output_ids"
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.fas" >> "$output_fas"
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.gff" >> "$output_gff"
+    +	if ls "${dir_name}/${genome_code}"*".filt_intersect.ids" &> /dev/null; then
+    +        cat "${dir_name}/${genome_code}"*"filt_intersect.ids" >> "$output_ids"
+    +        cat "${dir_name}/${genome_code}"*"filt_intersect.fas" >> "$output_fas"
+    +        cat "${dir_name}/${genome_code}"*"filt_intersect.gff" >> "$output_gff"
+             ((count_filt_intersect++))
+    -    elif [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" ]; then
+    +    elif ls "${dir_name}/${genome_code}"*".filt.ids" &> /dev/null; then
+             # If 'filt_intersect' doesn't exist, use 'filt' file
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" >> "$output_ids"
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.fas" >> "$output_fas"
+    -        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.gff" >> "$output_gff"
+    +        cat "${dir_name}/${genome_code}"*"filt.ids" >> "$output_ids"
+    +        cat "${dir_name}/${genome_code}"*"filt.fas" >> "$output_fas"
+    +        cat "${dir_name}/${genome_code}"*"filt.gff" >> "$output_gff"
+             ((count_filt++))
+         else
+             echo "No 'filt_intersect' or 'filt' file found for genome code: $genome_code"
+    --
+    2.43.7

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix(starfish110): missing patch #770

Uh oh!

Diff view

Diff view

There are no files selected for viewing