Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
diff --git a/aux/process_annotate_output.sh b/aux/process_annotate_output.sh
new file mode 100644
index 0000000..a339d4e
--- /dev/null
+++ b/aux/process_annotate_output.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+# Utility script for combining filt and filt_intersect output from starfish annotate
+# across multiple independent runs
+
+# Print the usage text (synopsis, options and an example invocation) to stdout
+display_help() {
+    printf '%s\n' "Usage: $0 [OPTIONS]" \
+        "" \
+        "Utility script for combining filt and filt_intersect output from starfish annotate" \
+        "" \
+        "Options:" \
+        " -i, --input FILE Input TSV file with genome codes in the first field (required)" \
+        " -a, --analysis PREFIX Path to root analysis directory containing an output directory for each genome (required)" \
+        " -o, --output PREFIX Path and Prefix for output files (required)" \
+        " -h, --help Display this help menu" \
+        "" \
+        "Example:" \
+        " $0 -i ome2assembly.txt -a starfish_run1 -o all_annotations"
+}
+
+# Initialize variables
+INPUT_FILE=""
+ANALYSIS_PREFIX=""
+OUTPUT_PREFIX=""
+HELP=false
+
+# Parse command-line arguments. When an option's value is missing, `shift 2`
+# fails without shifting; abort there instead of spinning on the same option.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -i|--input)
+            INPUT_FILE="$2"
+            shift 2 || { echo "Option $1 requires an argument"; display_help; exit 1; }
+            ;;
+        -a|--analysis)
+            ANALYSIS_PREFIX="$2"
+            shift 2 || { echo "Option $1 requires an argument"; display_help; exit 1; }
+            ;;
+        -o|--output)
+            OUTPUT_PREFIX="$2"
+            shift 2 || { echo "Option $1 requires an argument"; display_help; exit 1; }
+            ;;
+        -h|--help)
+            HELP=true
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1"
+            display_help
+            exit 1
+            ;;
+    esac
+done
+
+# Show usage when help is requested (exit 0) or a required argument is missing (exit 1)
+if [[ "$HELP" = true || -z "$INPUT_FILE" || -z "$ANALYSIS_PREFIX" || -z "$OUTPUT_PREFIX" ]]; then
+    display_help
+    if [[ "$HELP" = true ]]; then exit 0; else exit 1; fi
+fi
+
+output_gff="${OUTPUT_PREFIX}.gff"
+output_ids="${OUTPUT_PREFIX}.ids"
+output_fas="${OUTPUT_PREFIX}.fas"
+
+# Check if the input file exists
+if [[ ! -f "$INPUT_FILE" ]]; then
+    echo "Error: Input file $INPUT_FILE does not exist."
+    exit 1
+fi
+
+# Initialize counters
+count_filt_intersect=0
+count_filt=0
+count_none=0
+
+# Truncate (or create) each output file; stop right away if one cannot be written
+: > "$output_gff" || exit 1
+: > "$output_ids" || exit 1
+: > "$output_fas" || exit 1
+
+# Read each genome code from the input file
+while IFS=$'\t' read -r genome_code rest_of_line || [ -n "$genome_code" ]; do
+    # Trim surrounding whitespace with builtins (xargs mangles quotes/backslashes); skip blank lines
+    genome_code="${genome_code#"${genome_code%%[![:space:]]*}"}"
+    genome_code="${genome_code%"${genome_code##*[![:space:]]}"}"
+    [ -n "$genome_code" ] || continue
+    # Define the directory name based on the genome code
+    dir_name="${ANALYSIS_PREFIX}/$genome_code"
+
+    # Check for 'filt_intersect' file first
+    if [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" ]; then
+        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" >> "$output_ids"
+        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.fas" >> "$output_fas"
+        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.gff" >> "$output_gff"
+        ((count_filt_intersect++))
+    elif [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" ]; then
+        # If 'filt_intersect' doesn't exist, use 'filt' file
+        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" >> "$output_ids"
+        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.fas" >> "$output_fas"
+        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.gff" >> "$output_gff"
+        ((count_filt++))
+    else
+        echo "No 'filt_intersect' or 'filt' file found for genome code: $genome_code"
+        ((count_none++))
+    fi
+done < "$INPUT_FILE"
+
+echo "Total genomes with 'filt_intersect' files: $count_filt_intersect"
+echo "Total genomes with 'filt' files: $count_filt"
+echo "Total genomes with no files: $count_none"
+echo "Processing complete"
\ No newline at end of file
--
2.43.7

34 changes: 34 additions & 0 deletions easyconfigs/s/starfish/starfish-1.1.0_bug-fix.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
diff --git a/aux/process_annotate_output.sh b/aux/process_annotate_output.sh
old mode 100644
new mode 100755
index a339d4e..6375eba
--- a/aux/process_annotate_output.sh
+++ b/aux/process_annotate_output.sh
@@ -89,16 +89,16 @@ while IFS=$'\t' read -r genome_code rest_of_line || [ -n "$genome_code" ]; do
     dir_name="${ANALYSIS_PREFIX}/$genome_code"

     # Check for 'filt_intersect' file first
-    if [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" ]; then
-        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.ids" >> "$output_ids"
-        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.fas" >> "$output_fas"
-        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt_intersect.gff" >> "$output_gff"
+    if ls "${dir_name}/${genome_code}"*".filt_intersect.ids" &> /dev/null; then
+        cat "${dir_name}/${genome_code}"*".filt_intersect.ids" >> "$output_ids"
+        cat "${dir_name}/${genome_code}"*".filt_intersect.fas" >> "$output_fas"
+        cat "${dir_name}/${genome_code}"*".filt_intersect.gff" >> "$output_gff"
         ((count_filt_intersect++))
-    elif [ -f "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" ]; then
+    elif ls "${dir_name}/${genome_code}"*".filt.ids" &> /dev/null; then
         # If 'filt_intersect' doesn't exist, use 'filt' file
-        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.ids" >> "$output_ids"
-        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.fas" >> "$output_fas"
-        cat "${dir_name}/${genome_code}.${ANALYSIS_PREFIX}.filt.gff" >> "$output_gff"
+        cat "${dir_name}/${genome_code}"*".filt.ids" >> "$output_ids"
+        cat "${dir_name}/${genome_code}"*".filt.fas" >> "$output_fas"
+        cat "${dir_name}/${genome_code}"*".filt.gff" >> "$output_gff"
         ((count_filt++))
     else
         echo "No 'filt_intersect' or 'filt' file found for genome code: $genome_code"
--
2.43.7