diff --git a/README.md b/README.md
index 9b14b8b..af4d722 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ Below is a summary of the main input parameters.
   --seed SEED           Seed used for subsampling.
   --output OUTPUT       Output directory.
   --overwrite           Overwrite output directory.
+  --append              Append results into pre-existing directories (if running >1)
   --getorganelle        Format seed and gene database for get organelle.
   --email EMAIL         Email for Entrez.
   --api API             API key for NCBI. Optional.
@@ -32,17 +33,18 @@ Below is a summary of the main input parameters.
 
 ### How it works
 
-The user provides a taxonomy as a taxonomic name (e.g. "Arabidopsis thaliana") or NCBI taxonomy ID (e.g. "3702"). However, **it is reccomended to use NCBI taxonomy IDs**, as some unrelated groups share names, for example the name "Drosophila" is used for both a genus of fruit flies and basidiomycete fungi. 
+The user provides a taxonomy as a taxonomic name (e.g. "Arabidopsis thaliana") or NCBI taxonomy ID (e.g. "3702"). However, **it is recommended to use NCBI taxonomy IDs**, as some unrelated groups share names, for example the name "Drosophila" is used for both a genus of fruit flies and basidiomycete fungi.
 
-The script will count the number or target sequences available for the user given taxonomy. If there are not enough sequence available based on the `--min` parameter, the script will count the number of sequences available for the parent rank, until the minimum threshold is reached. 
+The script will count the number or target sequences available for the user given taxonomy. If there are not enough sequence available based on the `--min` parameter, the script will count the number of sequences available for the parent rank, until the minimum threshold is reached.
 
-If the maximum number of sequences is exceeded, based on the `--max` parameter, the script will subsample the children lineages, aiming for an even sampling across children. 
+If the maximum number of sequences is exceeded, based on the `--max` parameter, the script will subsample the children lineages, aiming for an even sampling across children.
 
-Note that the `--target` option `ribosomal` will search for sequences with any of the ribosomal annotations (28S/25S, 18S, 5.8S), whereas `ribosomal_complete` will try to find sequences with all annotations. 
+Note that the `--target` option `ribosomal` will search for sequences with any of the ribosomal annotations (28S/25S, 18S, 5.8S), whereas `ribosomal_complete` will try to find sequences with all annotations.
 
 ### Example usage
 
-Below are simple examples of how to use the script. 
+Below are simple examples of how to use the script.
+
 ```
 # Arabidopsis thaliana chloroplast
 ./go_fetch.py \
diff --git a/go_fetch.py b/go_fetch.py
index 803d7d7..1f4255e 100755
--- a/go_fetch.py
+++ b/go_fetch.py
@@ -22,13 +22,14 @@
 # argparse
 parser = argparse.ArgumentParser(prog = "go_batch.py", description=argparse_description, usage=argparse_usage)
 parser.add_argument("--taxonomy",     help="Taxonomy of rank to search for e.g. \"Arabidopsis\"", type=str, required=True)
-parser.add_argument("--target",       help="Target sequence type.", choices=["chloroplast", "mitochondrion", "ribosomal", "ribosomal_complete"], required=True)
+parser.add_argument("--target",       help="Target sequence type.", choices=["chloroplast", "mitochondrion", "CO1", "ribosomal_any", "ribosomal_complete", "ribosomal_28S"], required=True)
 parser.add_argument("--db",           help="Database to search. Either refseq (NCBI) or genbank (INSDC). Default=refseq.", choices=["refseq", "genbank"], required=False, default="refseq")
 parser.add_argument("--min",          help="Minimum number of target sequences to download.", type=int, required=False)
 parser.add_argument("--max",          help="Maximum number of target sequences to download. Must be larger than --min.", type=int, required=False)
 parser.add_argument("--seed",         help="Seed used for subsampling.", type=int, required=False)
 parser.add_argument("--output",       help="Output directory.", required=False)
 parser.add_argument("--overwrite",    help="Overwrite output directory.", action="store_true", required=False)
+parser.add_argument("--append", help="Append to output directory if it exists.", action="store_true", required=False)
 parser.add_argument("--getorganelle", help="Format seed and gene database for get organelle.", action="store_true", required=False)
 parser.add_argument("--email",        help="Email for Entrez.", required=True)
 parser.add_argument("--api",          help="API for NCBI.", type=str, required=False)
@@ -82,16 +83,27 @@
 
 ### functions
 
-# create dir and overwrite if specified
-def create_dir(dirpath, overwrite):
+# create dir and overwrite or append if specified
+def create_dir(dirpath, overwrite, append):
     if os.path.exists(dirpath):
-        if overwrite == True:
+        if overwrite:
             shutil.rmtree(dirpath)
+            os.mkdir(dirpath)
+            os.mkdir(f"{dirpath}/fasta")
+            os.mkdir(f"{dirpath}/genbank")
+        elif append:
+            # Only create subfolders if they don't exist
+            if not os.path.exists(f"{dirpath}/fasta"):
+                os.mkdir(f"{dirpath}/fasta")
+            if not os.path.exists(f"{dirpath}/genbank"):
+                os.mkdir(f"{dirpath}/genbank")
+            # Do not remove anything
         else:
-            sys.exit(f"{dirpath} already exists. Remove or use --overwrite")
-    os.mkdir(dirpath)
-    os.mkdir(f"{dirpath}/fasta")
-    os.mkdir(f"{dirpath}/genbank")
+            sys.exit(f"{dirpath} already exists. Remove or use --overwrite or --append")
+    else:
+        os.mkdir(dirpath)
+        os.mkdir(f"{dirpath}/fasta")
+        os.mkdir(f"{dirpath}/genbank")
 
 # get taxonomic id from scientific name
 def get_taxonomic_id(taxonomy):
@@ -204,21 +216,25 @@ def print_phylogeny(main_lineage, optional_children = []):
             spacer = "|_" + spacer
         else:
             spacer = "  " + spacer
-    if optional_children is not []:
+    if optional_children != []:
         for c in optional_children:
             print(f"{spacer}{c}")
 
 # generate search term
-# target = "chloroplast", "mitochondrion", "ribosomal", "ribosomal_complete"
+# target = "chloroplast", "mitochondrion", "CO1", "ribosomal", "ribosomal_complete", "ribosomal_28S"
 def search_term(taxid, target, db):
     # add taxid to term
     term = f"{taxid}[Organism]"
     if target == "chloroplast" or target == "mitochondrion":
         term += f" AND {target}[Title] AND complete genome[Title]"
-    if target == "ribosomal":
-        term = f"({taxid}[Organism] AND (28S[Title] OR 25S[Title])) OR ({taxid}[Organism] AND 18S[Title]) OR ({taxid}[Organism] AND 5.8S[Title])"
+    if target == "CO1":
+        term += f" AND (CO1[Gene] OR COX1[Gene]) OR COI[Gene]"
+    if target == "ribosomal_any":
+        term = f"({taxid}[Organism] AND (28S[Gene] OR 25S[Gene])) OR ({taxid}[Organism] AND 18S[Gene]) OR ({taxid}[Organism] AND 5.8S[Gene])"
     if target == "ribosomal_complete":
-        term += f" AND (28S[Title] OR 25S[Title]) AND 18S[Title] AND 5.8S[Title]"
+        term += f" AND (28S[Gene] OR 25S[Gene]) AND 18S[Gene] AND 5.8S[Gene]"
+    if target == "ribosomal_28S":
+        term += f"({taxid}[Organism] AND (28S[Gene])"
     # refseq are derived from genbank but not part of
     if db == "refseq":
         term += f" AND refseq[filter]"
@@ -320,7 +336,7 @@ def recursive_search(taxonomy, lineage, target, db, min_th, max_th, idlist):
             print(f"Maximum threshold not exceeded. Downloading {count_idlist_total} sequences\n")
             
             print("\nCreating output directory")
-            create_dir(args.output, args.overwrite)
+            create_dir(args.output, args.overwrite, args.append)
 
             # efetch
             for i in idlist_combined:
@@ -345,7 +361,7 @@ def recursive_search(taxonomy, lineage, target, db, min_th, max_th, idlist):
                 print(f"Downloading the first {count_idlist_subsample} sequences\n")
 
                 print("Creating output directory")
-                create_dir(args.output, args.overwrite)
+                create_dir(args.output, args.overwrite, args.append)
 
                 # efetch
                 for i in idlist_combined[:count_idlist_subsample]:
@@ -401,7 +417,7 @@ def recursive_search(taxonomy, lineage, target, db, min_th, max_th, idlist):
                 list_subsample
 
                 print("\nCreating output directory")
-                create_dir(args.output, args.overwrite)
+                create_dir(args.output, args.overwrite, args.append)
             
                 list_download = idlist.copy()
                 list_download.extend(list_subsample)
@@ -461,7 +477,7 @@ def format_gene(path, target):
     if target == "mitochondrion" or target == "chloroplast":
         cmd_gar.extend(["-o", f"{path}/annotated_regions", "-t", "CDS", "--mix"])
     else:
-        if target == "ribosomal" or target == "ribosomal_complete":
+        if target == "ribosomal_any" or target == "ribosomal_complete" or target == "ribosomal_28S":
             cmd_gar.extend(["-o", f"{path}/annotated_regions", "-t", "rRNA", "--mix"])
     # subprocess run
     result_gar = subprocess.run(cmd_gar, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
@@ -522,6 +538,4 @@ def format_gene(path, target):
     format_seed(args.output)
     format_gene(args.output, args.target)
 
-print("\ngo_fetch complete!")
-
-
+print("\ngo_fetch complete!")
\ No newline at end of file
diff --git a/go_fetch_original.py b/go_fetch_original.py
new file mode 100755
index 0000000..3e49fba
--- /dev/null
+++ b/go_fetch_original.py
@@ -0,0 +1,526 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import shutil
+import argparse
+from Bio import Entrez, SeqIO
+import time
+import urllib
+import subprocess
+from random import Random
+
+# argparse
+argparse_description = """
+Simple python script to fetch organelle or ribosomal reference sequences from NCBI for a given taxonomy.
+"""
+
+argparse_usage = """
+./go_fetch.py --taxonomy 3702--target chloroplast --db genbank --min 5 --max 10 --output arabidopsis_chloroplast --overwrite --getorganelle --email user_email@example.com
+"""
+
+# argparse
+parser = argparse.ArgumentParser(prog = "go_batch.py", description=argparse_description, usage=argparse_usage)
+parser.add_argument("--taxonomy",     help="Taxonomy of rank to search for e.g. \"Arabidopsis\"", type=str, required=True)
+parser.add_argument("--target",       help="Target sequence type.", choices=["chloroplast", "mitochondrion", "ribosomal", "ribosomal_complete"], required=True)
+parser.add_argument("--db",           help="Database to search. Either refseq (NCBI) or genbank (INSDC). Default=refseq.", choices=["refseq", "genbank"], required=False, default="refseq")
+parser.add_argument("--min",          help="Minimum number of target sequences to download.", type=int, required=False)
+parser.add_argument("--max",          help="Maximum number of target sequences to download. Must be larger than --min.", type=int, required=False)
+parser.add_argument("--seed",         help="Seed used for subsampling.", type=int, required=False)
+parser.add_argument("--output",       help="Output directory.", required=False)
+parser.add_argument("--overwrite",    help="Overwrite output directory.", action="store_true", required=False)
+parser.add_argument("--getorganelle", help="Format seed and gene database for get organelle.", action="store_true", required=False)
+parser.add_argument("--email",        help="Email for Entrez.", required=True)
+parser.add_argument("--api",          help="API for NCBI.", type=str, required=False)
+parser.add_argument("--version",      action="version", version='1.0.0')
+args = parser.parse_args()
+
+### additional checks
+
+# additional dependency checks
+
+try:
+    cmd = 'python -c "from Bio import Entrez, SeqIO"'
+    subprocess.call(cmd.split(" "), stderr=subprocess.DEVNULL)
+except FileNotFoundError:
+    sys.exit("Error: Biopython not in path")
+
+try:
+    subprocess.call(["trf"], stderr=subprocess.DEVNULL)
+except FileNotFoundError:
+    sys.exit("Error: trf not in path")
+
+try:
+    subprocess.call(["get_annotated_regions_from_gb.py"], stdout=subprocess.DEVNULL)
+except FileNotFoundError:
+    sys.exit("Error: get_annotated_regions_from_gb.py not in path")
+
+# additional parameter checks
+
+if args.max <= args.min:
+    sys.exit("Error: --max must be larger than --min")
+
+
+### set email
+print(f"Using email {args.email}")
+Entrez.email = args.email
+
+
+### set api if given
+if args.api != None:
+    print(f"Using API key: {args.api}") 
+    Entrez.api_key = args.api
+
+
+### increase time between and number of tries used by entrez
+
+# increase sleep time between tries
+Entrez.sleep_between_tries = 20
+# max tries 
+Entrez.max_tries = 20
+
+
+### functions
+
+# create dir and overwrite if specified
+def create_dir(dirpath, overwrite):
+    if os.path.exists(dirpath):
+        if overwrite == True:
+            shutil.rmtree(dirpath)
+        else:
+            sys.exit(f"{dirpath} already exists. Remove or use --overwrite")
+    os.mkdir(dirpath)
+    os.mkdir(f"{dirpath}/fasta")
+    os.mkdir(f"{dirpath}/genbank")
+
+# get taxonomic id from scientific name
+def get_taxonomic_id(taxonomy):
+    try:
+        handle = Entrez.esearch(db="Taxonomy", term=f"{taxonomy}[Scientific Name]")
+        record = Entrez.read(handle)
+    except urllib.error.HTTPError as e:
+        if e.code == 400:
+            print("HTTP Error 400: get_taxonomic_id bad request. Retrying in 10 seconds...")
+            time.sleep(10)  # Wait for 10 seconds
+            handle = Entrez.esearch(db="Taxonomy", term=f"{taxonomy}[Scientific Name]")
+            record = Entrez.read(handle)
+        else:
+            sys.exit(f"HTTP Error {e}: get_taxonomic_id bad request. Exiting.")
+    return str(record["IdList"][0])
+assert get_taxonomic_id("Arabidopsis thaliana") == "3702"
+
+# check if taxonomy id exists
+def taxid_exists(taxid):
+    try:
+        handle = Entrez.efetch(db="taxonomy", id=taxid)
+        record = Entrez.read(handle)
+    except urllib.error.HTTPError as e:
+        if e.code == 400:
+            print("HTTP Error 400: taxid_exists bad request. Retrying in 10 seconds...")
+            time.sleep(10)  # Wait for 10 seconds
+            handle = Entrez.efetch(db="taxonomy", id=taxid)
+            record = Entrez.read(handle)
+        else:
+            sys.exit(f"HTTP Error {e}: taxid_exists bad request. Exiting.")
+    if record:
+        return True
+    else:
+        return False
+assert taxid_exists(3701) == True
+
+# get scientific name from taxonomic id
+def get_scientific_name(taxonomy):
+    try:
+        handle = Entrez.efetch(db="Taxonomy", id=taxonomy)
+        record = Entrez.read(handle)
+    except urllib.error.HTTPError as e:
+        if e.code == 400:
+            print("HTTP Error 400: get_scientific_name bad request. Retrying in 10 seconds...")
+            time.sleep(10)  # Wait for 10 seconds
+            handle = Entrez.efetch(db="Taxonomy", id=taxonomy)
+            record = Entrez.read(handle)
+        else: 
+            sys.exit(f"HTTP Error {e}: get_scientific_name. Exiting.")
+    return record[0]["ScientificName"]
+assert get_scientific_name("3702") == "Arabidopsis thaliana"
+
+# check if taxonomy name exists
+def scientific_name_exists(taxonomy):
+    try: 
+        handle = Entrez.esearch(db="Taxonomy", term=f"{taxonomy}[Scientific Name]")
+        record = Entrez.read(handle)
+    except urllib.error.HTTPError as e: 
+        if e.code == 400:
+            print("HTTP Error 400: scientific_name_exists bad request. Retrying in 10 seconds...")
+            time.sleep(10)  # Wait for 10 seconds
+            handle = Entrez.esearch(db="Taxonomy", term=f"{taxonomy}[Scientific Name]")
+            record = Entrez.read(handle)
+        else:
+            sys.exit(f"HTTP Error {e}: scientific_name_exists bad request. Exiting.")
+    if int(record["Count"]) >= 1:
+        return True
+    else:
+        return False
+assert scientific_name_exists("Arabidopsis") == True
+
+# get lineage from taxid
+def get_lineage(taxid):
+    try:
+        # efetch
+        handle = Entrez.efetch(db="Taxonomy", id=taxid, retmode="xml")
+        record = Entrez.read(handle)
+    except urllib.error.HTTPError as e:
+        if e.code == 400:
+            print("HTTP Error 400: get_lineage bad request. Retrying in 10 seconds...")
+            time.sleep(10)  # Wait for 10 seconds
+            handle = Entrez.efetch(db="Taxonomy", id=taxid, retmode="xml")
+            record = Entrez.read(handle)
+        else:
+            sys.exit(f"HTTP Error {e}: get_lineage bad request. Exiting.")
+    # get lineage
+    lineage = record[0]["Lineage"].split("; ")[::-1]
+    # return lineage
+    return lineage
+
+#assert get_lineage(3701) == ["Camelineae", "Brassicaceae", "Brassicales", "malvids", "rosids", "Pentapetalae", "Gunneridae", "eudicotyledons", "Mesangiospermae", "Magnoliopsida", "Spermatophyta", "Euphyllophyta", "Tracheophyta", "Embryophyta", "Streptophytina", "Streptophyta", "Viridiplantae", "Eukaryota", "cellular organisms"]
+
+def get_children(taxonomy):
+    handle = Entrez.esearch(db="taxonomy", term=f"{taxonomy}[next level]", retmax=9999)
+    record = Entrez.read(handle)
+    children = []
+    print(f"Found {len(record['IdList'])} children")
+    if len(record['IdList']) > 10: 
+        print("This might take some time :)")
+    for i in record["IdList"]:
+        children.append(get_scientific_name(i))
+    return(children)
+#assert get_children("Arabidopsis lyrata") == ["Arabidopsis petraea subsp. umbrosa", "Arabidopsis petraea subsp. septentrionalis", "Arabidopsis lyrata subsp. lyrata", "Arabidopsis lyrata subsp. petraea"]
+
+def print_phylogeny(main_lineage, optional_children = []):
+    spacer = ""
+    for l in main_lineage:
+        print(f"{spacer}{l}")
+        if spacer == "":
+            spacer = "|_" + spacer
+        else:
+            spacer = "  " + spacer
+    if optional_children is not []:
+        for c in optional_children:
+            print(f"{spacer}{c}")
+
+# generate search term
+# target = "chloroplast", "mitochondrion", "ribosomal", "ribosomal_complete"
+def search_term(taxid, target, db):
+    # add taxid to term
+    term = f"{taxid}[Organism]"
+    if target == "chloroplast" or target == "mitochondrion":
+        term += f" AND {target}[Title] AND complete genome[Title]"
+    if target == "ribosomal":
+        term = f"({taxid}[Organism] AND (28S[Title] OR 25S[Title])) OR ({taxid}[Organism] AND 18S[Title]) OR ({taxid}[Organism] AND 5.8S[Title])"
+    if target == "ribosomal_complete":
+        term += f" AND (28S[Title] OR 25S[Title]) AND 18S[Title] AND 5.8S[Title]"
+    # refseq are derived from genbank but not part of
+    if db == "refseq":
+        term += f" AND refseq[filter]"
+    # genbank is part of the International Nucleotide Sequence Database Collaboration (INSDC) along with the European Nucleotide Archive and the DNA Data Bank of Japan (DDBJ)
+    if db == "genbank":
+        term += f" AND ddbj_embl_genbank[filter]"
+    return term 
+
+# count the number of sequences on ncbi using the term generated
+def entrez_esearch(input_term):
+    # esearch
+    handle = Entrez.esearch(db="Nucleotide", term=input_term, retmax=999)
+    record = Entrez.read(handle)
+    return record["IdList"]
+
+# efetch 
+def entrez_efetch(id, format, output_directory):
+    handle = Entrez.efetch(db="Nucleotide", id=id, rettype=format, retmode="text")
+    seq_record = SeqIO.read(handle, format)
+    output_path = f"{output_directory}/{seq_record.id}.{format}"
+    print(f"   Downloading {seq_record.id}.{format}")
+    SeqIO.write(seq_record, output_path, format)
+
+# subsample a dictionary of taxa (keys), and lists of accessions (values)
+def subsample(input_dictionary, sample_limit, seed):
+    # create copy of input_dictionary
+    input_dictionary_copy = {key: value.copy() for key, value in input_dictionary.items()}
+    # create ouput to populate with counts
+    output_dictionary = {k: [] for k in input_dictionary_copy.keys()}
+    # use a single random instance with proper seeding
+    random_instance = Random(seed)
+    while sample_limit > 0:
+        # shuffle keys to ensure randomness
+        keys = list(input_dictionary_copy.keys())
+        random_instance.shuffle(keys)
+        # iterate through keys
+        for key in keys:
+            if input_dictionary_copy[key]:
+                value = input_dictionary_copy[key].pop(0)
+                output_dictionary[key].append(value)
+                sample_limit -= 1
+                if sample_limit == 0:
+                    break
+    return output_dictionary
+
+# main recursive search function
+def recursive_search(taxonomy, lineage, target, db, min_th, max_th, idlist):
+
+    # define search term
+    term = search_term(taxonomy, target, db)
+    print(f"Using search term = {term}")
+
+    # count the number of sequences in the input idlist
+    count_idlist_input = len(idlist)
+
+    # esearch using term and return idlist of matching accessions
+    idlist_esearch = entrez_esearch(term)
+
+    # count the number of sequences in the idlist
+    count_idlist_esearch = len(idlist_esearch)
+
+    # get idlist for new sequences
+    idlist_new = []
+    for i in idlist_esearch:
+        if i not in idlist:
+            idlist_new.append(i)
+
+    # count new sequences
+    count_idlist_new = len(idlist_new)
+
+    # combine input and new idlist
+    idlist_combined = idlist.copy()
+    idlist_combined.extend(idlist_new)
+
+    # count running total
+    count_idlist_total = len(idlist_combined)
+
+    # print counts
+    print(f"Sequences input: {count_idlist_input}")
+    print(f"Sequences found: {count_idlist_esearch}")
+    print(f"Sequences new:   {count_idlist_new}")
+    print(f"Sequences total: {count_idlist_total}")
+
+    # does the number of sequences meet the minimum threshold
+    if count_idlist_total < min_th:
+        print("Minimum threshold not reached, moving up within lineage")
+        try: 
+            taxonomy = lineage[lineage.index(taxonomy)+1]
+            print(f"Next level is {taxonomy}\n")
+            return recursive_search(taxonomy, lineage, target, db, min_th, max_th, idlist_combined)
+        except IndexError:
+            sys.exit("Error: cannot search any higher within lineage")
+    else:
+
+        print("Minimum threhold reached")
+
+        # if maximum not exceeded, download all
+        if count_idlist_total <= max_th:
+            print(f"Maximum threshold not exceeded. Downloading {count_idlist_total} sequences\n")
+            
+            print("\nCreating output directory")
+            create_dir(args.output, args.overwrite)
+
+            # efetch
+            for i in idlist_combined:
+                entrez_efetch(i, "fasta", f"{args.output}/fasta")
+                entrez_efetch(i, "gb",    f"{args.output}/genbank")
+
+
+        # if maximum exceeded, download subsample
+        else:
+            # get subsample number required
+            count_idlist_subsample = max_th - count_idlist_input
+
+            print(f"Maximum threshold exceeded. Subsampling {count_idlist_subsample} sequences from children\n")
+
+            # get children
+            children = get_children(taxonomy)
+            
+            if len(children) == 0: 
+                
+                print("No children lineages. Must be a terminal rank. i.e species or subspecies\n")
+                
+                print(f"Downloading the first {count_idlist_subsample} sequences\n")
+
+                print("Creating output directory")
+                create_dir(args.output, args.overwrite)
+
+                # efetch
+                for i in idlist_combined[:count_idlist_subsample]:
+                    entrez_efetch(i, "fasta", f"{args.output}/fasta")
+                    entrez_efetch(i, "gb",    f"{args.output}/genbank")
+                
+            else:
+
+                print("All children identified:\n")
+ 
+                # print phylogeny
+                print_phylogeny([taxonomy], children)
+
+                # create dictionary of sequence ids from children
+                dictionary_children = {}
+
+                # iterate through children
+                for c in children:
+
+                    if c != "environmental samples":
+
+                        # define search term
+                        term = search_term(c, target, db)
+
+                        # esearch using term and return idlist of matching accessions
+                        esearch_idlist = entrez_esearch(term)
+
+                        if len(esearch_idlist) != 0:
+                            for i in esearch_idlist:
+                                if i not in idlist:
+                                    # print(i)
+                                    if dictionary_children.get(c) is None:
+                                        dictionary_children[c] = [i]
+                                    else:
+                                        dictionary_children[c].append(i)
+
+                # subsample dictionary
+                dictionary_children_subsample = subsample(dictionary_children, count_idlist_subsample, args.seed)
+
+                # print how many sequences found across children
+                print("\nIdentified sequences:")
+                for key, value, in dictionary_children.items():
+                    print(f"   {len(value)}  {key}")
+
+                # print how many sequences subsampled across children
+                print("\nSampled sequences:")
+                list_subsample = []
+                for key, value, in dictionary_children_subsample.items():
+                    print(f"   {len(value)}  {key}")
+                    for v in value:
+                        list_subsample.append(v)
+
+                list_subsample
+
+                print("\nCreating output directory")
+                create_dir(args.output, args.overwrite)
+            
+                list_download = idlist.copy()
+                list_download.extend(list_subsample)
+
+                print(f"\nDownloading {len(list_download)} sequences")
+                for i in list_download:
+                    entrez_efetch(i, "fasta", f"{args.output}/fasta")
+                    entrez_efetch(i, "gb",    f"{args.output}/genbank")
+
+
+# cat files
+def cat_files(input_dir, file_ending, output_file):
+    # cmd to concatenate files
+    cmd_cat = ["cat"]
+    for f in os.listdir(input_dir):
+        if f.endswith(file_ending):
+             cmd_cat.append(f"{input_dir}/{f}")
+    # subprocess run
+    result_cat = subprocess.run(cmd_cat, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    # capture stdout to text file
+    with open(output_file, "w") as log:
+        log.write(f"{result_cat.stdout}\n")
+
+# format seed
+def format_seed(path):
+    print("\nFormating seed database")
+    print("   Running trf")
+    # get pwd
+    pwd = os.getcwd()
+    # cd to dir containing fasta files
+    os.chdir(f"{path}/fasta")
+    # iterate through fasta files
+    for fasta in os.listdir():
+        if fasta.endswith(".fasta"):
+            # define cmd
+            cmd_trf = f"trf {fasta} 2 7 7 80 10 50 500 -f -d -m -h"
+            print(f"   {cmd_trf}")
+            # subprocess run
+            result_trf = subprocess.run(cmd_trf.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+            # capture stdout and stderr to text file
+            with open(f"{fasta}.log", "w") as log:
+                log.write(f"stdout:\n{result_trf.stdout}\n")
+                log.write(f"stderr:\n{result_trf.stderr}\n")
+    # change back to orginal dir
+    os.chdir(pwd)
+    # cat masked fastas
+    cat_files(f"{path}/fasta", ".mask",  f"{path}/seed.fasta")
+   
+# format gene
+def format_gene(path, target):
+    print("\nFormating gene database")
+    print("    Running get_annotated_regions_from_gb.py")
+    cmd_gar = ["get_annotated_regions_from_gb.py"]
+    for gb in os.listdir(f"{path}/genbank/"):
+        if gb.endswith(".gb"):
+            cmd_gar.append(f"{path}/genbank/{gb}")
+    if target == "mitochondrion" or target == "chloroplast":
+        cmd_gar.extend(["-o", f"{path}/annotated_regions", "-t", "CDS", "--mix"])
+    else:
+        if target == "ribosomal" or target == "ribosomal_complete":
+            cmd_gar.extend(["-o", f"{path}/annotated_regions", "-t", "rRNA", "--mix"])
+    # subprocess run
+    result_gar = subprocess.run(cmd_gar, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    # capture stdout and stderr to text file
+    with open(f"{path}/annotated_regions/get_annotated_regions_from_gb.log", "w") as log:
+        log.write(f"stdout:\n{result_gar.stdout}\n")
+        log.write(f"stderr:\n{result_gar.stderr}\n")
+    # cp gene file to sample dir
+    subprocess.run(["cp", f"{path}/annotated_regions/gene/gene.fasta", f"{path}/"])
+
+
+### main
+
+# get and check ncbi taxonomy_id and scientific_name
+print("Running go_fetch")
+print("Checking user input\n")
+try:
+    int(args.taxonomy)
+    print("Taxonomy input is a numeric NCBI ID.")
+    # check if taxonomy id exists
+    if not taxid_exists(args.taxonomy):
+        sys.exit(f"Taxonomy ID {args.taxonomy} does not exist. Please check for the correct taxonomy id on https://www.ncbi.nlm.nih.gov/taxonomy")
+    else:
+        print("Taxonomy ID exists on NCBI")
+    taxonomy_id = args.taxonomy
+    taxonomy_name = get_scientific_name(taxonomy_id)
+except:
+    print("Taxonomy input is a scientific name.")
+    if not scientific_name_exists(args.taxonomy):
+        sys.exit(f"Scientific name {args.taxonomy} does not exist. Please check for the correct taxonomy id on https://www.ncbi.nlm.nih.gov/taxonomy")
+    taxonomy_name = args.taxonomy
+    taxonomy_id = get_taxonomic_id(taxonomy_name)
+
+print(f"NCBI Id: {taxonomy_id}")
+print(f"Scientific name: {taxonomy_name}")
+
+# get lineage
+print("\nChecking lineage\n")
+
+lineage = get_lineage(taxonomy_id)
+lineage.insert(0, taxonomy_name)
+
+# get children
+# children = get_children(taxonomy_name)
+# only request child lineages if needed to reduce entrez searches
+
+# print phylogeny
+# print_phylogeny(lineage[::-1], optional_children = children)
+print_phylogeny(lineage[::-1])
+
+print("\nStarting search\n")
+
+# start recursive search function                
+recursive_search(taxonomy_name, lineage, args.target, args.db, args.min, args.max, [])
+
+# format gene and seed database if requested
+if args.getorganelle:
+    format_seed(args.output)
+    format_gene(args.output, args.target)
+
+print("\ngo_fetch complete!")
+