From acab990846295ac1d8dc77eb60ebffd9c9c82add Mon Sep 17 00:00:00 2001
From: Hannes Reinwald <hannes.reinwald@bayer.com>
Date: Fri, 13 Jun 2025 14:07:13 +0200
Subject: [PATCH 1/6] Added small stx_tax_groups function to retrieve all
 defined taxonomic groups from the taxa table.

---
 R/standartox.R | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/R/standartox.R b/R/standartox.R
index fea1f76..e8c6fd6 100644
--- a/R/standartox.R
+++ b/R/standartox.R
@@ -330,6 +330,24 @@ stx_taxa = function(silent = TRUE, ...) {
   return(result)
 }
 
+#' Retrieve taxa groups
+#' 
+#' Retrieve a character vector of all tax_groups defined in Standartox.
+#' 
+#' @return Returns a character vector of all tax_groups defined in Standartox.
+#' @author Hannes Reinwald \email{hannes.reinwald@@bayer.com}
+#' @examples
+#' \donttest{
+#' # might fail if there is no internet connection or Zenodo.org not not available
+#' stx_tax_groups()
+#' }
+#' 
+#' @export
+stx_tax_groups = function(...){
+  taxa = na.omit( unique(stx_taxa(...)$tax_group) )
+  taxa = unique( unlist( strsplit(taxa, "\\s*\\|\\|\\s*") ) )
+  return(sort(taxa))
+}
 
 # IDEA
 # microbenchmark::microbenchmark({

From a9e9cdda6ac7520f72f09cb677e57760649c8be8 Mon Sep 17 00:00:00 2001
From: Hannes Reinwald <hannes.reinwald@bayer.com>
Date: Mon, 16 Jun 2025 12:44:02 +0200
Subject: [PATCH 2/6] Added stx_query() function.

---
 R/standartox.R | 292 +++++++++++++++++++++++++++++++------------------
 1 file changed, 183 insertions(+), 109 deletions(-)

diff --git a/R/standartox.R b/R/standartox.R
index e8c6fd6..cac2083 100644
--- a/R/standartox.R
+++ b/R/standartox.R
@@ -149,119 +149,193 @@ stx_catalog = function(silent = TRUE, ...) {
 #' }
 #' 
 #' @export
-stx_query = function(cas = NULL,
-                     concentration_unit = NULL,
-                     concentration_type = NULL,
-                     duration = NULL,
-                     endpoint = c('XX50', 'NOEX', 'LOEX'),
-                     effect = NULL,
-                     exposure = NULL,
-                     chemical_role = NULL,
-                     chemical_class = NULL,
-                     taxa = NULL,
-                     ecotox_grp = NULL,
-                     trophic_lvl = NULL,
-                     habitat = NULL,
-                     region = NULL,
-                     vers = NULL,
-                     ...) {
-  # to avoid NOTE in R CMD check --as-cran
-  casnr = outlier = concentration = cname = NULL
-  # debuging
-  # browser() # debuging
-  # cas = '1071-83-6'; concentration_unit = NULL; concentration_type = NULL; duration = NULL; endpoint = 'XX50'; effect = NULL; exposure = NULL; chemical_role = NULL; chemical_class = NULL; taxa = NULL; ecotox_grp = NULL; trophic_lvl = NULL; habitat = NULL; region = NULL; vers = NULL
-  # checks
-  endpoint = match.arg(endpoint)
-  # request
-  body = list(casnr = cas,
-              concentration_unit = concentration_unit,
-              concentration_type = concentration_type,
-              duration = duration,
-              endpoint = endpoint,
-              effect = effect,
-              exposure = exposure,
-              chemical_role = chemical_role,
-              chemical_class = chemical_class,
-              taxa = taxa,
-              ecotox_grp = ecotox_grp,
-              trophic_lvl = trophic_lvl,
-              habitat = habitat,
-              region = region,
-              vers = vers)
-  # POST
-  stx_message(body)
-  res = try(httr::POST(
-    file.path(domain(), 'filter'),
-    body = body,
-    encode = 'json',
-  ), silent = TRUE)
-  stx_availability(res)
-  if (res$status_code == 400) {
-    warning(jsonlite::fromJSON(httr::content(res, type = 'text', encoding = 'UTF-8')))
-    out_fil = data.table(NA)
-    filtered = data.table(NA)
-    out_agg = data.table(NA)
+#' 
+stx_query_import = function(silent = TRUE, 
+                            data_type = c('test_fin.fst','phch.fst','taxa.fst','refs.fst'),
+                            ...) {
+  if (!silent) message('Retrieving Standartox listed Chemicals ...')
+  if (silent) {
+    result = suppressMessages( stx_download(data_type = data_type, ...) )
+  } else {
+    result = stx_download(data_type = data_type, ...)
   }
-  if (res$status_code == 200) {
-    out_fil = read_bin_vec(res$content, type = 'fst')
-    if (nrow(out_fil) == 0) {
-      warning('No results found.')
-      out_fil = data.table(NA)
-      filtered = data.table(NA)
-      out_agg = data.table(NA)
-    } else {
-      # CAS column (not sent through API to reduce size)
-      out_fil[ , cas := cas_conv(casnr) ][ , casnr := NULL ]
-      # outliers
-      para = c('cas',
-               'concentration_unit',
-               'concentration_type',
-               'duration',
-               'tax_taxon',
-               'effect',
-               'endpoint',
-               'exposure')
-      out_fil[ ,
-               outlier := flag_outliers(concentration),
-               by = para ]
-      # colorder
-      nam = names(out_fil)
-      col_order = c('cname', 'cas', 'iupac_name', 'inchikey', 'inchi', 'molecularweight',
-                    'result_id', 'endpoint', 'effect', 'exposure', 'trophic_lvl', 'ecotox_grp', 'concentration_type',
-                    'concentration', 'concentration_unit', 'concentration_orig', 'concentration_unit_orig',
-                    'duration', 'duration_unit', 'outlier',
-                    c('species_number', grep('tax_|hab_|reg_', nam, value = TRUE)),
-                    grep('cro_|ccl_', nam, value = TRUE),
-                    grep('ref', nam, value = TRUE))
-      setcolorder(out_fil, col_order)
-      # order
-      setorder(out_fil, cname)
-      # id
-      col_id = c('cname', 'cas', 'inchikey', 'inchi', 'result_id', 'species_number', 'ref_number')
-      id = out_fil[ , .SD, .SDcols = col_id ]
-      # short
-      col_short = c('cname', 'cas', 'inchikey',
-                    'endpoint', 'effect', 'exposure', 'trophic_lvl', 'ecotox_grp', 'concentration_type',
-                    'concentration', 'concentration_unit', 'concentration_orig', 'concentration_unit_orig',
-                    'duration', 'duration_unit', 'outlier',
-                    grep('tax_', nam, value = TRUE))
-      filtered = out_fil[ , .SD, .SDcols = col_short ]
-      # aggregate
-      out_agg = suppressWarnings(
-        stx_aggregate(out_fil)
-      )
+  # Convert data frame to data table
+  result = lapply( result, function(dt) data.table::setDT(dt) )
+  return(result)
+}
+
+filter_dt = function(dt, var_ls){
+  stopifnot(is.data.frame(dt))
+  stopifnot(is.list(var_ls))
+  data.table::setDT(dt) # Ensure dt is a data.table
+  
+  var_check = !unlist( lapply(var_ls, is.null) )
+  var_check = names( which(var_check) ) # select only those variables that are not NULL
+  
+  # Loop through the variables and filter the taxa.fst data frame 
+  if (length(var_check) > 0) {
+    dt.out = list()
+    
+    for(var in var_check){
+      message("Filtering for variable: ", var)
+      if (var %in% colnames(dt)) { # Check if the variable exists as a column in data table
+        if( !is.null(var_ls[[var]]) ){
+          # Filter the data table based on the specified column and the values in the variable
+          filter_val = var_ls[[var]]
+          dt.out[[var]] <- dt[ dt[[var]] %in% filter_val ]
+          if(nrow(dt.out[[var]]) == 0){ warning( paste("No query matches found for:\t", var) ) } 
+        }
+      } else {
+        warning(paste("Variable", var, "not found in data table. Skipping filter."))
+      }
     }
-  }
-  # meta
-  out_meta = stx_meta()
-  # return
-  list(filtered = filtered,
-       filtered_all = rm_col_na(out_fil),
-       # TODO id = id,
-       aggregated = out_agg,
-       meta = out_meta)
+    
+    # Combine results
+    dt.out = distinct( data.table::rbindlist(dt.out) )
+    # Check 
+    if( nrow(dt.out) == 0 ) {
+      warning("No query results for the provided ",paste(var_check, collapse = " & "),
+              ". Please check the input values.")
+      return(NULL) }
+    return(dt.out)
+  } else { dt }
 }
 
+stx_query = function(
+  ## COMPOUND FILTERING ##
+  cas_number = NULL,
+  ## BASIC TOX DATA FILTERING ##
+  endpoint_group = c('XX50', 'NOEX', 'LOEX'),
+  exposure = NULL,           # character vector
+  effect = NULL,             # character vector
+  duration = c(0, Inf),      # numeric vector 
+  duration_unit = "h",       # character vector; set to NULL if you want to keep all results!
+  concentration_unit = NULL, # character vector
+  concentration_type = NULL, # character vector
+  ## TAXA FILTERING ##
+  tax_columns = c('tax_group', 'tax_taxon', 'tax_genus', 'tax_family'), # Taxonomy columns to append to the query results. DEFAULT: c('tax_group', 'tax_taxon', 'tax_genus', 'tax_family')
+  tax_genus  = NULL, # character vector
+  tax_family = NULL, # character vector
+  tax_order  = NULL, # character vector
+  tax_class  = NULL, # character vector
+  ecotox_grp = NULL, # character vector
+  ## REFERENCE SECTION ##
+  include_reference = FALSE, # Default FALSE
+  ...){ 
+  # Import stxDb 
+  message("Reading in Standartox Data ...")
+  stx_table = c('test_fin.fst','phch.fst','taxa.fst') # 
+  if(include_reference) { stx_table = unique(c(stx_table,'refs.fst')) }
+  stxDb = stx_query_import(data_type = stx_table, ...)
+  # stxDb  = stx_query_import(data_type = stx_table)
+  tox.dt = stxDb$test_fin.fst # final output object. LARGE right after import!
+  stxDb  = stxDb[stx_table[-1]] # dump the largest object!
+  
+  # First quick filter steps:
+  # Remove rows where the specified columns contain "NR" <- NA values
+  message("Removing 'NA' values ...")
+  tox.dt <- tox.dt[!grepl("NR", endpoint) & !grepl("NR", duration_unit) ]
+  # Quick fix for endpoint values
+  tox.dt[, endpoint := sub("[/*]+$","", endpoint)]
+  if(!is.null(endpoint_group)){
+    tmp_var = endpoint_group # quick fix
+    tox.dt = tox.dt[endpoint_group %in% tmp_var]
+  }
+  if(!is.null(duration_unit)){
+    tmp_var = duration_unit # quick fix
+    tox.dt = tox.dt[duration_unit %in% tmp_var]
+  }
+  if( nrow(tox.dt) == 0 ) {
+    warning("No query matches found for the provided endpoint_group or duration_unit. Please check the input values.")
+  }
+  
+  # Step 1: Filter for cas_number then merge with toxdata
+  message("Appending chemical information ...")
+  ## Filter chem data for cas_number
+  if(!is.null(cas_number)){
+    stxDb$phch.fst <- stxDb$phch.fst[cas %in% cas_number]
+    if( nrow(stxDb$phch.fst) == 0 ) {
+      warning("No query matches found for the provided CAS numbers. Please check the input values.")
+    }
+  }
+  ## Merge with tox data by cl_id
+  merge(stxDb$phch.fst, tox.dt, all.x = TRUE, by = "cl_id") -> tox.dt
+  tox.dt[, c("chem_class","casnr", "cl_id") := NULL] # don't need the cl_id column anymore.
+  
+  
+  # Step 2: Filter for taxonomic groups then merge with toxdata
+  message("Appending taxonomic information ...")
+  ## Filtering ecotox_grp ##
+  if(!is.null(ecotox_grp)){
+    regstr = paste(ecotox_grp, collapse = "|")
+    stxDb$taxa.fst = stxDb$taxa.fst[ grepl(regstr, stxDb$taxa.fst$tax_group) ]
+    if( nrow( stxDb$taxa.fst ) == 0 ) {
+      warning("No query matches found for the provided tax_group. Please check the input values.")
+      return(NULL)
+    }
+  }
+  ## Filtering tax_columns ##
+  # Specify taxonomy columns for which tax filtering can be applied
+  var_ls = list(
+    tax_class  = tax_class,
+    tax_order  = tax_order, 
+    tax_family = tax_family,
+    tax_genus  = tax_genus
+  )
+  tax.out = filter_dt( stxDb$taxa.fst, var_ls)
+  if( is.null(tax.out) ) { return(NULL) } # Check 
+  
+  # Select pre-defined columns for output
+  tax_key = "tl_id"
+  tax.out = tax.out[, c(tax_key, tax_columns), with = FALSE]
+  # Merge taxonomy data with tox data by tl_id
+  merge(tax.out, tox.dt, all.x = TRUE, by = tax_key) -> tox.dt
+  tox.dt[, (tax_key) := NULL] # drop columns here 
+  
+  
+  # Step 3: Final Tox data filtering 
+  # rmv any rows with NA in result_id
+  tox.dt = tox.dt[!is.na(result_id)]
+  # Filter for the selected columns 
+  var_ls = list(
+    concentration_unit  = concentration_unit,
+    concentration_type  = concentration_type, 
+    effect = effect,
+    exposure  = exposure
+  )
+  tox.dt = filter_dt( tox.dt, var_ls )
+  if( is.null(tox.dt) ) { return(NULL) } # Check 
+  
+  # Filter for duration
+  lower_bound = min(duration)
+  upper_bound = max(duration)
+  tox.dt = tox.dt[duration >= lower_bound & duration <= upper_bound]
+  # Check
+  if( nrow( tox.dt ) == 0 ) {
+    warning("No query matches found. Please check the input filter values.")
+    return(NULL)
+  }
+  
+  # Step 4: Append references if wanted 
+  if(include_reference){
+    message("Appending reference information ...")
+    tox.dt = merge(tox.dt, stxDb$refs.fst, all.x = TRUE, by = "ref_number")
+    tox.dt[, c("ref_number") := NULL]
+  } 
+  
+  # Step 5: Final Cleanup 
+  # Replace all occurrences of "NR" with NA
+  tox.dt = tox.dt[, lapply(.SD, function(x) {
+    if (is.character(x)) {
+      x[x == "NR"] <- NA  # Replace "NR" with NA for character columns
+    }
+    return(x)  # Return the modified column
+  })]
+  # Filter out the "result_id" column
+  message("Done!\n")
+  return( tox.dt[, c("result_id") := NULL] )
+} 
+
 
 #' Retrieve chemical data
 #' 

From 9d1998b34a5b5643a9ee9e9e055a1d61fd3fedc4 Mon Sep 17 00:00:00 2001
From: Hannes Reinwald <hannes.reinwald@bayer.com>
Date: Mon, 16 Jun 2025 19:21:01 +0200
Subject: [PATCH 3/6] Added some example code here. Does not need to be merged
 to Andrea's the main branch.

---
 R/example_code.R |  54 ++++++
 R/standartox.R   | 464 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 518 insertions(+)
 create mode 100644 R/example_code.R
 create mode 100644 R/standartox.R

diff --git a/R/example_code.R b/R/example_code.R
new file mode 100644
index 0000000..1506798
--- /dev/null
+++ b/R/example_code.R
@@ -0,0 +1,54 @@
+# filter_dt -----------------------------------
+taxa.dt = stx_taxa()
+colnames(taxa.dt) # inspect column names to filter for
+
+# Specify taxonomy columns for which tax filtering can be applied.
+# Make sure that list names match the column names in your data table!
+var_ls = list(
+  family = 'Cyprinidae',
+  genus  = c('Daphnia','Ceriodaphnia')
+)
+
+tax.out = filter_dt( taxa.dt, var_ls, silent = FALSE)
+View(tax.out) # inspect the output
+
+
+# stx_query ---------------------------------
+# Note that by default stx_query filters for EC50, NOEC and LOEC values and for duration_unit = "h" (hours).
+# if you need something else specify in filter!
+
+# Example 1) - basix stx query
+tox.dt1 = stx_query(verbose = T) # will return results filtered for duration_unit = "h" (hours)
+
+# to keep all duration values, set duration_unit = NULL, default is "h"
+tox.dt2 = stx_query(duration_unit = NULL, verbose = T) # keep NA values )
+
+# To get everything!
+tox.dt3 = stx_query( endpoint_group = NULL,
+                     duration_unit = NULL, 
+                     verbose = T, rm_na = F) # remove NA values
+
+# NOTE: stx_data() will deliver ALL available results without any pre-filtering!
+dt = stx_data()
+
+# Example 2)
+tox.dt = stx_query(
+  verbose = T, 
+  endpoint_group = "XX50",
+  duration = c(12, 96),
+  concentration_unit = "g/l", 
+  tax_genus  = c("Danio","Pimephales") 
+  #tax_family = c("Cyprinidae","Salmonidae")
+  #tax_group  = c("fish","algae") 
+)
+
+# Example 3)
+tox.dt = stx_query(
+  verbose = T,
+  cas_number = c("482-89-3", "50-00-0","1071-83-6"), 
+  #endpoint_group = "XX50",
+  duration = c(12, 96),
+  concentration_unit = "g/l", 
+  tax_genus = c("Danio","Daphnia"),
+  include_reference = TRUE
+)
diff --git a/R/standartox.R b/R/standartox.R
new file mode 100644
index 0000000..a0341e8
--- /dev/null
+++ b/R/standartox.R
@@ -0,0 +1,464 @@
+#' Download Standartox Data Tables from Zenodo.org
+#' 
+#' Downloads the Standartox data tables from Zenodo.org and reads them into R. Specific data_types can be specified.
+#' 
+#' @return Returns a list of data.tables containing the downloaded data. 
+#'
+#' @param data_type character; Specify the type of data to download. Can be one of NULL (default, downloads and imports all), "meta.fst", "phch.fst", "refs.fst", "test_fin.fst", "taxa.fst", etc.
+#' @param dir_out character; Directory to which the downloaded files should be saved. Default is a temporary directory.
+#' 
+#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
+#' @author Hannes Reinwald
+#' 
+#' @examples
+#' \donttest{
+#' # might fail if there is no internet connection or Zenodo.org not not available
+#' stxDb = stx_download()
+#' names(stxDb) # files downloaded from zenodo.org
+#' }
+#' @noRd
+#' 
+stx_download = function(data_type, dir_out = file.path(tempdir(), "standartox")) {
+  # Check
+  data_type = match.arg(
+    data_type,
+    c("meta", "phch", "refs", "test_fin", "taxa", "catalog"),
+    several.ok = TRUE
+  )
+  
+  # Output directory
+  dir.create(dir_out, showWarnings = FALSE, recursive = TRUE)
+  
+  # Find download URLS
+  # HACK this has to be done, because doi.org is the only permanent link between versions
+  qurl_permanent = 'https://doi.org/10.5281/zenodo.3785030'
+  req = httr::GET(qurl_permanent)
+  cont = httr::content(req, as = 'text')
+  
+  # Extract all zenodo download links from the content
+  qurl = unique(stringr::str_extract_all(cont, 'https://zenodo.org/records/[0-9]+/files/.+')[[1]])
+  qurl = grep("[.]rds|[.]fst", qurl, value = TRUE) # filter for .rsd and .fst files only! 
+  qurl = sub('\">', '', qurl, fixed = TRUE)
+  qurl = grep(qurl, pattern = paste0(data_type, collapse = '|'), value = TRUE) # filter for data_type
+  
+  # For each link in qurl check if destination file exists and if not download it.
+  l = list()
+  for(URL in qurl){
+    fl = basename(URL)
+    fl_name = sub('.rds|.fst', '', fl) # remove file extension for list name
+
+    # Define destination file path
+    destfile = file.path(dir_out, fl)
+    
+    # Check if the file already exists
+    if ( !file.exists(destfile) ) {
+      curl::curl_download(url = URL,
+                          destfile = destfile,
+                          quiet = TRUE)
+    }
+    
+    # Read in the downloaded files based on their extension 
+    sfx = sub("^.+[.]", "", fl)
+    if( sfx == "fst" ) { 
+      out = fst::read_fst(destfile, as.data.table = TRUE)
+    } else if (sfx == "rds") { 
+      out = readRDS(destfile)
+    } else { 
+      stop("Unknown file format: ", sfx, "Expecting .fst or .rds files.")
+    }
+  }
+  
+  return(out)
+}
+
+
+
+stx_query_import = function(silent = TRUE, 
+                            data_type = c('test_fin.fst','phch.fst','taxa.fst','refs.fst'),
+                            ...) {
+  if (!silent) message('Retrieving Standartox listed Chemicals ...')
+  if (silent) {
+    result = suppressMessages( stx_download(data_type = data_type, ...) )
+  } else {
+    result = stx_download(data_type = data_type, ...)
+  }
+  # Convert data frame to data table
+  result = lapply( result, function(dt) data.table::setDT(dt) )
+  return(result)
+}
+
+
+
+filter_dt = function(dt, var_ls){
+  stopifnot(is.data.frame(dt))
+  stopifnot(is.list(var_ls))
+  data.table::setDT(dt) # Ensure dt is a data.table
+  
+  var_check = !unlist( lapply(var_ls, is.null) )
+  var_check = names( which(var_check) ) # select only those variables that are not NULL
+  
+  # Loop through the variables and filter the taxa.fst data frame 
+  if (length(var_check) > 0) {
+    dt.out = list()
+    
+    for(var in var_check){
+      message("Filtering for variable: ", var)
+      if (var %in% colnames(dt)) { # Check if the variable exists as a column in data table
+        if( !is.null(var_ls[[var]]) ){
+          # Filter the data table based on the specified column and the values in the variable
+          filter_val = var_ls[[var]]
+          dt.out[[var]] <- dt[ dt[[var]] %in% filter_val ]
+          if(nrow(dt.out[[var]]) == 0){ warning( paste("No query matches found for:\t", var) ) } 
+        }
+      } else {
+        warning(paste("Variable", var, "not found in data table. Skipping filter."))
+      }
+    }
+    
+    # Combine results
+    dt.out = distinct( data.table::rbindlist(dt.out) )
+    # Check 
+    if( nrow(dt.out) == 0 ) {
+      warning("No query results for the provided ",paste(var_check, collapse = " & "),
+              ". Please check the input values.")
+      return(NULL) }
+    return(dt.out)
+  } else { dt }
+}
+
+
+stx_query = function(
+  ## COMPOUND FILTERING ##
+  cas_number = NULL,
+  ## BASIC TOX DATA FILTERING ##
+  endpoint_group = c('XX50', 'NOEX', 'LOEX'),
+  exposure = NULL,           # character vector
+  effect = NULL,             # character vector
+  duration = c(0, Inf),      # numeric vector 
+  duration_unit = "h",       # character vector; set to NULL if you want to keep all results!
+  concentration_unit = NULL, # character vector
+  concentration_type = NULL, # character vector
+  ## TAXA FILTERING ##
+  tax_columns = c('tax_group', 'tax_taxon', 'tax_genus', 'tax_family'), # Taxonomy columns to append to the query results. DEFAULT: c('tax_group', 'tax_taxon', 'tax_genus', 'tax_family')
+  tax_genus  = NULL, # character vector
+  tax_family = NULL, # character vector
+  tax_order  = NULL, # character vector
+  tax_class  = NULL, # character vector
+  ecotox_grp = NULL, # character vector
+  ## REFERENCE SECTION ##
+  include_reference = FALSE, # Default FALSE
+  ...){ 
+  # Import stxDb 
+  message("Reading in Standartox Data ...")
+  stx_table = c('test_fin.fst','phch.fst','taxa.fst') # 
+  if(include_reference) { stx_table = unique(c(stx_table,'refs.fst')) }
+  stxDb  = stx_query_import(data_type = stx_table, ...)
+  tox.dt = stxDb$test_fin.fst # final output object. LARGE right after import!
+  stxDb  = stxDb[stx_table[-1]] # dump the largest object!
+  
+  # First quick filter steps:
+  # Remove rows where the specified columns contain "NR" <- NA values
+  message("Removing 'NA' values ...")
+  tox.dt <- tox.dt[!grepl("NR", endpoint) & !grepl("NR", duration_unit) ]
+  # Quick fix for endpoint values
+  tox.dt[, endpoint := sub("[/*]+$","", endpoint)]
+  if(!is.null(endpoint_group)){
+    tmp_var = endpoint_group # quick fix
+    tox.dt = tox.dt[endpoint_group %in% tmp_var]
+  }
+  if(!is.null(duration_unit)){
+    tmp_var = duration_unit # quick fix
+    tox.dt = tox.dt[duration_unit %in% tmp_var]
+  }
+  if( nrow(tox.dt) == 0 ) {
+    warning("No query matches found for the provided endpoint_group or duration_unit. Please check the input values.")
+  }
+  
+  # Step 1: Filter for cas_number then merge with toxdata
+  message("Appending chemical information ...")
+  ## Filter chem data for cas_number
+  if(!is.null(cas_number)){
+    stxDb$phch.fst <- stxDb$phch.fst[cas %in% cas_number]
+    if( nrow(stxDb$phch.fst) == 0 ) {
+      warning("No query matches found for the provided CAS numbers. Please check the input values.")
+    }
+  }
+  ## Merge with tox data by cl_id
+  merge(stxDb$phch.fst, tox.dt, all.x = TRUE, by = "cl_id") -> tox.dt
+  tox.dt[, c("chem_class","casnr", "cl_id") := NULL] # don't need the cl_id column anymore.
+  
+  
+  # Step 2: Filter for taxonomic groups then merge with toxdata
+  message("Appending taxonomic information ...")
+  ## Filtering ecotox_grp ##
+  if(!is.null(ecotox_grp)){
+    regstr = paste(ecotox_grp, collapse = "|")
+    stxDb$taxa.fst = stxDb$taxa.fst[ grepl(regstr, stxDb$taxa.fst$tax_group) ]
+    if( nrow( stxDb$taxa.fst ) == 0 ) {
+      warning("No query matches found for the provided tax_group. Please check the input values.")
+      return(NULL)
+    }
+  }
+  ## Filtering tax_columns ##
+  # Specify taxonomy columns for which tax filtering can be applied
+  var_ls = list(
+    tax_class  = tax_class,
+    tax_order  = tax_order, 
+    tax_family = tax_family,
+    tax_genus  = tax_genus
+  )
+  tax.out = filter_dt( stxDb$taxa.fst, var_ls)
+  if( is.null(tax.out) ) { return(NULL) } # Check 
+  
+  # Select pre-defined columns for output
+  tax_key = "tl_id"
+  tax.out = tax.out[, c(tax_key, tax_columns), with = FALSE]
+  # Merge taxonomy data with tox data by tl_id
+  merge(tax.out, tox.dt, all.x = TRUE, by = tax_key) -> tox.dt
+  tox.dt[, (tax_key) := NULL] # drop columns here 
+  
+  
+  # Step 3: Final Tox data filtering 
+  # rmv any rows with NA in result_id
+  tox.dt = tox.dt[!is.na(result_id)]
+  # Filter for the selected columns 
+  var_ls = list(
+    concentration_unit  = concentration_unit,
+    concentration_type  = concentration_type, 
+    effect = effect,
+    exposure  = exposure
+  )
+  tox.dt = filter_dt( tox.dt, var_ls )
+  if( is.null(tox.dt) ) { return(NULL) } # Check 
+  
+  # Filter for duration
+  tox.dt = tox.dt[duration %between% c(min(duration),max(duration))]
+  # Check
+  if( nrow( tox.dt ) == 0 ) {
+    warning("No query matches found. Please check the input filter values.")
+    return(NULL)
+  }
+  
+  # Step 4: Append references if wanted 
+  if(include_reference){
+    message("Appending reference information ...")
+    tox.dt = merge(tox.dt, stxDb$refs.fst, all.x = TRUE, by = "ref_number")
+    tox.dt[, c("ref_number") := NULL]
+  } 
+  
+  # Step 5: Final Cleanup 
+  # Replace all occurrences of "NR" with NA
+  tox.dt = tox.dt[, lapply(.SD, function(x) {
+    if (is.character(x)) {
+      x[x == "NR"] <- NA  # Replace "NR" with NA for character columns
+    }
+    return(x)  # Return the modified column
+  })]
+  # Filter out the "result_id" column
+  message("Done!\n")
+  return( tox.dt[, c("result_id") := NULL] )
+} 
+
+
+
+#' Retrieve data catalog
+#' 
+#' Retrieve a data catalog for all variables (and their values) that can be retrieved with stx_query()
+#' 
+#' @param silent logical; If TRUE, suppresses messages. Default is FALSE.
+#' @param dir_out character; Directory to which the catalog should be downloaded. Default is a temporary directory.
+#' 
+#' @return Returns a list of data.frames containing information on data base variables
+#' 
+#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
+#' @author Hannes Reinwald
+#' 
+#' @examples
+#' \donttest{
+#' # might fail if there is no internet connection or Zenodo.org not not available
+#' # basic function call
+#' l = stx_catalog()
+#' 
+#' # to get verbose output from the function
+#' l = stx_catalog(silent = FALSE)
+#' 
+#' # to specify a directory to which the catalog should be downloaded
+#' l = stx_catalog(silent = FALSE, dir_out = "~/tmp")
+#' # This will create a directory under ~/tmp and download the catalog.rds file to that directory.
+#' # The files are then permanently stored in that directory and can be directly read when restarting your R session.
+#' }
+#' @export
+#' 
+stx_catalog = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
+  if (!silent) message('Retrieving Standartox catalog..')
+  l = stx_download(data_type = 'catalog', dir_out = dir_out)
+
+  return(l)
+}
+
+#' Retrieve Standartox toxicity values
+#' 
+#' Retrieve a data.table containing the Standartox toxicity data
+#' 
+#' @param silent logical; If TRUE, suppresses messages. Default is FALSE.
+#' @param dir_out character; Directory to which the catalog should be downloaded. Default is a temporary directory.
+#' 
+#' @return Returns a data.table.
+#' 
+#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
+#' @author Hannes Reinwald
+#' 
+#' @examples
+#' \donttest{
+#' # might fail if there is no internet connection or Zenodo.org not not available
+#' # basic function call
+#' 
+#' dt = stx_data()
+#' 
+#' }
+#' @export
+#' 
+stx_data = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
+  if (!silent) message('Retrieving Standartox data..')
+  out = stx_download(data_type = 'test_fin', dir_out = dir_out)
+
+  return(out)
+}
+
+#' Retrieve chemical data
+#' 
+#' Retrieve data on all chemicals in Standartox.
+#' 
+#' @return Returns a data.table containing informaiton on chemicals in Standartox.
+#' 
+#' @param silent logical; If TRUE, suppresses messages. Default is FALSE.
+#' @param dir_out character; Directory to which the chemical information should be downloaded. Default is a temporary directory.
+#' 
+#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
+#' @author Hannes Reinwald
+#' 
+#' @examples
+#' \donttest{
+#' # might fail if there is no internet connection or Zenodo.org not not available
+#' # basic function call
+#' df = stx_chem()
+#' 
+#' # to get verbose output from the function
+#' df = stx_chem(silent = FALSE)
+#' 
+#' # to specify a directory to which the chemical information should be downloaded
+#' df = stx_chem(silent = FALSE, dir_out = "~/tmp")
+#' # This will create a directory under ~/tmp and download the respective standartox file to that directory.
+#' # The files are then permanently stored in that directory and can be directly read when restarting your R session.
+#' }
+#' 
+#' @export
+#' 
+stx_chem = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
+  if (!silent) message('Retrieving Standartox chemical information..')
+  out = stx_download(data_type = 'phch', dir_out = dir_out)
+
+  return(out)
+}
+
+#' Retrieve taxa data
+#' 
+#' Retrieve data on all taxa in Standartox.
+#' 
+#' @return Returns a data.table containing informaiton on taxa in Standartox.
+#' 
+#' @param silent logical; If TRUE, suppresses messages. Default is FALSE.
+#' @param dir_out character; Directory to which the taxa information should be downloaded. Default is a temporary directory.
+#' 
+#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
+#' @author Hannes Reinwald
+#' 
+#' @examples
+#' \donttest{
+#' # might fail if there is no internet connection or Zenodo.org not not available
+#' # basic function call
+#' df = stx_taxa()
+#' 
+#' # to get verbose output from the function
+#' df = stx_taxa(silent = FALSE)
+#' 
+#' # to specify a directory to which the taxa information should be downloaded
+#' df = stx_taxa(silent = FALSE, dir_out = "~/tmp")
+#' # This will create a directory under ~/tmp and download the respective standartox file to that directory.
+#' # The files are then permanently stored in that directory and can be directly read when restarting your R session.
+#' }
+#' 
+#' @export
+#' 
+stx_taxa = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
+  if (!silent) message('Retrieving Standartox taxa information..')
+  out = stx_download(data_type = 'taxa', dir_out = dir_out)
+
+  return(out)
+}
+
+#' Function to aggregate filtered test results
+#'  
+#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
+#' 
+#' @noRd
+#'  
+stx_aggregate = function(dat = NULL) {
+  # assign variables to avoid R CMD check NOTES
+  . = concentration = cname = cas = tax_taxon = gmn = gmnsd = n = NULL
+  # checking
+  if (is.null(dat)) stop('Provide table.')
+  # aggregation
+  dat[
+    ,
+    .(gmn = gm_mean(concentration),
+      gmnsd = gm_sd(concentration),
+      n = .N),
+    .(cname, cas, tax_taxon) 
+    ][
+      ,
+      .(min = min(gmn),
+        tax_min = .SD[ which.min(gmn), tax_taxon ],
+        gmn = gm_mean(gmn),
+        gmnsd = gm_sd(gmnsd),
+        max = max(gmn),
+        tax_max = .SD[ which.max(gmn), tax_taxon ],
+        n = sum(n),
+        tax_all = paste0(sort(unique(tax_taxon)), collapse = ', ')),
+      .(cname, cas)
+  ]
+}
+
+#' Retrieve meta data
+#' 
+#' @return Returns a data.table containing meta informaiton on Standartox.
+#' 
+#' @param silent logical; If TRUE, suppresses messages. Default is FALSE.
+#' @param dir_out character; Directory to which the meta information should be downloaded. Default is a temporary directory.#' 
+#'
+#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
+#' @author Hannes Reinwald
+#' 
+#' @examples
+#' \donttest{
+#' # might fail if there is no internet connection or Zenodo.org not not available
+#' # basic function call
+#' df = stx_meta()
+#' 
+#' # to get verbose output from the function
+#' df = stx_meta(silent = FALSE)
+#' 
+#' # to specify a directory to which the taxa information should be downloaded
+#' df = stx_meta(silent = FALSE, dir_out = "~/tmp")
+#' # This will create a directory under ~/tmp and download the respective standartox file to that directory.
+#' # The files are then permanently stored in that directory and can be directly read when restarting your R session.
+#' }
+#' 
+#' @export
+#' 
+stx_meta = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
+  if (!silent) message('Retrieving Standartox meta information..')
+  out = stx_download(data_type = 'meta', dir_out = dir_out)
+
+  return(out)
+}

From da0a45eb47baca08d5779afae70b7e60e99da08e Mon Sep 17 00:00:00 2001
From: Hannes Reinwald <hannes.reinwald@bayer.com>
Date: Mon, 16 Jun 2025 19:21:23 +0200
Subject: [PATCH 4/6] Added stx_query() function

---
 R/standartox.R | 260 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 169 insertions(+), 91 deletions(-)

diff --git a/R/standartox.R b/R/standartox.R
index a0341e8..6e02edb 100644
--- a/R/standartox.R
+++ b/R/standartox.R
@@ -4,8 +4,9 @@
 #' 
 #' @return Returns a list of data.tables containing the downloaded data. 
 #'
-#' @param data_type character; Specify the type of data to download. Can be one of NULL (default, downloads and imports all), "meta.fst", "phch.fst", "refs.fst", "test_fin.fst", "taxa.fst", etc.
+#' @param data_type character; Specify the type of data to download. Select from c("meta", "phch", "refs", "test_fin", "taxa", "catalog"). NULL (default) will download and imports all,
 #' @param dir_out character; Directory to which the downloaded files should be saved. Default is a temporary directory.
+#' @param silent logical; If TRUE, suppresses messages. Default is TRUE.
 #' 
 #' @author Andreas Scharmueller \email{andschar@@protonmail.com}
 #' @author Hannes Reinwald
@@ -18,78 +19,104 @@
 #' }
 #' @noRd
 #' 
-stx_download = function(data_type, dir_out = file.path(tempdir(), "standartox")) {
+stx_download = function(data_type = NULL, dir_out = file.path(tempdir(),"standartox"), silent = TRUE) {
+  
+  # please keep this. Makes it easier to quickly pull everything without the need of having to specify specific 
+  stx_files = c("meta", "phch", "refs", "test_fin", "taxa", "catalog")
+  if(is.null(data_type)){ data_type = stx_files}
   # Check
-  data_type = match.arg(
-    data_type,
-    c("meta", "phch", "refs", "test_fin", "taxa", "catalog"),
-    several.ok = TRUE
-  )
+  data_type = match.arg( data_type, stx_files, several.ok = TRUE )
   
-  # Output directory
-  dir.create(dir_out, showWarnings = FALSE, recursive = TRUE)
+  # Check if the output directory exists, if not create it
+  if (!dir.exists(dir_out)) { dir.create(dir_out, recursive = TRUE) }
   
-  # Find download URLS
   # HACK this has to be done, because doi.org is the only permanent link between versions
   qurl_permanent = 'https://doi.org/10.5281/zenodo.3785030'
   req = httr::GET(qurl_permanent)
   cont = httr::content(req, as = 'text')
-  
-  # Extract all zenodo download links from the content
+  # extract all zenodo download links from the content
   qurl = unique(stringr::str_extract_all(cont, 'https://zenodo.org/records/[0-9]+/files/.+')[[1]])
   qurl = grep("[.]rds|[.]fst", qurl, value = TRUE) # filter for .rsd and .fst files only! 
-  qurl = sub('\">', '', qurl, fixed = TRUE)
-  qurl = grep(qurl, pattern = paste0(data_type, collapse = '|'), value = TRUE) # filter for data_type
+  
+  # Filter for specific data_type if provided
+  if(!is.null(data_type)){
+    # could be one of: "meta.fst","phch.fst","refs.fst","test_fin.fst","taxa.fst", ...
+    regx_str = paste0("/files/", sub("[.]fst$","[.]fst", sub("[.]rds$","[.]rds", data_type)) )
+    regx_str = paste(regx_str, collapse = "|")
+    qurl = grep(regx_str, qurl, value = TRUE)
+  }
   
   # For each link in qurl check if destination file exists and if not download it.
-  l = list()
-  for(URL in qurl){
-    fl = basename(URL)
-    fl_name = sub('.rds|.fst', '', fl) # remove file extension for list name
-
+  stxDb_ls = list() # output list 
+  for(k in qurl){
+    URL = sub('\">','', k)
+    n   = sub("^.+/files/","", URL)
+    if(!silent) message('\nChecking standartox file: ', n)
+    
     # Define destination file path
-    destfile = file.path(dir_out, fl)
+    destfile = file.path(dir_out, n)
     
     # Check if the file already exists
     if ( !file.exists(destfile) ) {
+      if(!silent) message('Downloading standartox ',n,' ...')
       curl::curl_download(url = URL,
                           destfile = destfile,
                           quiet = TRUE)
+      if(!silent) message('Done! Downloaded to:\n', destfile)
     }
+    else { if(!silent) message('File ', n, ' already exists, skipping download.') }
     
+    if(!silent) message('Reading in file:\t', n)
     # Read in the downloaded files based on their extension 
-    sfx = sub("^.+[.]", "", fl)
+    # for .fst: fst::read_fst(); for .rds: readRDS()
+    sfx = sub("^.+[.]","", basename(destfile))
     if( sfx == "fst" ) { 
-      out = fst::read_fst(destfile, as.data.table = TRUE)
+      stxDb_ls[[n]] = try ( fst::read_fst(destfile) )
+      if(!silent) message("Done!\n")
     } else if (sfx == "rds") { 
-      out = readRDS(destfile)
-    } else { 
-      stop("Unknown file format: ", sfx, "Expecting .fst or .rds files.")
-    }
+      stxDb_ls[[n]] = readRDS(destfile)
+      if(!silent) message("Done!\n")
+    } else { warning("Unknown file format: ", sfx, "Expecting .fst or .rds files.\n") }
   }
-  
-  return(out)
+  # Return the list of data frames
+  return(stxDb_ls)
 }
 
 
-
-stx_query_import = function(silent = TRUE, 
-                            data_type = c('test_fin.fst','phch.fst','taxa.fst','refs.fst'),
-                            ...) {
-  if (!silent) message('Retrieving Standartox listed Chemicals ...')
-  if (silent) {
-    result = suppressMessages( stx_download(data_type = data_type, ...) )
-  } else {
-    result = stx_download(data_type = data_type, ...)
-  }
-  # Convert data frame to data table
-  result = lapply( result, function(dt) data.table::setDT(dt) )
-  return(result)
-}
-
-
-
-filter_dt = function(dt, var_ls){
+#' Filter data.table based on a list of variables
+#' 
+#' This function filters a data.table based on specified values in one or more columns. 
+#' It checks for the existence of the specified columns and applies the filters accordingly.
+#' 
+#' @param dt data.table; The data.table to filter.
+#' @param var_ls list; A named list where each element is a vector of values to filter the corresponding column in the data.table. The names of the list should match the column names in the data.table.
+#' @param silent logical; If TRUE, suppresses messages. Default is TRUE.
+#' 
+#' @return Returns a filtered data.table containing only the rows that match the specified values in the columns. If no matches are found, a warning is issued and NULL is returned.
+#' 
+#' @author Hannes Reinwald
+#' 
+#' 
+#' @examples
+#' \donttest{
+#' # Import the standartox taxonomy data table as example
+#' taxa.dt = stx_taxa()
+#' colnames(taxa.dt) # inspect column names to filter for
+#' 
+#' # Specify taxonomy columns for which tax filtering can be applied.
+#' # Make sure that list names match the column names in your data table!
+#' var_ls = list(
+#' family = 'Cyprinidae',
+#' genus  = c('Daphnia','Ceriodaphnia')
+#' )
+#' 
+#' # Filter your taxonomy table for genus and family specified.
+#' tax.out = filter_dt( taxa.dt, var_ls, silent = FALSE)
+#' View(tax.out) # inspect the output
+#' }
+#' 
+#' @noRd
+filter_dt = function(dt, var_ls, silent = TRUE){
   stopifnot(is.data.frame(dt))
   stopifnot(is.list(var_ls))
   data.table::setDT(dt) # Ensure dt is a data.table
@@ -102,7 +129,7 @@ filter_dt = function(dt, var_ls){
     dt.out = list()
     
     for(var in var_check){
-      message("Filtering for variable: ", var)
+      if(!silent) message("Filtering for variable: ", var)
       if (var %in% colnames(dt)) { # Check if the variable exists as a column in data table
         if( !is.null(var_ls[[var]]) ){
           # Filter the data table based on the specified column and the values in the variable
@@ -116,7 +143,7 @@ filter_dt = function(dt, var_ls){
     }
     
     # Combine results
-    dt.out = distinct( data.table::rbindlist(dt.out) )
+    dt.out = unique( data.table::rbindlist(dt.out) )
     # Check 
     if( nrow(dt.out) == 0 ) {
       warning("No query results for the provided ",paste(var_check, collapse = " & "),
@@ -127,6 +154,37 @@ filter_dt = function(dt, var_ls){
 }
 
 
+#' Query Standartox toxicity values
+#'
+#' Retrieve toxicity values from the Standartox data base on Zenodo.org \url{https://doi.org/10.5281/zenodo.3785030}.
+#' 
+#' @return Returns a data.table containing Standartox data base query results. 
+#'
+#' @param cas character, integer; Limit data base query to specific CAS numbers, multiple entries possible (e.g. c('1071-83-6', '1071836'), NULL (default).
+#' @param concentration_unit character; Limit data base query to specific concentration units (e.g. ug/l - default).
+#' @param concentration_type character; Limit data base query to specific concentration types, can be one of NULL (default), 'active ingredient', 'formulation', 'total', 'not reported', 'unionized', 'dissolved', 'labile'. See \url{https://cfpub.epa.gov/ecotox/pdf/codeappendix.pdf} p.4.
+#' @param duration integer vector of length two; Limit data base query to specific test durations (hours) (e.g. c(24, 48)). NULL (default).
+#' @param endpoint character; Choose endypoint type, must be one of 'XX50' (default), 'NOEX', 'LOEX'.
+#' @param effect character; Limit data base query to specific effect groups, multiple entries possible (e.g. 'Mortality', 'Intoxication', 'Growth'). See \url{https://cfpub.epa.gov/ecotox/pdf/codeappendix.pdf} p.95. NULL (default).
+#' @param exposure character; Choose exposure type, (e.g. aquatic, environmental, diet). NULL (default).
+#' @param chemical_role character; Limit data base query to specific chemical roles (e.g. insecticide), multiple entries possible. NULL (default).
+#' @param chemical_class character; Limit data base query to specific chemical classes (e.g. neonicotinoid), multiple entries possible. NULL (default).
+#' @param taxa character; Limit data base query to specific taxa, multiple entries possible. NULL (default).
+#' @param ecotox_grp character; Convenience grouping of organisms in ecotoxicology, must be one of NULL (default), 'invertebrate', 'fish', 'plant_land', 'macrophyte', 'algae'.
+#' @param trophic_lvl character; Trophic level of organism, must be one of NULL (default), 'autotroph', 'heterotroph'.
+#' @param habitat character; Limit data base query to specific organism habitats, can be one of NULL (default) 'marine', 'brackish', 'freshwater', 'terrestrial'.
+#' @param region character; Limit data base query to organisms occurring in specific regions, can be one of NULL (default) 'africa', 'america_north', 'america_south', 'asia', 'europe', 'oceania'.
+#' @param vers integer; Choose the version of the EPA Ecotox on which Standartox is based on. NULL (default) accesses the most recent version.
+#' @param ... currently not used
+#'
+#' @return Returns a list of three data.tables (filtered data base query results, aggregated data base query results, meta information)
+#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
+#' 
+#' @examples
+#' \donttest{
+#' 
+#' @export
+#' 
 stx_query = function(
   ## COMPOUND FILTERING ##
   cas_number = NULL,
@@ -147,20 +205,31 @@ stx_query = function(
   ecotox_grp = NULL, # character vector
   ## REFERENCE SECTION ##
   include_reference = FALSE, # Default FALSE
-  ...){ 
+  rm_na = TRUE, # Default TRUE; if FALSE, keep NR values in the result
+  verbose = FALSE, # Default TRUE; if FALSE, print messages
+  ...){
+  message("Querying Standartox data base ...")
+  
   # Import stxDb 
-  message("Reading in Standartox Data ...")
-  stx_table = c('test_fin.fst','phch.fst','taxa.fst') # 
-  if(include_reference) { stx_table = unique(c(stx_table,'refs.fst')) }
-  stxDb  = stx_query_import(data_type = stx_table, ...)
-  tox.dt = stxDb$test_fin.fst # final output object. LARGE right after import!
-  stxDb  = stxDb[stx_table[-1]] # dump the largest object!
+  if(verbose) message("Reading in Standartox Data ...")
+  stx_table = c('test_fin','phch','taxa') # 
+  if(include_reference) { stx_table = unique(c(stx_table,'refs')) }
+  stxDb =  stx_download(data_type = stx_table) #, ...)
+  names(stxDb) = sub("[.]fst$","",names(stxDb)) # FIX
   
+  # Convert data frame to data table
+  stxDb = lapply( stxDb, function(dt) data.table::setDT(dt) )
+  tox.dt = stxDb$test_fin # final output object. LARGE right after import!
+  stxDb  = stxDb[stx_table[-1]] # dump the largest object! <- hope to save some memory with that.
+
   # First quick filter steps:
   # Remove rows where the specified columns contain "NR" <- NA values
-  message("Removing 'NA' values ...")
-  tox.dt <- tox.dt[!grepl("NR", endpoint) & !grepl("NR", duration_unit) ]
-  # Quick fix for endpoint values
+  if(rm_na){
+    if(verbose) message("Removing 'NA' values ...")
+    tox.dt = tox.dt[!grepl("NR", endpoint) & !grepl("NR", duration_unit) ] 
+  }
+  
+  # Quick fix for endpoint values - some contain weird string endings.
   tox.dt[, endpoint := sub("[/*]+$","", endpoint)]
   if(!is.null(endpoint_group)){
     tmp_var = endpoint_group # quick fix
@@ -175,52 +244,55 @@ stx_query = function(
   }
   
   # Step 1: Filter for cas_number then merge with toxdata
-  message("Appending chemical information ...")
+  if(verbose) message("Appending chemical information ...")
   ## Filter chem data for cas_number
   if(!is.null(cas_number)){
-    stxDb$phch.fst <- stxDb$phch.fst[cas %in% cas_number]
-    if( nrow(stxDb$phch.fst) == 0 ) {
+    stxDb$phch <- stxDb$phch[cas %in% cas_number]
+    if( nrow(stxDb$phch) == 0 ) {
       warning("No query matches found for the provided CAS numbers. Please check the input values.")
     }
+    merge(stxDb$phch, tox.dt, all.x = TRUE, by = "cl_id") -> tox.dt
+  } else {
+    merge(stxDb$phch, tox.dt, all.y = TRUE, by = "cl_id") -> tox.dt
   }
-  ## Merge with tox data by cl_id
-  merge(stxDb$phch.fst, tox.dt, all.x = TRUE, by = "cl_id") -> tox.dt
-  tox.dt[, c("chem_class","casnr", "cl_id") := NULL] # don't need the cl_id column anymore.
+  suppressWarnings( tox.dt[, c("chem_class","casnr", "cl_id") := NULL] ) # don't need the cl_id column anymore.
   
   
   # Step 2: Filter for taxonomic groups then merge with toxdata
-  message("Appending taxonomic information ...")
+  if(verbose) message("Appending taxonomic information ...")
+  
+  # Append 'tax_' prefix to the taxonomic columns (except for tax_key)
+  tax_key = "tl_id"
+  tax_col = paste0("tax_", colnames(stxDb$taxa))
+  tax_col[ grep(paste0("^tax_",tax_key,"$"),tax_col) ] <- tax_key # replace tax_key with tl_id
+  colnames(stxDb$taxa) <- tax_col # set new column names
+  
+  # Select pre-defined columns for output
+  tax.out = stxDb$taxa[, c(tax_key, tax_columns), with = FALSE]
+  # Merge taxonomy data with tox data by tl_id
+  tox.dt = merge(tox.dt, tax.out, all.x = TRUE, by = tax_key)
+  
   ## Filtering ecotox_grp ##
   if(!is.null(ecotox_grp)){
     regstr = paste(ecotox_grp, collapse = "|")
-    stxDb$taxa.fst = stxDb$taxa.fst[ grepl(regstr, stxDb$taxa.fst$tax_group) ]
-    if( nrow( stxDb$taxa.fst ) == 0 ) {
+    tox.dt = tox.dt[ grepl(regstr, tox.dt$tax_group) ]
+    if( nrow( tox.dt ) == 0 ) {
       warning("No query matches found for the provided tax_group. Please check the input values.")
       return(NULL)
     }
   }
   ## Filtering tax_columns ##
-  # Specify taxonomy columns for which tax filtering can be applied
-  var_ls = list(
+  var_ls = list( # Specify taxonomy columns for which tax filtering can be applied
     tax_class  = tax_class,
     tax_order  = tax_order, 
     tax_family = tax_family,
     tax_genus  = tax_genus
   )
-  tax.out = filter_dt( stxDb$taxa.fst, var_ls)
-  if( is.null(tax.out) ) { return(NULL) } # Check 
-  
-  # Select pre-defined columns for output
-  tax_key = "tl_id"
-  tax.out = tax.out[, c(tax_key, tax_columns), with = FALSE]
-  # Merge taxonomy data with tox data by tl_id
-  merge(tax.out, tox.dt, all.x = TRUE, by = tax_key) -> tox.dt
-  tox.dt[, (tax_key) := NULL] # drop columns here 
-  
+  tox.dt = filter_dt( tox.dt, var_ls)
+  if( is.null(tox.dt) ) { return(NULL) } # Check 
   
   # Step 3: Final Tox data filtering 
-  # rmv any rows with NA in result_id
-  tox.dt = tox.dt[!is.na(result_id)]
+  tox.dt = tox.dt[!is.na(result_id)] # rmv any rows with NA in result_id <- this should not be the case but to be save!
   # Filter for the selected columns 
   var_ls = list(
     concentration_unit  = concentration_unit,
@@ -232,7 +304,13 @@ stx_query = function(
   if( is.null(tox.dt) ) { return(NULL) } # Check 
   
   # Filter for duration
-  tox.dt = tox.dt[duration %between% c(min(duration),max(duration))]
+  lower = min(duration)
+  upper = max(duration)
+  if (lower != 0 | upper != Inf) {
+    #tox.dt = tox.dt[duration %between% c(lower, upper)] # <- this works only when data.table is loaded
+    tox.dt = tox.dt[duration >= lower & duration <= upper] # <- this works always
+  }
+  
   # Check
   if( nrow( tox.dt ) == 0 ) {
     warning("No query matches found. Please check the input filter values.")
@@ -241,8 +319,8 @@ stx_query = function(
   
   # Step 4: Append references if wanted 
   if(include_reference){
-    message("Appending reference information ...")
-    tox.dt = merge(tox.dt, stxDb$refs.fst, all.x = TRUE, by = "ref_number")
+    if(verbose) message("Appending reference information ...")
+    tox.dt = merge(tox.dt, stxDb$refs, all.x = TRUE, by = "ref_number")
     tox.dt[, c("ref_number") := NULL]
   } 
   
@@ -291,9 +369,9 @@ stx_query = function(
 #' 
 stx_catalog = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
   if (!silent) message('Retrieving Standartox catalog..')
-  l = stx_download(data_type = 'catalog', dir_out = dir_out)
+  ls = stx_download(data_type = 'catalog', dir_out = dir_out)[[1]]
 
-  return(l)
+  return(ls)
 }
 
 #' Retrieve Standartox toxicity values
@@ -319,8 +397,8 @@ stx_catalog = function(silent = FALSE, dir_out = file.path(tempdir(), "standarto
 #' @export
 #' 
 stx_data = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
-  if (!silent) message('Retrieving Standartox data..')
-  out = stx_download(data_type = 'test_fin', dir_out = dir_out)
+  if(!silent) message('Retrieving Standartox data..')
+  out = stx_download(data_type = 'test_fin', dir_out = dir_out)[[1]]
 
   return(out)
 }
@@ -356,7 +434,7 @@ stx_data = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")
 #' 
 stx_chem = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
   if (!silent) message('Retrieving Standartox chemical information..')
-  out = stx_download(data_type = 'phch', dir_out = dir_out)
+  out = stx_download(data_type = 'phch', dir_out = dir_out)[[1]]
 
   return(out)
 }
@@ -392,7 +470,7 @@ stx_chem = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")
 #' 
 stx_taxa = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
   if (!silent) message('Retrieving Standartox taxa information..')
-  out = stx_download(data_type = 'taxa', dir_out = dir_out)
+  out = stx_download(data_type = 'taxa', dir_out = dir_out)[[1]]
 
   return(out)
 }
@@ -458,7 +536,7 @@ stx_aggregate = function(dat = NULL) {
 #' 
 stx_meta = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")) {
   if (!silent) message('Retrieving Standartox meta information..')
-  out = stx_download(data_type = 'meta', dir_out = dir_out)
+  out = stx_download(data_type = 'meta', dir_out = dir_out)[[1]]
 
   return(out)
 }

From cea60b1644433befcf30093f2bdcbd668ac406c3 Mon Sep 17 00:00:00 2001
From: Hannes Reinwald <hannes.reinwald@bayer.com>
Date: Tue, 17 Jun 2025 14:00:20 +0200
Subject: [PATCH 5/6] Added more examples for stx_query()

---
 R/example_code.R | 94 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 67 insertions(+), 27 deletions(-)

diff --git a/R/example_code.R b/R/example_code.R
index 1506798..c290fa1 100644
--- a/R/example_code.R
+++ b/R/example_code.R
@@ -1,54 +1,94 @@
-# filter_dt -----------------------------------
+# filter_dt() -----------------------------------
 taxa.dt = stx_taxa()
 colnames(taxa.dt) # inspect column names to filter for
 
 # Specify taxonomy columns for which tax filtering can be applied.
 # Make sure that list names match the column names in your data table!
 var_ls = list(
-  family = 'Cyprinidae',
-  genus  = c('Daphnia','Ceriodaphnia')
+  family = "Cyprinidae",
+  genus  = c("Daphnia","Ceriodaphnia")
 )
 
 tax.out = filter_dt( taxa.dt, var_ls, silent = FALSE)
 View(tax.out) # inspect the output
 
 
-# stx_query ---------------------------------
-# Note that by default stx_query filters for EC50, NOEC and LOEC values and for duration_unit = "h" (hours).
-# if you need something else specify in filter!
+# stx_query() ---------------------------------
 
-# Example 1) - basix stx query
-tox.dt1 = stx_query(verbose = T) # will return results filtered for duration_unit = "h" (hours)
+# Example 1) - Basic stx_query() call
+# will return results filtered for default endpoint_group = c("XX50", "NOEX", "LOEX") and duration_unit = "h"
+dt = stx_query(verbose = T)
 
-# to keep all duration values, set duration_unit = NULL, default is "h"
-tox.dt2 = stx_query(duration_unit = NULL, verbose = T) # keep NA values )
+# If you wish to filter for different endpoint groups, you can specify them in the query.
+stx_catalog()$endpoint_group # to view available endpoint groups
+dt1 = stx_query(endpoint_group = c("Bioconc","MATC","MCIG"))
 
-# To get everything!
-tox.dt3 = stx_query( endpoint_group = NULL,
-                     duration_unit = NULL, 
-                     verbose = T, rm_na = F) # remove NA values
+# if you wish to keep all duration_unit and endpoint_groups, set them to NULL. 
+dt2 = stx_query(duration_unit = NULL, endpoint_group = NULL, verbose = T) 
+# including "NR" (not reported) values. This is everything you can retrieve via stx_data() as well. 
+dt3 = stx_query(duration_unit = NULL, endpoint_group = NULL, verbose = T, rm_na = F) 
 
-# NOTE: stx_data() will deliver ALL available results without any pre-filtering!
-dt = stx_data()
 
-# Example 2)
-tox.dt = stx_query(
+# Example 2) - Filter for specific taxonomic groups
+# Filter for fish and algae for an exposure duration of 12 to 120 hours and concentration unit in g/l for endpoint group XX50 (50% effect/lethality values)
+stx_catalog()$group # to get an idea about available taxonomic groups
+stx_catalog()$concentration_unit
+dt4 = stx_query(
   verbose = T, 
   endpoint_group = "XX50",
-  duration = c(12, 96),
+  duration = c(12, 120),
   concentration_unit = "g/l", 
-  tax_genus  = c("Danio","Pimephales") 
-  #tax_family = c("Cyprinidae","Salmonidae")
-  #tax_group  = c("fish","algae") 
+  tax_group  = c("fish","algae")
 )
 
-# Example 3)
-tox.dt = stx_query(
+
+# Example 3) - Filter for specific genus and chemical compounds + append reference data
+# make sure to extract only EC50/LC50 values from 12 - 96 hours and of comparable concentration units
+dt5 = stx_query(
   verbose = T,
-  cas_number = c("482-89-3", "50-00-0","1071-83-6"), 
-  #endpoint_group = "XX50",
+  cas_number = c("1071-83-6","63-25-2","138261-41-3"), # glyphosate, carbaryl and imidacloprid 
+  endpoint_group = "XX50", # to get only 50% effect/lethality values
   duration = c(12, 96),
   concentration_unit = "g/l", 
-  tax_genus = c("Danio","Daphnia"),
+  tax_genus = c("Danio","Daphnia","Ceriodaphnia","Chironomus"),
+  include_reference = TRUE
+)
+
+
+# Example 4) - filter for specific taxonomic families and append reference data + more taxonomic lineage information
+stx_catalog()$family # get an idea about which family to use by inspecting the taxa catalog
+colnames(stx_taxa())[-1] # all possible taxonomic columns to extract from stx_taxa()
+
+dt6 = stx_query(
+  verbose = T, 
+  endpoint_group = "XX50",
+  duration = c(12, 120),
+  concentration_unit = "g/l", 
+  tax_family = c("Cyprinidae","Salmonidae","Daphniidae"),
+  tax_columns = c("group","taxon","genus","family","order","class","habitat"), # to get more taxonomic lineage information
   include_reference = TRUE
 )
+
+
+# Example 5) - Specific data selection
+# get ALL LC50 values for 96 - 120 h of exposure for zebra fish (Danio rerio)
+danio = stx_query(
+  endpoint_group = "XX50",
+  duration = c(96, 120),
+  effect = "mortality",
+  concentration_unit = "g/l",
+  concentration_type = "active ingredient",
+  tax_genus = "Danio",
+  include_reference = TRUE
+)
+
+# get ALL LC50 values for 24 - 48 h of exposure for Daphnia
+dmag = stx_query(
+  endpoint_group = "XX50",
+  duration = c(24, 48),
+  effect = "mortality",
+  concentration_unit = "g/l",
+  concentration_type = "active ingredient",
+  tax_genus = "Daphnia",
+  include_reference = TRUE
+)
\ No newline at end of file

From e91a2e15467ad5babc91d8847cb33f8101e6721e Mon Sep 17 00:00:00 2001
From: Hannes Reinwald <hannes.reinwald@bayer.com>
Date: Tue, 17 Jun 2025 14:03:59 +0200
Subject: [PATCH 6/6] Updated function one more time and added documentation
 and code examples

---
 R/standartox.R | 101 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 71 insertions(+), 30 deletions(-)

diff --git a/R/standartox.R b/R/standartox.R
index 6e02edb..aad3924 100644
--- a/R/standartox.R
+++ b/R/standartox.R
@@ -154,35 +154,75 @@ filter_dt = function(dt, var_ls, silent = TRUE){
 }
 
 
-#' Query Standartox toxicity values
+#' Query Standartox Toxicity Data
 #'
-#' Retrieve toxicity values from the Standartox data base on Zenodo.org \url{https://doi.org/10.5281/zenodo.3785030}.
-#' 
-#' @return Returns a data.table containing Standartox data base query results. 
+#' Retrieve and filter toxicity data from the Standartox database (\url{https://doi.org/10.5281/zenodo.3785030}) using chemical, experimental, and taxonomic criteria.
 #'
-#' @param cas character, integer; Limit data base query to specific CAS numbers, multiple entries possible (e.g. c('1071-83-6', '1071836'), NULL (default).
-#' @param concentration_unit character; Limit data base query to specific concentration units (e.g. ug/l - default).
-#' @param concentration_type character; Limit data base query to specific concentration types, can be one of NULL (default), 'active ingredient', 'formulation', 'total', 'not reported', 'unionized', 'dissolved', 'labile'. See \url{https://cfpub.epa.gov/ecotox/pdf/codeappendix.pdf} p.4.
-#' @param duration integer vector of length two; Limit data base query to specific test durations (hours) (e.g. c(24, 48)). NULL (default).
-#' @param endpoint character; Choose endypoint type, must be one of 'XX50' (default), 'NOEX', 'LOEX'.
-#' @param effect character; Limit data base query to specific effect groups, multiple entries possible (e.g. 'Mortality', 'Intoxication', 'Growth'). See \url{https://cfpub.epa.gov/ecotox/pdf/codeappendix.pdf} p.95. NULL (default).
-#' @param exposure character; Choose exposure type, (e.g. aquatic, environmental, diet). NULL (default).
-#' @param chemical_role character; Limit data base query to specific chemical roles (e.g. insecticide), multiple entries possible. NULL (default).
-#' @param chemical_class character; Limit data base query to specific chemical classes (e.g. neonicotinoid), multiple entries possible. NULL (default).
-#' @param taxa character; Limit data base query to specific taxa, multiple entries possible. NULL (default).
-#' @param ecotox_grp character; Convenience grouping of organisms in ecotoxicology, must be one of NULL (default), 'invertebrate', 'fish', 'plant_land', 'macrophyte', 'algae'.
-#' @param trophic_lvl character; Trophic level of organism, must be one of NULL (default), 'autotroph', 'heterotroph'.
-#' @param habitat character; Limit data base query to specific organism habitats, can be one of NULL (default) 'marine', 'brackish', 'freshwater', 'terrestrial'.
-#' @param region character; Limit data base query to organisms occurring in specific regions, can be one of NULL (default) 'africa', 'america_north', 'america_south', 'asia', 'europe', 'oceania'.
-#' @param vers integer; Choose the version of the EPA Ecotox on which Standartox is based on. NULL (default) accesses the most recent version.
-#' @param ... currently not used
+#' @param cas_number character; Optional. Vector of CAS numbers to filter chemicals (e.g. \code{c("1071-83-6","63-25-2","138261-41-3")}). Default is \code{NULL} (no filtering).
+#' @param endpoint_group character; Optional. Endpoint group(s) to filter results. All possible endpoint groups can be checked via \code{stx_catalog()$endpoint_group}. Default is \code{c("XX50", "NOEX", "LOEX")}.
+#' @param exposure character; Optional. Vector of exposure types (e.g. \code{"aquatic"}). All possible exposure types can be checked via \code{stx_catalog()$exposure}. Default is \code{NULL}.
+#' @param effect character; Optional. Vector of effect types (e.g. \code{"Mortality", "Growth"}). All possible effect types can be checked via \code{stx_catalog()$effect}. Default is \code{NULL}.
+#' @param duration numeric; Optional. Numeric vector of length two specifying minimum and maximum test duration (in hours), e.g. \code{c(0, 48)}. Default is \code{c(0, Inf)}.
+#' @param duration_unit character; Optional. Filter by duration unit (e.g. \code{"h"} for hours). All possible duration units can be checked via \code{stx_catalog()$duration_unit}. Set to \code{NULL} to keep all. Default is \code{"h"}.
+#' @param concentration_unit character; Optional. Filter by concentration unit (e.g. \code{"g/l"}). All possible concentration units can be checked via \code{stx_catalog()$concentration_unit}. Default is \code{NULL}.
+#' @param concentration_type character; Optional. Filter by concentration type (e.g. \code{"active ingredient"}). All possible concentration types can be checked via \code{stx_catalog()$concentration_type}. Default is \code{NULL}.
+#' @param tax_columns character; Columns of taxonomic information to append to results. All possible columns can be checked via \code{colnames(stx_taxa())}. Default is \code{c("group", "taxon", "genus", "family")}.
+#' @param tax_genus character; Optional. Filter by genus. All possible genera can be checked via \code{stx_catalog()$genus}. Default is \code{NULL}.
+#' @param tax_family character; Optional. Filter by family. All possible families can be checked via \code{stx_catalog()$family}. Default is \code{NULL}.
+#' @param tax_order character; Optional. Filter by order. All possible orders can be checked via \code{stx_catalog()$order}. Default is \code{NULL}.
+#' @param tax_class character; Optional. Filter by class. All possible classes can be checked via \code{stx_catalog()$class}. Default is \code{NULL}.
+#' @param ecotox_grp character; Optional. Filter by one or more ecotoxicological groups. Possible values are \code{"invertebrate"}, \code{"plant"}, \code{"fish"}, \code{"fungi"}, \code{"algae"}, \code{"aves"}, \code{"amphibia"}, \code{"mammalia"}, \code{"reptilia"}, \code{"macrophyte"}. All possible ecotox groups can be checked via \code{stx_catalog()$group}. Multiple entries possible. Default is \code{NULL}.
+#' @param include_reference logical; If \code{TRUE}, append reference information. Default is \code{FALSE}.
+#' @param rm_na logical; If \code{TRUE}, remove rows with "NR" (not reported) values. Default is \code{TRUE}.
+#' @param verbose logical; If \code{TRUE}, print progress messages. Default is \code{FALSE}.
+#' @param ... Additional arguments passed to \code{stx_download}.
+#'
+#' @return Returns a \code{data.table} with filtered Standartox toxicity data.
+#'
+#' @author Hannes Reinwald
 #'
-#' @return Returns a list of three data.tables (filtered data base query results, aggregated data base query results, meta information)
-#' @author Andreas Scharmueller \email{andschar@@protonmail.com}
-#' 
 #' @examples
 #' \donttest{
 #' 
+#' Basic stx_query() call: Will return results filtered for default endpoint_group = c("XX50", "NOEX", "LOEX") and duration_unit = "h"
+#' stx_query(verbose = T)
+#' 
+#' # If you wish to filter for different endpoint groups, you can specify them in the query.
+#' stx_catalog()$endpoint_group # to view available endpoint groups
+#' stx_query(endpoint_group = c("Bioconc","MATC","MCIG"))
+#' 
+#' # Query for a specific CAS number, endpoint group, and tax group(s)
+#' stx_query(
+#'   cas_number = "1071-83-6",
+#'   endpoint_group = "NOEX",
+#'   duration = c(0, 48),
+#'   ecotox_group = c("invertebrate", "fish"),
+#'   tax_family = "Daphniidae"
+#' )
+#' 
+#' # get ALL LC50 values for 96 - 120 h of exposure for zebra fish (Danio rerio)
+#' stx_query(
+#'   endpoint_group = "XX50",
+#'   duration = c(96, 120),
+#'   effect = "mortality",
+#'   concentration_unit = "g/l",
+#'   concentration_type = "active ingredient",
+#'   tax_genus = "Danio",
+#'   include_reference = TRUE
+#' )
+#' 
+#' # get ALL LC50 values for 24 - 48 h of exposure for Daphnia
+#' stx_query(
+#'   endpoint_group = "XX50",
+#'   duration = c(24, 48),
+#'   effect = "mortality",
+#'   concentration_unit = "g/l",
+#'   concentration_type = "active ingredient",
+#'   tax_genus = "Daphnia",
+#'   include_reference = TRUE
+#' )
+#' }
+#'
 #' @export
 #' 
 stx_query = function(
@@ -197,12 +237,12 @@ stx_query = function(
   concentration_unit = NULL, # character vector
   concentration_type = NULL, # character vector
   ## TAXA FILTERING ##
-  tax_columns = c('tax_group', 'tax_taxon', 'tax_genus', 'tax_family'), # Taxonomy columns to append to the query results. DEFAULT: c('tax_group', 'tax_taxon', 'tax_genus', 'tax_family')
+  tax_columns = c('group', 'taxon', 'genus', 'family'), # Taxonomy columns to append to the query results. DEFAULT: c('tax_group', 'tax_taxon', 'tax_genus', 'tax_family')
   tax_genus  = NULL, # character vector
   tax_family = NULL, # character vector
   tax_order  = NULL, # character vector
   tax_class  = NULL, # character vector
-  ecotox_grp = NULL, # character vector
+  tax_group = NULL, # character vector
   ## REFERENCE SECTION ##
   include_reference = FALSE, # Default FALSE
   rm_na = TRUE, # Default TRUE; if FALSE, keep NR values in the result
@@ -266,6 +306,7 @@ stx_query = function(
   tax_col = paste0("tax_", colnames(stxDb$taxa))
   tax_col[ grep(paste0("^tax_",tax_key,"$"),tax_col) ] <- tax_key # replace tax_key with tl_id
   colnames(stxDb$taxa) <- tax_col # set new column names
+  tax_columns = paste0("tax_", tax_columns) # append 'tax_' prefix to tax_columns
   
   # Select pre-defined columns for output
   tax.out = stxDb$taxa[, c(tax_key, tax_columns), with = FALSE]
@@ -273,8 +314,8 @@ stx_query = function(
   tox.dt = merge(tox.dt, tax.out, all.x = TRUE, by = tax_key)
   
   ## Filtering ecotox_grp ##
-  if(!is.null(ecotox_grp)){
-    regstr = paste(ecotox_grp, collapse = "|")
+  if(!is.null(tax_group)){
+    regstr = paste(tax_group, collapse = "|")
     tox.dt = tox.dt[ grepl(regstr, tox.dt$tax_group) ]
     if( nrow( tox.dt ) == 0 ) {
       warning("No query matches found for the provided tax_group. Please check the input values.")
@@ -289,7 +330,8 @@ stx_query = function(
     tax_genus  = tax_genus
   )
   tox.dt = filter_dt( tox.dt, var_ls)
-  if( is.null(tox.dt) ) { return(NULL) } # Check 
+  if( is.null(tox.dt) ) { return(NULL) } # Check
+  suppressWarnings( tox.dt[, (tax_key) := NULL] ) # don't need the tl_id column anymore.
   
   # Step 3: Final Tox data filtering 
   tox.dt = tox.dt[!is.na(result_id)] # rmv any rows with NA in result_id <- this should not be the case but to be save!
@@ -338,7 +380,6 @@ stx_query = function(
 } 
 
 
-
 #' Retrieve data catalog
 #' 
 #' Retrieve a data catalog for all variables (and their values) that can be retrieved with stx_query()
@@ -539,4 +580,4 @@ stx_meta = function(silent = FALSE, dir_out = file.path(tempdir(), "standartox")
   out = stx_download(data_type = 'meta', dir_out = dir_out)[[1]]
 
   return(out)
-}
+}
\ No newline at end of file