diff --git a/README.md b/README.md
index ff6e88e..7580ac7 100644
--- a/README.md
+++ b/README.md
@@ -49,12 +49,29 @@ Minimal changes needed are:
 
 ## How to run 
 
+Use snakemake_cellranger.yaml to create an environment with all required packages.
+
+```bash
+micromamba create -f snakemake_cellranger.yaml
+```
+
+or
+
+```bash
+conda env create -f snakemake_cellranger.yaml
+```
+
+Set channel priority to strict.
+
+```bash
+conda config --set channel_priority strict
+```
+
 You may call the pipeline as follows in the directory where you cloned it. 
 
 ```bash
-snakemake --cluster "bsub -n16 -q verylong -R rusage[mem=200GB]" -p -j4 -c42 --configfile config/config-cluster.yaml --use-conda  --use-envmodules
+snakemake --cluster "bsub -n16 -q verylong -R rusage[mem=200GB]" -p -j4 -c42 --configfile config/config-cluster.yaml --use-conda --use-envmodules --conda-frontend conda
 ```
 
  - `--cluster` may change depending on the computational footprint of your analyses
  - `--configfile` should point to your personal configuration
-
diff --git a/config/config-example.yaml b/config/config-example.yaml
index 2040a43..0e12dce 100644
--- a/config/config-example.yaml
+++ b/config/config-example.yaml
@@ -22,16 +22,15 @@ metadata:
     - ""
   # Define the column names used to define a sample
   identifiers:
-    - SAMPLE_NAME
+    - PID
     - Sample_Type
-    - Age
-    - fraction
   # Define all columns from the metadata spreadsheet that
   # will be included in the SingleCellExperiment / Seurat
   # objects
   single_cell_object_metadata_fields:
-      - SAMPLE_NAME
-      - Age
+      - PID
+      - Sample_Type
+      - individual
 # Enable / Disable rules and specifiy rule-specific parameters
 rules:
   cellranger_count:
diff --git a/config/config-interspecies-bonemarrow.yaml b/config/config-interspecies-bonemarrow.yaml
new file mode 100644
index 0000000..3eedb8e
--- /dev/null
+++ b/config/config-interspecies-bonemarrow.yaml
@@ -0,0 +1,58 @@
+paths:
+  # directory to store output data = cellranger files and SingleCellExperiment objects
+  # paths to input data = fastq files are stored in metadata sheet (OTP export)
+  output_dir: "/omics/odcf/analysis/OE0538_projects/DO-0008/data"
+  target_templates:
+    # paths for each type of output file
+      # remove "/{0[Species_ID]}" to store all objects in one directory or
+      # replace "Species_ID" with other colname as suitable wildcard from metadata sheet (e.g. Sample_Type, ...) to create subfolders
+      # remove or replace all instances of Species_ID in Snakefile and samples.py 
+    linked_files: "cellranger/linked_files/{0[Species_ID]}/{0[individual]}" 
+    cellranger_count: "cellranger/cellranger_count/{0[Species_ID]}/{0[individual]}/outs"
+    construct_sce_objects: "sce_objects/01_cellranger_output/{0[Species_ID]}/sce_{0[individual]}-01"
+  # define specific target files
+  target_files:
+    samples_sheet: "metadata.csv"
+references:
+  all_masked: "/omics/groups/OE0538/internal/shared_data/CellRangerReferences/GRCm38_masked_allStrains/"
+metadata:
+  table:
+  # should have same format as or be based on OTP export sheet
+   - "/omics/odcf/analysis/OE0538_projects/DO-0008/metadata/OE0538_DO-0008_metadata_combined.csv"
+  identifiers: 
+  # define one or multiple column name(s) that uniquely define a sample/object 
+  # multiple cols are concatenated into one sample-specific name
+  # in the format of {Species_ID}_{Age_ID}_{Fraction_ID}_{Sample_NR}
+    - Species_ID # columns I manually added to metadata sheet 
+    - Age_ID
+    - Fraction_ID 
+    - Sample_NR
+    #- PID # columns included in OTP metadata sheet
+    #- Sample_Type 
+  single_cell_object_metadata_fields:
+  # Define all columns from the metadata spreadsheet that
+  # will be included in the SingleCellExperiment / Seurat objects
+      - Object_ID # columns I manually added to metadata sheet
+      - Mouse_ID
+      - Species_ID
+      - Age
+      - Age_ID
+      - Age_weeks
+      - Fraction
+      - Fraction_ID
+      - Antibody_combination
+      - Sample_NR
+      - Extrarun
+      - Batch_exp_day
+      - Batch_sequencing
+      - Date_collected
+      - Keep_sample
+      - individual
+      - PID
+# Enable / Disable rules and specifiy rule-specific parameters
+rules:
+  cellranger_count:
+    extra: ""  # set additional arguments for cellranger count
+  allele_specific: False
+  wasp_filter_reads: False
+  realign_bam: False
\ No newline at end of file
diff --git a/snakemake_cellranger.yaml b/snakemake_cellranger.yaml
new file mode 100644
index 0000000..d44ce8f
--- /dev/null
+++ b/snakemake_cellranger.yaml
@@ -0,0 +1,181 @@
+# this env is required, please install and activate for running snakemake
+name: snakemake-cellranger
+channels:
+- conda-forge
+- bioconda
+- defaults
+dependencies:
+- libsqlite=3.39.4=h753d276_0
+- toposort=1.7=pyhd8ed1ab_0
+- importlib-metadata=5.0.0=pyha770c72_1
+- jinja2=3.1.2=pyhd8ed1ab_1
+- exceptiongroup=1.0.1=pyhd8ed1ab_0
+- veracitools=0.1.3=py_0
+- google-cloud-core=2.3.2=pyhd8ed1ab_0
+- coincbc=2.10.8=0_metapackage
+- oauth2client=4.1.3=py_0
+- ubiquerg=0.6.2=pyhd8ed1ab_0
+- google-auth=2.14.0=pyh1a96a4e_0
+- python-dateutil=2.8.2=pyhd8ed1ab_0
+- liblapacke=3.9.0=16_linux64_openblas
+- micromamba=1.0.0=1
+- coin-or-clp=1.17.7=hc56784d_2
+- aioeasywebdav=2.4.0=pyha770c72_0
+- libgomp=12.2.0=h65d4601_19
+- ply=3.11=py_1
+- importlib_resources=5.10.0=pyhd8ed1ab_0
+- libgfortran5=12.2.0=h337968e_19
+- datrie=0.8.2=py310h5764c6d_6
+- pip=22.3.1=pyhd8ed1ab_0
+- packaging=21.3=pyhd8ed1ab_0
+- dataclasses=0.8=pyhc8e2a94_3
+- pycparser=2.21=pyhd8ed1ab_0
+- configargparse=1.5.3=pyhd8ed1ab_0
+- urllib3=1.26.11=pyhd8ed1ab_0
+- colorama=0.4.6=pyhd8ed1ab_0
+- yarl=1.8.1=py310h5764c6d_0
+- psutil=5.9.4=py310h5764c6d_0
+- plac=1.3.5=pyhd8ed1ab_0
+- certifi=2022.9.24=pyhd8ed1ab_0
+- markupsafe=2.1.1=py310h5764c6d_2
+- toolz=0.12.0=pyhd8ed1ab_0
+- cachetools=5.2.0=pyhd8ed1ab_0
+- google-crc32c=1.1.2=py310he8fe98e_4
+- amply=0.1.5=pyhd8ed1ab_0
+- reretry=0.11.1=pyhd8ed1ab_0
+- c-ares=1.18.1=h7f98852_0
+- libsodium=1.0.18=h36c2ea0_1
+- peppy=0.35.2=pyhd8ed1ab_0
+- snakemake=7.18.1=hdfd78af_0
+- pytz=2022.6=pyhd8ed1ab_0
+- pytest=7.2.0=pyhd8ed1ab_2
+- libcblas=3.9.0=16_linux64_openblas
+- httplib2=0.21.0=pyhd8ed1ab_0
+- yte=1.5.1=py310hff52083_1
+- pyrsistent=0.19.2=py310h5764c6d_0
+- libgrpc=1.49.1=h30feacc_1
+- pulp=2.7.0=py310hff52083_0
+- attrs=22.1.0=pyh71513ae_1
+- pandas=1.5.1=py310h769672d_1
+- multidict=6.0.2=py310h5764c6d_2
+- connection_pool=0.0.3=pyhd3deb0d_0
+- _libgcc_mutex=0.1=conda_forge
+- google-api-core=2.10.2=pyhd8ed1ab_0
+- smmap=3.0.5=pyh44b312d_0
+- pygments=2.13.0=pyhd8ed1ab_0
+- wheel=0.38.3=pyhd8ed1ab_0
+- docutils=0.19=py310hff52083_1
+- ftputil=5.0.4=pyhd8ed1ab_0
+- conda=22.9.0=py310hff52083_2
+- gitdb=4.0.9=pyhd8ed1ab_0
+- aiohttp=3.8.3=py310h5764c6d_1
+- libgfortran-ng=12.2.0=h69a702a_19
+- stopit=1.1.2=py_0
+- defusedxml=0.7.1=pyhd8ed1ab_0
+- liblapack=3.9.0=16_linux64_openblas
+- backports=1.0=py_2
+- numpy=1.23.4=py310h53a5b5f_1
+- iniconfig=1.1.1=pyh9f0ad1d_0
+- snakemake-minimal=7.18.1=pyhdfd78af_0
+- coin-or-cbc=2.10.8=h3786ebc_0
+- tzdata=2022f=h191b570_0
+- readline=8.1.2=h0f457ee_0
+- frozenlist=1.3.3=py310h5764c6d_0
+- filelock=3.8.0=pyhd8ed1ab_0
+- ruamel_yaml=0.15.80=py310h5764c6d_1008
+- pycosat=0.6.4=py310h5764c6d_1
+- logmuse=0.2.6=pyh8c360ce_0
+- boto3=1.26.5=pyhd8ed1ab_0
+- pyparsing=3.0.9=pyhd8ed1ab_0
+- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
+- pyu2f=0.1.5=pyhd8ed1ab_0
+- protobuf=4.21.9=py310hd8f1fbe_0
+- conda-package-handling=1.9.0=py310h5764c6d_1
+- pysftp=0.2.9=py_1
+- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0
+- python-fastjsonschema=2.16.2=pyhd8ed1ab_0
+- ld_impl_linux-64=2.39=hc81fddc_0
+- ncurses=6.3=h27087fc_1
+- googleapis-common-protos=1.56.4=py310hff52083_1
+- google-cloud-storage=2.6.0=pyh1a96a4e_0
+- slacker=0.14.0=py_0
+- libffi=3.4.2=h7f98852_5
+- _openmp_mutex=4.5=2_gnu
+- libzlib=1.2.13=h166bdaf_4
+- coin-or-utils=2.11.6=h202d8b1_2
+- libabseil=20220623.0=cxx17_h48a1fff_5
+- prettytable=3.4.1=pyhd8ed1ab_0
+- cffi=1.15.1=py310h255011f_2
+- typing-extensions=4.4.0=hd8ed1ab_0
+- pyyaml=6.0=py310h5764c6d_5
+- dropbox=11.35.0=pyhd8ed1ab_0
+- python_abi=3.10=2_cp310
+- nbformat=5.7.0=pyhd8ed1ab_0
+- libnsl=2.0.0=h7f98852_0
+- stone=3.3.1=pyhd8ed1ab_0
+- libblas=3.9.0=16_linux64_openblas
+- cryptography=38.0.3=py310h600f1e7_0
+- google-auth-httplib2=0.1.0=pyhd8ed1ab_1
+- tk=8.6.12=h27826a3_0
+- libopenblas=0.3.21=pthreads_h78a6416_3
+- pyasn1-modules=0.2.7=py_0
+- jsonschema=4.17.0=pyhd8ed1ab_0
+- coin-or-cgl=0.60.6=h6f57e76_2
+- dpath=2.0.6=py310hff52083_2
+- google-api-python-client=2.65.0=pyhd8ed1ab_0
+- setuptools-scm=7.0.5=pyhd8ed1ab_1
+- rsa=4.9=pyhd8ed1ab_0
+- pyasn1=0.4.8=py_0
+- wcwidth=0.2.5=pyh9f0ad1d_2
+- tqdm=4.64.1=pyhd8ed1ab_0
+- traitlets=5.5.0=pyhd8ed1ab_0
+- wrapt=1.14.1=py310h5764c6d_1
+- zipp=3.10.0=pyhd8ed1ab_0
+- botocore=1.29.5=pyhd8ed1ab_0
+- idna=3.4=pyhd8ed1ab_0
+- google-resumable-media=2.4.0=pyhd8ed1ab_0
+- bcrypt=3.2.2=py310h5764c6d_1
+- attmap=0.13.2=pyhd8ed1ab_0
+- requests=2.28.1=pyhd8ed1ab_1
+- xz=5.2.6=h166bdaf_0
+- grpcio=1.49.1=py310hc32fa93_1
+- libprotobuf=3.21.9=h6239696_0
+- gitpython=3.1.29=pyhd8ed1ab_0
+- s3transfer=0.6.0=pyhd8ed1ab_0
+- yaml=0.2.5=h7f98852_2
+- ratelimiter=1.2.0=pyhd8ed1ab_1003
+- bzip2=1.0.8=h7f98852_4
+- ca-certificates=2022.9.24=ha878542_0
+- uritemplate=4.1.1=pyhd8ed1ab_0
+- future=0.18.2=pyhd8ed1ab_6
+- jupyter_core=4.11.2=py310hff52083_0
+- pluggy=1.0.0=pyhd8ed1ab_5
+- brotlipy=0.7.0=py310h5764c6d_1005
+- jmespath=1.0.1=pyhd8ed1ab_0
+- setuptools=65.5.1=pyhd8ed1ab_0
+- libcrc32c=1.1.2=h9c3ff4c_0
+- libstdcxx-ng=12.2.0=h46fd767_19
+- commonmark=0.9.1=py_0
+- zlib=1.2.13=h166bdaf_4
+- tabulate=0.9.0=pyhd8ed1ab_1
+- re2=2022.06.01=h27087fc_0
+- appdirs=1.4.4=pyh9f0ad1d_0
+- aiosignal=1.3.1=pyhd8ed1ab_0
+- paramiko=2.12.0=pyhd8ed1ab_0
+- filechunkio=1.8=py_2
+- charset-normalizer=2.1.1=pyhd8ed1ab_0
+- python-irodsclient=1.1.5=pyhd8ed1ab_0
+- rich=12.6.0=pyhd8ed1ab_0
+- async-timeout=4.0.2=pyhd8ed1ab_0
+- six=1.16.0=pyh6c4a22f_0
+- coin-or-osi=0.108.7=h2720bb7_2
+- pyopenssl=22.1.0=pyhd8ed1ab_0
+- libuuid=2.32.1=h7f98852_1000
+- pysocks=1.7.1=pyha2e5f31_6
+- openssl=3.0.7=h166bdaf_0
+- typing_extensions=4.4.0=pyha770c72_0
+- python=3.10.6=ha86cf86_0_cpython
+- pynacl=1.5.0=py310h5764c6d_2
+- smart_open=6.2.0=pyha770c72_0
+- tomli=2.0.1=pyhd8ed1ab_0
+- libgcc-ng=12.2.0=h65d4601_19
\ No newline at end of file
diff --git a/utils/process_otp_metadata.py b/utils/process_otp_metadata.py
new file mode 100644
index 0000000..10ca797
--- /dev/null
+++ b/utils/process_otp_metadata.py
@@ -0,0 +1,13 @@
+import pandas as pd
+
+
+def rename_date_of_birth(row: pd.Series):
+    """Unify the 'BIRTH' and 'DATE_OF_BIRTH' columns into
+    a single 'DATE_OF_BIRTH' column.
+    """
+    values = [row["BIRTH"], row["DATE_OF_BIRTH"]]
+    dates = [val for val in values if not pd.isna(val)]
+    if len(dates) == 1:
+        return dates[0]
+    elif len(dates) != 1:
+        return pd.NA
diff --git a/workflow/Snakefile b/workflow/Snakefile
index b035d0c..8c705fb 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -6,27 +6,24 @@ from scripts.samples import Samples
 
 samples = Samples(config)
 
-REFERENCES = config["references"]
+METADATA_PATH = config["metadata"]["table"]
 OUTPUT_BASE_PATH = config["paths"]["output_dir"]
 
-def get_sample_string(identifiers):
-    return "_".join(["{" + label + "}" for label in identifiers])
-
-SAMPLE_WILDCARDS = get_sample_string(config['metadata']['identifiers'])
+REFERENCES = config["references"]
+IDENTIFIERS = config["metadata"]["identifiers"]
 
 print(samples.metadata)
 
 localrules: link_files, write_samples_spreadsheet, all  # local execution of non-demanding rules
 
-wildcard_constraints:
-    sample_type="[A-z0-9_-]+",
-    individual="[A-z0-9_-]+"
+wildcard_constraints: #should not contain ".", see rule cellranger_count
+    individual="[A-z0-9_-]+",
+    Species_ID="[A-z0-9_-]+"
 
 rule all:
     input:
         samples.targets
 
-
 rule link_files:
     """Link files with filename pattern required for Cellranger
     
@@ -41,9 +38,9 @@ rule link_files:
 
         )
     output:
-        directory(f"{OUTPUT_BASE_PATH}/linked_files/{{individual}}/{{sample_type}}/")
+       directory(f"{OUTPUT_BASE_PATH}/cellranger/linked_files/{{Species_ID}}/{{individual}}")
     log:
-        "logs/link_files_{individual}_{sample_type}.log"
+        f"{OUTPUT_BASE_PATH}/cellranger/logs/{{Species_ID}}/link_files_{{individual}}.log"
     run:
         log_fh  = open(log[0], "w")
         os.makedirs(output[0], exist_ok=True)
@@ -57,9 +54,9 @@ rule link_files:
 
 rule write_samples_spreadsheet:
     output:
-        f"{OUTPUT_BASE_PATH}/{config['paths']['target_files']['samples_sheet']}"
+        f"{OUTPUT_BASE_PATH}/cellranger/{config['paths']['target_files']['samples_sheet']}"
     log:
-        "logs/write_samples_spreadsheet.log"
+        f"{OUTPUT_BASE_PATH}/cellranger/logs/write_samples_spreadsheet.log"
     run:
         with open(log[0], "w") as log_fh:
             print("storing samples metadata to {output[0]}", file=log_fh)
@@ -75,23 +72,47 @@ rule cellranger_count:
     input:
         rules.link_files.output
     output:
-        outs_dir=f"{OUTPUT_BASE_PATH}/cellranger_count/{{individual}}_{{sample_type}}/outs"
+        outs_dir=directory(f"{OUTPUT_BASE_PATH}/cellranger/cellranger_count/{{Species_ID}}/{{individual}}/outs")
     params:
         genome=REFERENCES["all_masked"],
         output_root_dir=lambda wildcards, output: Path(output[0]).parents[1],  # remove last 2 levels from output path
         extra=config["rules"]["cellranger_count"]["extra"]
     log:
-        "logs/cellranger_count_{individual}_{sample_type}.log"
+        f"{OUTPUT_BASE_PATH}/cellranger/logs/{{Species_ID}}/cellranger_count_{{individual}}.log"
     threads: 16
     envmodules:
         "cellranger/6.1.1",
     shell:
         "cd {params.output_root_dir}; "
+        "pwd; "
         "cellranger count "
-        "--id {wildcards.individual}_{wildcards.sample_type} "  # letters, numbers, hyphens, and underscores
+        "--id {wildcards.individual} "  # letters, numbers, hyphens, and underscores
         "--transcriptome {params.genome} "
         "--fastqs {input} "
         "--localcores={threads} "
         "{params.extra} "
-        "--sample {wildcards.individual}_{wildcards.sample_type} "
+        "--sample {wildcards.individual} "
         "> {log}"
+
+
+rule construct_sce_objects:
+    """construct SCE objects from counts matrices
+    
+    Initially, building the environment might take some time.
+    
+    Note: output must match target_templates in configuration YAML.
+    """
+    input: 
+        output_cellranger = rules.cellranger_count.output
+    output:
+        sce_objects = f"{OUTPUT_BASE_PATH}/sce_objects/01_cellranger_output/{{Species_ID}}/sce_{{individual}}-01"
+    params:
+        identifiers = IDENTIFIERS,
+        metadata = METADATA_PATH,
+        single_cell_object_metadata_fields = config["metadata"]["single_cell_object_metadata_fields"]
+    threads: 2
+    conda: 
+        "envs/construct_sce_objects.yaml" 
+    script:
+        "scripts/construct_sce_objects.R"
+        
diff --git a/workflow/envs/construct_sce_objects.yaml b/workflow/envs/construct_sce_objects.yaml
new file mode 100644
index 0000000..bd49ff1
--- /dev/null
+++ b/workflow/envs/construct_sce_objects.yaml
@@ -0,0 +1,243 @@
+# this environment is automatically created by snakemake
+name: construct-sce-objects
+channels:
+- conda-forge
+- bioconda
+- defaults
+dependencies:
+- r-dbplyr=2.3.0=r41hc72bb7e_0
+- bioconductor-delayedmatrixstats=1.16.0=r41hdfd78af_0
+- font-ttf-ubuntu=0.83=hab24e00_0
+- libsanitizer=12.2.0=h46fd767_19
+- bwidget=1.9.14=ha770c72_1
+- r-bit=4.0.5=r41h06615bd_0
+- libcurl=7.86.0=h7bff187_1
+- libpng=1.6.39=h753d276_0
+- r-digest=0.6.31=r41h38f115c_0
+- r-fastmap=1.1.0=r41h7525677_1
+- r-cpp11=0.4.3=r41hc72bb7e_0
+- _r-mutex=1.0.1=anacondar_1
+- r-stringi=1.7.12=r41h1ae9187_0
+- r-forcats=0.5.2=r41hc72bb7e_1
+- r-withr=2.5.0=r41hc72bb7e_1
+- r-crayon=1.5.2=r41hc72bb7e_1
+- r-highr=0.10=r41hc72bb7e_0
+- r-sys=3.4.1=r41h06615bd_0
+- r-formatr=1.14=r41hc72bb7e_0
+- r-base=4.1.3=h2f963a2_5
+- bioconductor-delayedarray=0.20.0=r41hc0cfd56_2
+- libstdcxx-ng=12.2.0=h46fd767_19
+- tktable=2.10=hb7b940f_3
+- r-sass=0.4.5=r41h38f115c_0
+- lerc=4.0.0=h27087fc_0
+- libgcc-devel_linux-64=12.2.0=h3b97bd3_19
+- bioconductor-scuttle=1.4.0=r41hc247a5b_2
+- libgfortran5=12.2.0=h337968e_19
+- r-colorspace=2.1_0=r41h133d619_0
+- r-ids=1.0.1=r41hc72bb7e_2
+- libstdcxx-devel_linux-64=12.2.0=h3b97bd3_19
+- jpeg=9e=h166bdaf_2
+- r-callr=3.7.3=r41hc72bb7e_0
+- r-curl=4.3.3=r41h06615bd_1
+- fribidi=1.0.10=h36c2ea0_0
+- krb5=1.19.3=h3790be6_0
+- bioconductor-xvector=0.34.0=r41hc0cfd56_2
+- bioconductor-biocparallel=1.28.3=r41hc247a5b_1
+- libgcc-ng=12.2.0=h65d4601_19
+- r-r.utils=2.12.2=r41hc72bb7e_0
+- r-lubridate=1.9.1=r41h133d619_0
+- r-base64enc=0.1_3=r41h06615bd_1005
+- r-readr=2.1.3=r41h7525677_1
+- ca-certificates=2022.12.7=ha878542_0
+- xorg-kbproto=1.0.7=h7f98852_1002
+- r-xml2=1.3.3=r41h044e5c7_2
+- r-scales=1.2.1=r41hc72bb7e_1
+- sed=4.8=he412f7d_0
+- harfbuzz=6.0.0=h8e241bc_0
+- readline=8.1.2=h0f457ee_0
+- zstd=1.5.2=h3eb15da_6
+- r-rcurl=1.98_1.9=r41h06615bd_1
+- libuuid=2.32.1=h7f98852_1000
+- zlib=1.2.13=h166bdaf_4
+- r-r6=2.5.1=r41hc72bb7e_1
+- bioconductor-edger=3.36.0=r41hc247a5b_2
+- libzlib=1.2.13=h166bdaf_4
+- r-rcolorbrewer=1.1_3=r41h785f33e_1
+- libedit=3.1.20191231=he28a2e2_2
+- r-farver=2.1.1=r41h7525677_1
+- r-lattice=0.20_45=r41h06615bd_1
+- libgfortran-ng=12.2.0=h69a702a_19
+- fonts-conda-ecosystem=1=0
+- r-glue=1.6.2=r41h06615bd_1
+- xorg-libice=1.0.10=h7f98852_0
+- r-utf8=1.2.2=r41h06615bd_1
+- xorg-libxt=1.2.1=h7f98852_2
+- pandoc=2.19.2=h32600fe_1
+- bioconductor-genomicranges=1.46.1=r41hc0cfd56_1
+- r-pkgconfig=2.0.3=r41hc72bb7e_2
+- r-rstudioapi=0.14=r41hc72bb7e_1
+- r-memoise=2.0.1=r41hc72bb7e_1
+- r-futile.options=1.0.1=r41hc72bb7e_1003
+- r-snow=0.4_4=r41hc72bb7e_1
+- r-httr=1.4.4=r41hc72bb7e_1
+- libffi=3.4.2=h7f98852_5
+- _openmp_mutex=4.5=2_gnu
+- sysroot_linux-64=2.12=he073ed8_15
+- keyutils=1.6.1=h166bdaf_0
+- bioconductor-biobase=2.54.0=r41hc0cfd56_2
+- r-googledrive=2.0.0=r41hc72bb7e_1
+- r-rvest=1.0.3=r41hc72bb7e_1
+- _libgcc_mutex=0.1=conda_forge
+- bioconductor-singlecellexperiment=1.16.0=r41hdfd78af_0
+- r-yaml=2.3.7=r41h133d619_0
+- r-selectr=0.4_2=r41hc72bb7e_2
+- r-generics=0.1.3=r41hc72bb7e_1
+- r-backports=1.4.1=r41h06615bd_1
+- r-ps=1.7.2=r41h06615bd_0
+- font-ttf-inconsolata=3.000=h77eed37_0
+- fonts-conda-forge=1=0
+- bioconductor-genomeinfodbdata=1.2.7=r41hdfd78af_2
+- r-gargle=1.2.1=r41hc72bb7e_1
+- r-viridislite=0.4.1=r41hc72bb7e_1
+- r-nlme=3.1_161=r41hac0b197_0
+- r-hms=1.1.2=r41hc72bb7e_1
+- r-munsell=0.5.0=r41hc72bb7e_1005
+- ld_impl_linux-64=2.39=hcc3a1bd_1
+- curl=7.86.0=h7bff187_1
+- c-ares=1.18.1=h7f98852_0
+- r-sitmo=2.0.2=r41h7525677_1
+- r-locfit=1.5_9.7=r41h133d619_0
+- bioconductor-matrixgenerics=1.6.0=r41hdfd78af_0
+- r-tzdb=0.3.0=r41h7525677_1
+- pango=1.50.12=hd33c08f_1
+- r-jsonlite=1.8.4=r41h133d619_0
+- libev=4.33=h516909a_1
+- libwebp-base=1.2.4=h166bdaf_0
+- bioconductor-dropletutils=1.14.2=r41hc247a5b_1
+- r-dplyr=1.0.10=r41h7525677_1
+- bioconductor-beachmat=2.10.0=r41hc247a5b_2
+- r-vroom=1.6.1=r41h38f115c_0
+- r-data.table=1.14.6=r41h06615bd_0
+- pixman=0.40.0=h36c2ea0_0
+- r-tibble=3.1.8=r41h06615bd_1
+- r-xfun=0.36=r41h38f115c_0
+- bioconductor-sparsematrixstats=1.6.0=r41hc247a5b_2
+- r-evaluate=0.20=r41hc72bb7e_0
+- xorg-xproto=7.0.31=h7f98852_1007
+- r-jquerylib=0.1.4=r41hc72bb7e_1
+- xorg-libxrender=0.9.10=h7f98852_1003
+- r-mass=7.3_58.2=r41h133d619_0
+- r-r.oo=1.25.0=r41hc72bb7e_1
+- r-rematch2=2.1.2=r41hc72bb7e_2
+- r-mime=0.12=r41h06615bd_1
+- liblapack=3.9.0=16_linux64_openblas
+- r-processx=3.8.0=r41h06615bd_0
+- r-blob=1.2.3=r41hc72bb7e_1
+- libblas=3.9.0=16_linux64_openblas
+- bioconductor-zlibbioc=1.40.0=r41hc0cfd56_2
+- r-haven=2.5.1=r41h7525677_0
+- r-timechange=0.2.0=r41h38f115c_0
+- r-matrixstats=0.63.0=r41h06615bd_0
+- r-assertthat=0.2.1=r41hc72bb7e_3
+- xorg-xextproto=7.3.0=h7f98852_1002
+- bioconductor-s4vectors=0.32.4=r41hc0cfd56_0
+- r-rappdirs=0.3.3=r41h06615bd_1
+- r-ggplot2=3.4.0=r41hc72bb7e_1
+- xz=5.2.6=h166bdaf_0
+- libgomp=12.2.0=h65d4601_19
+- r-cachem=1.0.6=r41h06615bd_1
+- r-tidyverse=1.3.2=r41hc72bb7e_1
+- r-clipr=0.8.0=r41hc72bb7e_1
+- r-rcpp=1.0.10=r41h38f115c_0
+- font-ttf-source-code-pro=2.038=h77eed37_0
+- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
+- bioconductor-iranges=2.28.0=r41hc0cfd56_2
+- libnghttp2=1.51.0=hdcd2b5c_0
+- bioconductor-rhdf5lib=1.16.0=r41hc0cfd56_2
+- r-fansi=1.0.4=r41h133d619_0
+- kernel-headers_linux-64=2.6.32=he073ed8_15
+- gcc_impl_linux-64=12.2.0=hcc96c02_19
+- r-pillar=1.8.1=r41hc72bb7e_1
+- gettext=0.21.1=h27087fc_0
+- libiconv=1.17=h166bdaf_0
+- r-bitops=1.0_7=r41h06615bd_1
+- bioconductor-hdf5array=1.22.1=r41hc0cfd56_1
+- r-purrr=1.0.1=r41h133d619_0
+- r-broom=1.0.3=r41hc72bb7e_0
+- r-bh=1.81.0_1=r41hc72bb7e_0
+- libxml2=2.10.3=h7463322_0
+- r-isoband=0.2.7=r41h38f115c_1
+- r-futile.logger=1.4.3=r41hc72bb7e_1004
+- r-reprex=2.0.2=r41hc72bb7e_1
+- r-openssl=2.0.5=r41hb1dc35e_0
+- r-lifecycle=1.0.3=r41hc72bb7e_1
+- r-stringr=1.5.0=r41h785f33e_0
+- r-tidyselect=1.2.0=r41hc72bb7e_0
+- r-lambda.r=1.2.4=r41hc72bb7e_2
+- expat=2.5.0=h27087fc_0
+- r-r.methodss3=1.8.2=r41hc72bb7e_1
+- r-dbi=1.1.3=r41hc72bb7e_1
+- r-rmarkdown=2.20=r41hc72bb7e_0
+- xorg-libx11=1.7.2=h7f98852_0
+- bzip2=1.0.8=h7f98852_4
+- r-cli=3.6.0=r41h38f115c_0
+- r-gtable=0.3.1=r41hc72bb7e_1
+- libtiff=4.5.0=h6adf6a1_2
+- libopenblas=0.3.21=pthreads_h78a6416_3
+- tk=8.6.12=h27826a3_0
+- bioconductor-genomeinfodb=1.30.1=r41hdfd78af_0
+- bioconductor-limma=3.50.3=r41hc0cfd56_0
+- xorg-libxau=1.0.9=h7f98852_0
+- r-cellranger=1.1.0=r41hc72bb7e_1005
+- make=4.3=hd18ef5c_1
+- r-rematch=1.0.1=r41hc72bb7e_1005
+- xorg-renderproto=0.11.1=h7f98852_1002
+- graphite2=1.3.13=h58526e2_1001
+- r-htmltools=0.5.4=r41h38f115c_0
+- r-mgcv=1.8_41=r41h5f7b363_0
+- r-knitr=1.42=r41hc72bb7e_1
+- libdeflate=1.17=h0b41bf4_0
+- bioconductor-rhdf5filters=1.6.0=r41hc247a5b_2
+- fontconfig=2.14.1=hc2a2eb6_0
+- r-bslib=0.4.2=r41hc72bb7e_0
+- gfortran_impl_linux-64=12.2.0=h55be85b_19
+- r-dqrng=0.3.0=r41h7525677_1
+- libxcb=1.13=h7f98852_1004
+- xorg-libsm=1.2.3=hd9c2040_1000
+- r-googlesheets4=1.0.1=r41h785f33e_1
+- r-prettyunits=1.1.1=r41hc72bb7e_2
+- libcblas=3.9.0=16_linux64_openblas
+- bioconductor-rhdf5=2.38.1=r41hbe1951d_0
+- binutils_impl_linux-64=2.39=he00db2b_1
+- icu=70.1=h27087fc_0
+- pcre2=10.40=hc3806b6_0
+- r-rlang=1.0.6=r41h7525677_1
+- r-readxl=1.4.1=r41h3ebcfa7_1
+- pthread-stubs=0.4=h36c2ea0_1001
+- r-ellipsis=0.3.2=r41h06615bd_1
+- r-magrittr=2.0.3=r41h06615bd_1
+- openssl=1.1.1s=h0b41bf4_1
+- gxx_impl_linux-64=12.2.0=hcc96c02_19
+- r-progress=1.2.2=r41hc72bb7e_3
+- r-fs=1.6.0=r41h38f115c_0
+- xorg-libxext=1.3.4=h7f98852_1
+- gsl=2.7=he838d99_0
+- r-tidyr=1.3.0=r41h38f115c_0
+- libssh2=1.10.0=haa6b8db_3
+- r-bit64=4.0.5=r41h06615bd_1
+- ncurses=6.3=h27087fc_1
+- r-matrix=1.5_3=r41h5f7b363_0
+- r-uuid=1.1_0=r41h06615bd_1
+- cairo=1.16.0=ha61ee94_1014
+- libglib=2.74.1=h606061b_1
+- r-vctrs=0.5.2=r41h38f115c_0
+- freetype=2.12.1=hca18f0e_1
+- r-tinytex=0.43=r41hc72bb7e_0
+- xorg-libxdmcp=1.1.3=h7f98852_0
+- bioconductor-biocgenerics=0.40.0=r41hdfd78af_0
+- r-dtplyr=1.2.2=r41hc72bb7e_2
+- r-modelr=0.1.10=r41hc72bb7e_0
+- r-askpass=1.1=r41h06615bd_3
+- bioconductor-summarizedexperiment=1.24.0=r41hdfd78af_0
+- r-labeling=0.4.2=r41hc72bb7e_2
+  
\ No newline at end of file
diff --git a/workflow/scripts/construct_sce_objects.R b/workflow/scripts/construct_sce_objects.R
new file mode 100644
index 0000000..3e21d8f
--- /dev/null
+++ b/workflow/scripts/construct_sce_objects.R
@@ -0,0 +1,44 @@
+#-------------------------------------------------------------------------------
+
+library(tidyverse, quietly = TRUE) 
+library(DropletUtils, quietly = TRUE) 
+set.seed(1)
+
+individual_curr <- snakemake@wildcards[["individual"]] # currently loaded individual sample 
+IDENTIFIERS <- snakemake@params[["identifiers"]] 
+
+# construct SCE object from raw cellranger output
+print(paste0(snakemake@input[["output_cellranger"]], "/raw_feature_bc_matrix"))
+sce <- read10xCounts(samples = paste0(snakemake@input[["output_cellranger"]], 
+                                      "/raw_feature_bc_matrix"), 
+                     col.names = TRUE, 
+                     type = "sparse" )
+
+# add specified metadata to object 
+metadata <- read.csv(file = snakemake@params[["metadata"]], 
+                     head = TRUE, 
+                     sep = ",", 
+                     check.names=FALSE, 
+                     stringsAsFactors=FALSE, 
+                     as.is=TRUE, 
+                     colClasses = "character")
+
+# if necessary, concatenate identifiers again to obtain all possible wildcards 
+if(! "individual" %in% colnames(metadata)){
+  metadata <- metadata %>%
+    unite(col = "individual", all_of(IDENTIFIERS), sep = "_", remove = FALSE)
+  print('establishing "individual" column, construct_sce_objects.R')
+}
+
+# subset data as specified by wildcard and single_cell_object_metadata_fields
+metadata <- metadata |> filter(individual == individual_curr)
+cols_add <- snakemake@params[["single_cell_object_metadata_fields"]]
+metadata <- metadata[,colnames(metadata) %in% cols_add]
+
+# add cell-level metadata to each barcode in the SCE object
+for(i in colnames(metadata)){
+  colData(sce)[i] <- rep(metadata[1,i], ncol(sce))
+}
+
+print(sce)
+saveRDS(sce, file = snakemake@output[["sce_objects"]])
\ No newline at end of file
diff --git a/workflow/scripts/samples.py b/workflow/scripts/samples.py
index bf9c40a..a41a331 100644
--- a/workflow/scripts/samples.py
+++ b/workflow/scripts/samples.py
@@ -2,83 +2,65 @@
 
 from snakemake.io import Wildcards
 
-
 class Samples:
     """
     Convert the OTP-exported metadata spreadsheet into a pandas.DataFrame
     that provides metadata for the workflow.
-
-
     """
-    # columns to select
-    columns = ["PID",
-               "Sample Type",
-               "Species Scientific Name",
-               "STRAIN",
-               "FastQ Path",
-               "FastQC Path",
-               "Run ID",
-               "ILSE_NO",
-               "TISSUE",
-               "BIRTH",
-               "DATE_OF_BIRTH",
-               "DATE_OF_DEATH",
-               "LANE_NO",
-               "READ",
-               "CELLRANGER_FASTQ_PATH"]
-
-    # map to rename columns in the format of (old):(new)
-    columns_map = {
-        "PID": "individual",
-        "Sample Type": "sample_type",
-    }
-
+               
     def __init__(self, config):
-        metadata_files = config["metadata"]["raw"]
+        IDENTIFIERS = config["metadata"]["identifiers"]
+        metadata_files = config["metadata"]["table"]
 
         self.output_base_dir = config["paths"]["output_dir"]
         self.target_templates = config["paths"]["target_templates"]
+        
+        # columns to select for subsampling 
+        columns = ["PID",
+                   "Sample Type",
+                   "Species Scientific Name",
+                   "STRAIN",
+                   "FastQ Path",
+                   "FastQC Path",
+                   "Run ID",
+                   "ILSE_NO",
+                   "TISSUE",
+                   "BIRTH",
+                   "DATE_OF_BIRTH",
+                   "DATE_OF_DEATH",
+                   "LANE_NO",
+                   "READ",
+                   "CELLRANGER_FASTQ_PATH",
+                   "individual"]  # include 'individual' col to be generated below
 
         metadata_full = pd.concat((pd.read_csv(f) for f in metadata_files), ignore_index=True)
 
-        metadata_full["DATE_OF_BIRTH"] = metadata_full.apply(self.rename_date_of_birth, axis=1)
-
+        # if required, generate 'individual' column containing all concatenated identifiers
+        if not "individual" in metadata_full.columns: 
+            metadata_full['individual'] = metadata_full[IDENTIFIERS].apply('_'.join, axis=1)
+   
         metadata_full = self.get_cellranger_filename(metadata_full)
 
-        self.metadata = self.select_columns(metadata_full)
-        self.metadata = self.metadata.rename(self.columns_map, axis="columns")
-
-    def rename_date_of_birth(self,
-                             row: pd.Series):
-        """Unify the 'BIRTH' and 'DATE_OF_BIRTH' columns into
-        a single 'DATE_OF_BIRTH' column.
-        """
-        values = [row["BIRTH"], row["DATE_OF_BIRTH"]]
-        dates = [val for val in values if not pd.isna(val)]
-        if len(dates) == 1:
-            return dates[0]
-        elif len(dates) != 1:
-            return pd.NA
+        self.metadata = self.select_columns(metadata_full, custom_columns = columns + IDENTIFIERS)
 
     def get_cellranger_filename(self, df: pd.DataFrame) -> pd.DataFrame:
 
         """Add column containing CellRanger compatible filename,
         i.e. in the format of
-
         [Sample Name]_S[Sample_Number]_L00[Lane Number]_[Read Type]_001.fastq.gz
-
-        Here, [Sample Name] consists of PID and Sample Type.
-
+        
+        Here, [Sample Name] consists of "individual".
+        
         See also: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/fastq-input
         """
 
-        grouped = df.groupby(["PID", "Sample Type", "LANE_NO", "READ"])
+        grouped = df.groupby(["individual", "LANE_NO", "READ"])
         groups = []
         for name, group in grouped:
             group = group.sort_values("FastQ Path")  # import to have consistent sorting
             group["multi_sample_idx"] = range(1, len(group)+1)
             group["CELLRANGER_FASTQ_PATH"] = group.agg(
-                "{0[PID]}_{0[Sample Type]}_S{0[multi_sample_idx]}_L00{0[LANE_NO]}_R{0[READ]}_001.fastq.gz".format,
+                "{0[individual]}_S{0[multi_sample_idx]}_L00{0[LANE_NO]}_R{0[READ]}_001.fastq.gz".format,
                 axis=1,
             )
             groups.append(group)
@@ -87,13 +69,19 @@ def get_cellranger_filename(self, df: pd.DataFrame) -> pd.DataFrame:
 
     def select_columns(self,
                        df: pd.DataFrame,
-                       columns: list = None):
+                       custom_columns: list = None):
         """Select/Subset columns from DataFrame to reduce
-        DataFrame dimensions. """
-        if not columns:
-            columns = self.columns
-        return df[columns]
-
+        DataFrame dimensions. A list of column names can be provided by `custom_columns` """
+        
+        if custom_columns:
+          print(custom_columns)
+          df_subset = df[custom_columns] 
+        else:
+          print(custom_columns)
+          df_subset = df[columns]
+        
+        return df_subset
+            
     @staticmethod
     def filter_by_wildcards(
             wildcards: Wildcards,
@@ -104,11 +92,10 @@ def filter_by_wildcards(
     ) -> list:
         """Get item from DataFrame by subsetting using the following index
         attributes provided via Snakemakes wildcards object.
-
         :return param:
         """
 
-        _identifiers = ["individual", "sample_type"]
+        _identifiers = ["individual", "Species_ID"] 
 
         if wildcards:
             filters = dict((k, getattr(wildcards, k)) for k in _identifiers)
@@ -138,7 +125,7 @@ def targets(self) -> list:
 
         targets = list(map(lambda x: self.metadata.agg(x.format, axis=1).drop_duplicates().to_list(), target_templates.values()))
         return targets
-
-
-
+      
+      
+      
 #grouped = metadata_subset.groupby(["PID", "Sample Type"])["FastQ Path"].apply(list)