Merged
Changes from all commits (19 commits)
bf73782
GH action created for copying files to docs repository (#342)
thbergst82 Sep 8, 2025
91414aa
Inference of GenCast & PanguWeather using ai-models (#445)
ltsabadz Sep 10, 2025
30d712c
Inference of Aurora using ai-models (#355)
ltsabadz Sep 10, 2025
725dc52
Silogen-engine finetuning example / life sciences (#402)
juhok-amd Sep 10, 2025
3e671ae
Add imagePullSecrets parameter to JupyterLab and VSCode workloads (#449)
markvanheeswijk Sep 18, 2025
522c2cb
Refm life science swinunetr training (#407)
jorivesga Sep 24, 2025
e391695
Enable MLflow in Silogen Finetuning Engine (#454)
Gastron Sep 25, 2025
6589f30
Feature/xlstm inference (#346)
peyron-amd Sep 25, 2025
4eb4830
Add Feature/wan2.1 serving (#338) along with tutorial
sopiko99 Sep 26, 2025
6324d8f
Add Wan2.2 fine-tuning workload
aniemela-amd Sep 26, 2025
f6e9192
Add TorchServe Packager (#450)
sopiko99 Sep 26, 2025
361a25e
Refm robotics job for OpenSplat (#311)
mwessman-amd Sep 26, 2025
24b390b
ECMWF downloader and preprocessor workload (#411)
ppihajoki-amd Sep 29, 2025
212577c
ECMWF ai-models outputs preprocessor (#444)
ppihajoki-amd Sep 29, 2025
2f1a687
Weatherbench runner (#446)
ppihajoki-amd Sep 29, 2025
122e34f
Add toctree to the workloads overview page (#457)
jussielo-amd Oct 6, 2025
79eefde
Added two new life science workloads for reinvent and semlaflow
mwessman-amd Oct 8, 2025
27adef1
Added vlm lora finetuning workload (#401)
tkarkkai-amd Oct 10, 2025
a6bfcef
DPO Example in silogen finetuning workload (#458)
Gastron Oct 21, 2025
35 changes: 35 additions & 0 deletions .github/workflows/docs-file-copy.yml
@@ -0,0 +1,35 @@
name: Copy workload documentation to public docs repo
# Rsync the ai-workloads documentation into a temporary clone of the public docs repo, then commit
# and push to its main branch, keeping the consolidated SiloGen docs in sync with this repository.

on:
  push:
    branches:
      - main
    paths:
      - "docs/**"
      - "workloads/**"
      - ".github/workflows/docs-file-copy.yml"

jobs:
  copy-docs:
    if: github.repository == 'silogen/ai-workloads'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout core repo
        uses: actions/checkout@v4

      - name: Push to public docs repo
        run: |
          git config --global user.name 'GitHub Actions'
          git config --global user.email 'actions@github.com'
          git clone https://x-access-token:${{ secrets.DOCS_REPO_TOKEN }}@github.com/silogen/ai-workloads.git source_docs
          git clone https://x-access-token:${{ secrets.DOCS_REPO_TOKEN }}@github.com/silogen/docs.git target_silogen_docs
          cd target_silogen_docs
          rsync -av --delete --exclude='.git' ../source_docs/docs docs/ai-workloads-docs
          rsync -av --delete --exclude='.git' ../source_docs/workloads docs/ai-workloads-manifests
          git add .
          git diff --staged --quiet || git commit -m "Update external docs from ai-workloads repo"
          git push origin main
        env:
          DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
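To preview what such a sync would transfer before it pushes, the same rsync invocations can be run locally with the dry-run flag. A sketch using the workflow's own paths (run from inside a local clone of the docs repo, with `source_docs` checked out alongside it):

```sh
# Dry run (-n): list what rsync would copy or delete without modifying the target clone.
rsync -avn --delete --exclude='.git' ../source_docs/docs docs/ai-workloads-docs
rsync -avn --delete --exclude='.git' ../source_docs/workloads docs/ai-workloads-manifests
```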
3 changes: 3 additions & 0 deletions .gitignore
@@ -82,6 +82,9 @@ target/
 profile_default/
 ipython_config.py
 
+# MacOS stuff
+.DS_Store
+
 # pyenv
 # For a library or package, you might want to ignore these files since the code is
 # intended to run in multiple environments; otherwise, check them in:
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
     hooks:
       - id: check-json
       - id: check-yaml
-        exclude: templates|mkdocs.yml
+        exclude: templates|mkdocs.yml|vlm-lora-finetune
       - id: end-of-file-fixer
       - id: requirements-txt-fixer
       - id: trailing-whitespace
@@ -20,20 +20,20 @@
         args: ["--config=pyproject.toml"]
 
   - repo: https://github.com/pycqa/flake8
-    rev: 7.1.2
+    rev: 7.2.0
     hooks:
       - id: flake8
         args: ["--config=.flake8"]
 
   - repo: https://github.com/pycqa/isort
-    rev: 6.0.0
+    rev: 6.0.1
     hooks:
      - id: isort
        name: isort (python)
        args: ["--settings-path=pyproject.toml"]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.15.0
+    rev: v1.16.0
     hooks:
       - id: mypy
         args: ["--config-file=pyproject.toml"]
@@ -43,7 +43,7 @@
       - types-PyYAML
 
   - repo: https://github.com/gruntwork-io/pre-commit
-    rev: v0.1.26
+    rev: v0.1.29
    hooks:
      - id: helmlint
        exclude: kaiwo|llm-finetune-silogen-engine
28 changes: 28 additions & 0 deletions docker/lifescience/reinvent4/Dockerfile
@@ -0,0 +1,28 @@
FROM rocm/pytorch:rocm7.0_ubuntu24.04_py3.12_pytorch_release_2.5.1

# Use bash to support string substitution.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Clone the REINVENT4 repository and check out the stable v4.6.22 release
RUN git clone https://github.com/MolecularAI/REINVENT4
WORKDIR /REINVENT4
RUN git checkout v4.6.22
RUN wget -O priors/reinvent.prior "https://zenodo.org/records/15641297/files/reinvent.prior?download=1"

# Remove torch and torchvision from pyproject.toml so the install keeps the ROCm PyTorch build from the base image
RUN sed -i.bak '/torch==/d' pyproject.toml && \
sed -i.bak '/torchvision /d' pyproject.toml

# Now run the install script as usual
RUN python install.py cpu

COPY demo_notebooks/ notebooks/

# Download the chemprop model
RUN mkdir chemprop
RUN wget -q --show-progress -O chemprop/model.pt "https://www.dropbox.com/scl/fi/zpnqc9at5a5dnkzfdbo6g/model.pt?rlkey=g005yli9364uptd94d60jtg5c&e=1&dl=1"

# Copy entrypoint script
COPY entrypoint.sh /

CMD ["/bin/bash"]
31 changes: 31 additions & 0 deletions docker/lifescience/reinvent4/README.md
@@ -0,0 +1,31 @@
# Running Reinvent inference interactively

Connect to the pod with your favorite terminal.

This repo provides altered versions of the REINVENT4 demo notebooks, marked with the suffix `_clean`, that can be run from the terminal:

```sh
python3 notebooks/<notebook_name.py>
```

Alternatively, Reinvent jobs can be run with:

```sh
reinvent -l <log_name> <config_name>
```

## Running an inference job automatically (non-interactive)

To run Reinvent jobs automatically using the above image, do the following:

- Set up the config and output directories:

Put your config files, as well as any other files needed such as datasets or priors, in `CONFIG_PATH`. The job will write its output logs to `OUTPUT_PATH`.
```sh
export CONFIG_PATH=<local_path_to_configs>
export OUTPUT_PATH=<local_path_to_output>
```

Then, the following command will run the job:

```sh
docker run --rm -v $CONFIG_PATH:/data -v $OUTPUT_PATH:/output --device=/dev/kfd --device=/dev/dri/renderD<RENDER_ID> rocm-reinvent /data/<config_file>.toml /output/<output_log_name>
```
where the last two arguments give the path to the config file to run and the location where outputs are saved.
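The image build and the `entrypoint.sh` copied by the Dockerfile are not shown in this diff, but both can be inferred from the run command above. Assuming the image is built from this directory under the tag the run command uses:

```sh
# Hypothetical build invocation; the docker run command above assumes the rocm-reinvent tag.
docker build -t rocm-reinvent docker/lifescience/reinvent4
```

And judging from the run command, the entrypoint takes a config path and a log path and forwards them to the `reinvent` CLI shown earlier. A minimal sketch under that assumption (not the committed script):

```sh
#!/bin/bash
# Hypothetical sketch of entrypoint.sh:
# $1 = path to the TOML config, $2 = path for the output log.
set -euo pipefail
reinvent -l "$2" "$1"
```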
223 changes: 223 additions & 0 deletions docker/lifescience/reinvent4/demo_notebooks/Reinvent_TLRL_clean.py
@@ -0,0 +1,223 @@
# This script is based on the file notebooks/Reinvent_TLRL.py:
# https://github.com/MolecularAI/REINVENT4/blob/main/notebooks/Reinvent_TLRL.py

import os
import re
import shutil
import subprocess

import pandas as pd


def main():
    wd = "R4_TLRL_output"

    # Delete existing working directory and create a new one
    if os.path.isdir(wd):
        shutil.rmtree(wd, ignore_errors=True)
    os.mkdir(wd)
    os.chdir(wd)

    # Write config file
    prior_filename = "../priors/reinvent.prior"
    agent_filename = prior_filename
    stage1_checkpoint = "stage1.chkpt"
    stage1_parameters = f"""
run_type = "staged_learning"
device = "cuda:0"
tb_logdir = "tb_stage1"
json_out_config = "_stage1.json"

[parameters]
prior_file = "{prior_filename}"
agent_file = "{agent_filename}"
summary_csv_prefix = "stage1"
batch_size = 100
use_checkpoint = false
sample_strategy = "beamsearch"  # Additional interesting param?

[learning_strategy]
type = "dap"
sigma = 128
rate = 0.0001

[[stage]]
max_score = 1.0
max_steps = 5
chkpt_file = "{stage1_checkpoint}"
[stage.scoring]
type = "geometric_mean"
[[stage.scoring.component]]
[stage.scoring.component.custom_alerts]
[[stage.scoring.component.custom_alerts.endpoint]]
name = "Alerts"
params.smarts = [ "[*;r8]", "[*;r9]", "[*;r10]", "[*;r11]", "[*;r12]", "[*;r13]", "[*;r14]", "[*;r15]", "[*;r16]", "[*;r17]", "[#8][#8]", "[#6;+]", "[#16][#16]", "[#7;!n][S;!$(S(=O)=O)]", "[#7;!n][#7;!n]", "C#C", "C(=[O,S])[O,S]", "[#7;!n][C;!$(C(=[O,N])[N,O])][#16;!s]", "[#7;!n][C;!$(C(=[O,N])[N,O])][#7;!n]", "[#7;!n][C;!$(C(=[O,N])[N,O])][#8;!o]", "[#8;!o][C;!$(C(=[O,N])[N,O])][#16;!s]", "[#8;!o][C;!$(C(=[O,N])[N,O])][#8;!o]", "[#16;!s][C;!$(C(=[O,N])[N,O])][#16;!s]" ]
[[stage.scoring.component]]
[stage.scoring.component.QED]
[[stage.scoring.component.QED.endpoint]]
name = "QED"
weight = 0.6
[[stage.scoring.component]]
[stage.scoring.component.NumAtomStereoCenters]
[[stage.scoring.component.NumAtomStereoCenters.endpoint]]
name = "Stereo"
weight = 0.4
transform.type = "left_step"
transform.low = 0
"""

    stage1_config_filename = "stage1.toml"
    with open(stage1_config_filename, "w") as tf:
        tf.write(stage1_parameters)

    # Stage 1 Reinforcement Learning
    shutil.rmtree("tb_stage1_0", ignore_errors=True)

    # Run the stage1 process using subprocess
    print("Starting Stage 1 Reinforcement Learning...")
    stage1_result = subprocess.run(f"reinvent {stage1_config_filename} 2>&1 | tee stage1.log", shell=True, text=True)
    if stage1_result.returncode == 0:
        print("Stage 1 completed.")
    else:
        raise RuntimeError(f"Stage 1 execution failed with exit code: {stage1_result.returncode}")

    # Transfer Learning to focus the model
    # Prepare the data
    bdb = pd.read_csv("../notebooks/data/tnks2.csv")
    clean = bdb[~bdb["exp (nM)"].str.match("[<>]")]
    clean = clean.astype({"exp (nM)": "float"})

    good = clean[clean["exp (nM)"] < 1000]
    good = good[good["exp_method"] != "EC50"]
    good = good[good["exp_method"] != "Kd"]
    good = good.rename(columns={"exp (nM)": "IC50"})
    good = good.drop(columns=["exp_method"])

    # Write the good binders to a SMILES file
    TL_train_filename = "tnks2_train.smi"
    TL_validation_filename = "tnks2_validation.smi"
    data = good.sample(frac=1)
    n_head = int(0.8 * len(data))  # 80% of the data for training
    n_tail = len(good) - n_head
    print(f"number of molecules for: training={n_head}, validation={n_tail}")

    train, validation = data.head(n_head), data.tail(n_tail)
    train.to_csv(TL_train_filename, sep="\t", index=False, header=False)
    validation.to_csv(TL_validation_filename, sep="\t", index=False, header=False)

    # TL setup
    TL_parameters = f"""
run_type = "transfer_learning"
device = "cuda:0"
tb_logdir = "tb_TL"

[parameters]
num_epochs = 1
save_every_n_epochs = 1
batch_size = 100
sample_batch_size = 2000
input_model_file = "{stage1_checkpoint}"
output_model_file = "TL_reinvent.model"
smiles_file = "{TL_train_filename}"
validation_smiles_file = "{TL_validation_filename}"
standardize_smiles = true
randomize_smiles = true
randomize_all_smiles = false
internal_diversity = true
"""

    TL_config_filename = "transfer_learning.toml"
    with open(TL_config_filename, "w") as tf:
        tf.write(TL_parameters)

    # Start Transfer Learning
    shutil.rmtree("tb_TL", ignore_errors=True)

    # Run the transfer learning process using subprocess
    print("Starting Transfer Learning...")
    transfer_result = subprocess.run(
        f"reinvent {TL_config_filename} 2>&1 | tee transfer_learning.log", shell=True, text=True
    )
    if transfer_result.returncode == 0:
        print("Transfer learning completed.")
    else:
        raise RuntimeError(f"Transfer learning execution failed with exit code: {transfer_result.returncode}")

    # Choose the model from transfer learning
    TL_model_filename = "TL_reinvent.model.1.chkpt"

    stage2_parameters = re.sub("stage1", "stage2", stage1_parameters)
    stage2_parameters = re.sub("agent_file.*\n", f"agent_file = '{TL_model_filename}'\n", stage2_parameters)
    stage2_parameters = re.sub("max_steps.*\n", "max_steps = 5\n", stage2_parameters)

    # Stage 2 RL
    # Predictive model (ChemProp)
    chemprop_path = "../chemprop/"
    pred_model_parameters = f"""
[[stage.scoring.component]]
[stage.scoring.component.ChemProp]
[[stage.scoring.component.ChemProp.endpoint]]
name = "ChemProp"
weight = 0.6
params.checkpoint_dir = "{chemprop_path}"
params.rdkit_2d_normalized = true
params.target_column = "DG"
params.features = "rdkit_2d_normalized"
transform.type = "reverse_sigmoid"
transform.high = 0.0
transform.low = -50.0
transform.k = 0.4
"""

    # Combine parameters and write to file
    full_stage2_parameters = stage2_parameters + pred_model_parameters
    df_parameters = """
[diversity_filter]
type = "IdenticalMurckoScaffold"
bucket_size = 10
minscore = 0.7
"""
    inception_parameters = """
[inception]
smiles_file = ""  # no seed SMILES
memory_size = 50
sample_size = 10
"""

    full_stage2_parameters += df_parameters + inception_parameters
    stage2_config_filename = "stage2.toml"
    with open(stage2_config_filename, "w") as tf:
        tf.write(full_stage2_parameters)

    # Run stage2 using subprocess
    print("Starting Stage 2 Reinforcement Learning...")
    stage2_result = subprocess.run(f"reinvent {stage2_config_filename} 2>&1 | tee stage2.log", shell=True, text=True)
    if stage2_result.returncode == 0:
        print("Stage 2 completed.")
    else:
        raise RuntimeError(f"Stage 2 execution failed with exit code: {stage2_result.returncode}")

    # Inspect results with TensorBoard
    # Run TensorBoard separately after REINVENT finishes
    # subprocess.run(["tensorboard", "--bind_all", "--logdir", f"{wd}/tb_stage2_0"])

    # Process the results for good binders
    # csv_file = os.path.join(wd, "stage2_1.csv")
    csv_file = "stage2_1.csv"
    df = pd.read_csv(csv_file)
    good_QED = df["QED"] > 0.8
    good_dG = df["ChemProp (raw)"] < -25.0  # kcal/mol
    good_binders = df[good_QED & good_dG]
    print(len(good_binders))

    # Duplicate removal
    good_binders = good_binders.drop_duplicates(subset=["SMILES"])
    print(len(good_binders))

    # Displaying good binders
    # grid = create_mol_grid(good_binders)
    # display(grid)


if __name__ == "__main__":
    main()