Merged
Changes from all commits (19 commits)
bf73782
GH action created for copying files to docs repository (#342)
thbergst82 Sep 8, 2025
91414aa
Inference of GenCast & PanguWeather using ai-models (#445)
ltsabadz Sep 10, 2025
30d712c
Inference of Aurora using ai-models (#355)
ltsabadz Sep 10, 2025
725dc52
Silogen-engine finetuning example / life sciences (#402)
juhok-amd Sep 10, 2025
3e671ae
Add imagePullSecrets parameter to JupyterLab and VSCode workloads (#449)
markvanheeswijk Sep 18, 2025
522c2cb
Refm life science swinunetr training (#407)
jorivesga Sep 24, 2025
e391695
Enable MLflow in Silogen Finetuning Engine (#454)
Gastron Sep 25, 2025
6589f30
Feature/xlstm inference (#346)
peyron-amd Sep 25, 2025
4eb4830
Add Feature/wan2.1 serving (#338) along with tutorial
sopiko99 Sep 26, 2025
6324d8f
Add Wan2.2 fine-tuning workload
aniemela-amd Sep 26, 2025
f6e9192
Add TorchServe Packager (#450)
sopiko99 Sep 26, 2025
361a25e
Refm robotics job for OpenSplat (#311)
mwessman-amd Sep 26, 2025
24b390b
ECMWF downloader and preprocessor workload (#411)
ppihajoki-amd Sep 29, 2025
212577c
ECMWF ai-models outputs preprocessor (#444)
ppihajoki-amd Sep 29, 2025
2f1a687
Weatherbench runner (#446)
ppihajoki-amd Sep 29, 2025
122e34f
Add toctree to the workloads overview page (#457)
jussielo-amd Oct 6, 2025
79eefde
Added two new life science workloads for reinvent and semlaflow
mwessman-amd Oct 8, 2025
27adef1
Added vlm lora finetuning workload (#401)
tkarkkai-amd Oct 10, 2025
a6bfcef
DPO Example in silogen finetuning workload (#458)
Gastron Oct 21, 2025
35 changes: 35 additions & 0 deletions .github/workflows/docs-file-copy.yml
@@ -0,0 +1,35 @@
name: Copy workload documentation to public docs repo
# Rsync the ai-workloads documentation into a temporary clone of the public docs repo, then commit
# and push to its main branch, keeping the consolidated SiloGen docs in sync with this repository.

on:
  push:
    branches:
      - main
    paths:
      - "docs/**"
      - "workloads/**"
      - ".github/workflows/docs-file-copy.yml"

jobs:
  copy-docs:
    if: github.repository == 'silogen/ai-workloads'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout core repo
        uses: actions/checkout@v4

      - name: Push to public docs repo
        run: |
          git config --global user.name 'GitHub Actions'
          git config --global user.email 'actions@github.com'
          git clone https://x-access-token:${{ secrets.DOCS_REPO_TOKEN }}@github.com/silogen/ai-workloads.git source_docs
          git clone https://x-access-token:${{ secrets.DOCS_REPO_TOKEN }}@github.com/silogen/docs.git target_silogen_docs
          cd target_silogen_docs
          rsync -av --delete --exclude='.git' ../source_docs/docs docs/ai-workloads-docs
          rsync -av --delete --exclude='.git' ../source_docs/workloads docs/ai-workloads-manifests
          git add .
          git diff --staged --quiet || git commit -m "Update external docs from ai-workloads repo"
          git push origin main
        env:
          DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
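To preview what such a sync would transfer before it pushes, the same rsync invocations can be run locally with the dry-run flag. A sketch using the workflow's own paths (run from inside a local clone of the docs repo, with `source_docs` checked out alongside it):

```sh
# Dry run (-n): list what rsync would copy or delete without modifying the target clone.
rsync -avn --delete --exclude='.git' ../source_docs/docs docs/ai-workloads-docs
rsync -avn --delete --exclude='.git' ../source_docs/workloads docs/ai-workloads-manifests
```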
3 changes: 3 additions & 0 deletions .gitignore
@@ -82,6 +82,9 @@ target/
 profile_default/
 ipython_config.py
 
+# MacOS stuff
+.DS_Store
+
 # pyenv
 # For a library or package, you might want to ignore these files since the code is
 # intended to run in multiple environments; otherwise, check them in:
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
     hooks:
       - id: check-json
       - id: check-yaml
-        exclude: templates|mkdocs.yml
+        exclude: templates|mkdocs.yml|vlm-lora-finetune
       - id: end-of-file-fixer
       - id: requirements-txt-fixer
       - id: trailing-whitespace
@@ -20,20 +20,20 @@
         args: ["--config=pyproject.toml"]
 
   - repo: https://github.com/pycqa/flake8
-    rev: 7.1.2
+    rev: 7.2.0
     hooks:
       - id: flake8
         args: ["--config=.flake8"]
 
   - repo: https://github.com/pycqa/isort
-    rev: 6.0.0
+    rev: 6.0.1
     hooks:
      - id: isort
        name: isort (python)
        args: ["--settings-path=pyproject.toml"]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.15.0
+    rev: v1.16.0
     hooks:
       - id: mypy
         args: ["--config-file=pyproject.toml"]
@@ -43,7 +43,7 @@
       - types-PyYAML
 
   - repo: https://github.com/gruntwork-io/pre-commit
-    rev: v0.1.26
+    rev: v0.1.29
    hooks:
      - id: helmlint
        exclude: kaiwo|llm-finetune-silogen-engine
28 changes: 28 additions & 0 deletions docker/lifescience/reinvent4/Dockerfile
@@ -0,0 +1,28 @@
FROM rocm/pytorch:rocm7.0_ubuntu24.04_py3.12_pytorch_release_2.5.1

# Use bash to support string substitution.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Clone the REINVENT4 repository and check out the stable v4.6.22 release
RUN git clone https://github.com/MolecularAI/REINVENT4
WORKDIR /REINVENT4
RUN git checkout v4.6.22
RUN wget -O priors/reinvent.prior "https://zenodo.org/records/15641297/files/reinvent.prior?download=1"

# Remove torch and torchvision from pyproject.toml so the install keeps the ROCm PyTorch build from the base image
RUN sed -i.bak '/torch==/d' pyproject.toml && \
sed -i.bak '/torchvision /d' pyproject.toml

# Now run the install script as usual
RUN python install.py cpu

COPY demo_notebooks/ notebooks/

# Download the chemprop model
RUN mkdir chemprop
RUN wget -q --show-progress -O chemprop/model.pt "https://www.dropbox.com/scl/fi/zpnqc9at5a5dnkzfdbo6g/model.pt?rlkey=g005yli9364uptd94d60jtg5c&e=1&dl=1"

# Copy entrypoint script
COPY entrypoint.sh /

CMD ["/bin/bash"]
31 changes: 31 additions & 0 deletions docker/lifescience/reinvent4/README.md
@@ -0,0 +1,31 @@
# Running Reinvent inference interactively

Connect to the pod with your favorite terminal.

This repo provides altered versions of the REINVENT4 demo notebooks, marked with the suffix `_clean`, that can be run from the terminal:

```sh
python3 notebooks/<notebook_name.py>
```

Alternatively, Reinvent jobs can be run with:

```sh
reinvent -l <log_name> <config_name>
```

## Running an inference job automatically (non-interactive)

To run Reinvent jobs automatically using the above image, do the following:

- Set up the config and output directories:

Put your config files, as well as any other files needed such as datasets or priors, in `CONFIG_PATH`. The job will write its output logs to `OUTPUT_PATH`.
```sh
export CONFIG_PATH=<local_path_to_configs>
export OUTPUT_PATH=<local_path_to_output>
```

Then, the following command will run the job:

```sh
docker run --rm -v $CONFIG_PATH:/data -v $OUTPUT_PATH:/output --device=/dev/kfd --device=/dev/dri/renderD<RENDER_ID> rocm-reinvent /data/<config_file>.toml /output/<output_log_name>
```
where the last two arguments give the path to the config file to run and the location where outputs are saved.
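The image build and the `entrypoint.sh` copied by the Dockerfile are not shown in this diff, but both can be inferred from the run command above. Assuming the image is built from this directory under the tag the run command uses:

```sh
# Hypothetical build invocation; the docker run command above assumes the rocm-reinvent tag.
docker build -t rocm-reinvent docker/lifescience/reinvent4
```

And judging from the run command, the entrypoint takes a config path and a log path and forwards them to the `reinvent` CLI shown earlier. A minimal sketch under that assumption (not the committed script):

```sh
#!/bin/bash
# Hypothetical sketch of entrypoint.sh:
# $1 = path to the TOML config, $2 = path for the output log.
set -euo pipefail
reinvent -l "$2" "$1"
```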
223 changes: 223 additions & 0 deletions docker/lifescience/reinvent4/demo_notebooks/Reinvent_TLRL_clean.py
@@ -0,0 +1,223 @@
# This script is based on the file notebooks/Reinvent_TLRL.py:
# https://github.com/MolecularAI/REINVENT4/blob/main/notebooks/Reinvent_TLRL.py

import os
import re
import shutil
import subprocess

import pandas as pd


def main():
    wd = "R4_TLRL_output"

    # Delete existing working directory and create a new one
    if os.path.isdir(wd):
        shutil.rmtree(wd, ignore_errors=True)
    os.mkdir(wd)
    os.chdir(wd)

    # Write config file
    prior_filename = "../priors/reinvent.prior"
    agent_filename = prior_filename
    stage1_checkpoint = "stage1.chkpt"
    stage1_parameters = f"""
run_type = "staged_learning"
device = "cuda:0"
tb_logdir = "tb_stage1"
json_out_config = "_stage1.json"

[parameters]
prior_file = "{prior_filename}"
agent_file = "{agent_filename}"
summary_csv_prefix = "stage1"
batch_size = 100
use_checkpoint = false
sample_strategy = "beamsearch"  # Additional interesting param?

[learning_strategy]
type = "dap"
sigma = 128
rate = 0.0001

[[stage]]
max_score = 1.0
max_steps = 5
chkpt_file = "{stage1_checkpoint}"
[stage.scoring]
type = "geometric_mean"
[[stage.scoring.component]]
[stage.scoring.component.custom_alerts]
[[stage.scoring.component.custom_alerts.endpoint]]
name = "Alerts"
params.smarts = [ "[*;r8]", "[*;r9]", "[*;r10]", "[*;r11]", "[*;r12]", "[*;r13]", "[*;r14]", "[*;r15]", "[*;r16]", "[*;r17]", "[#8][#8]", "[#6;+]", "[#16][#16]", "[#7;!n][S;!$(S(=O)=O)]", "[#7;!n][#7;!n]", "C#C", "C(=[O,S])[O,S]", "[#7;!n][C;!$(C(=[O,N])[N,O])][#16;!s]", "[#7;!n][C;!$(C(=[O,N])[N,O])][#7;!n]", "[#7;!n][C;!$(C(=[O,N])[N,O])][#8;!o]", "[#8;!o][C;!$(C(=[O,N])[N,O])][#16;!s]", "[#8;!o][C;!$(C(=[O,N])[N,O])][#8;!o]", "[#16;!s][C;!$(C(=[O,N])[N,O])][#16;!s]" ]
[[stage.scoring.component]]
[stage.scoring.component.QED]
[[stage.scoring.component.QED.endpoint]]
name = "QED"
weight = 0.6
[[stage.scoring.component]]
[stage.scoring.component.NumAtomStereoCenters]
[[stage.scoring.component.NumAtomStereoCenters.endpoint]]
name = "Stereo"
weight = 0.4
transform.type = "left_step"
transform.low = 0
"""

    stage1_config_filename = "stage1.toml"
    with open(stage1_config_filename, "w") as tf:
        tf.write(stage1_parameters)

    # Stage 1 Reinforcement Learning
    shutil.rmtree("tb_stage1_0", ignore_errors=True)

    # Run the stage1 process using subprocess
    print("Starting Stage 1 Reinforcement Learning...")
    stage1_result = subprocess.run(f"reinvent {stage1_config_filename} 2>&1 | tee stage1.log", shell=True, text=True)
    if stage1_result.returncode == 0:
        print("Stage 1 completed.")
    else:
        raise RuntimeError(f"Stage 1 execution failed with exit code: {stage1_result.returncode}")

    # Transfer Learning to focus the model
    # Prepare the data
    bdb = pd.read_csv("../notebooks/data/tnks2.csv")
    clean = bdb[~bdb["exp (nM)"].str.match("[<>]")]
    clean = clean.astype({"exp (nM)": "float"})

    good = clean[clean["exp (nM)"] < 1000]
    good = good[good["exp_method"] != "EC50"]
    good = good[good["exp_method"] != "Kd"]
    good = good.rename(columns={"exp (nM)": "IC50"})
    good = good.drop(columns=["exp_method"])

    # Write the good binders to a SMILES file
    TL_train_filename = "tnks2_train.smi"
    TL_validation_filename = "tnks2_validation.smi"
    data = good.sample(frac=1)
    n_head = int(0.8 * len(data))  # 80% of the data for training
    n_tail = len(good) - n_head
    print(f"number of molecules for: training={n_head}, validation={n_tail}")

    train, validation = data.head(n_head), data.tail(n_tail)
    train.to_csv(TL_train_filename, sep="\t", index=False, header=False)
    validation.to_csv(TL_validation_filename, sep="\t", index=False, header=False)

    # TL setup
    TL_parameters = f"""
run_type = "transfer_learning"
device = "cuda:0"
tb_logdir = "tb_TL"

[parameters]
num_epochs = 1
save_every_n_epochs = 1
batch_size = 100
sample_batch_size = 2000
input_model_file = "{stage1_checkpoint}"
output_model_file = "TL_reinvent.model"
smiles_file = "{TL_train_filename}"
validation_smiles_file = "{TL_validation_filename}"
standardize_smiles = true
randomize_smiles = true
randomize_all_smiles = false
internal_diversity = true
"""

    TL_config_filename = "transfer_learning.toml"
    with open(TL_config_filename, "w") as tf:
        tf.write(TL_parameters)

    # Start Transfer Learning
    shutil.rmtree("tb_TL", ignore_errors=True)

    # Run the transfer learning process using subprocess
    print("Starting Transfer Learning...")
    transfer_result = subprocess.run(
        f"reinvent {TL_config_filename} 2>&1 | tee transfer_learning.log", shell=True, text=True
    )
    if transfer_result.returncode == 0:
        print("Transfer learning completed.")
    else:
        raise RuntimeError(f"Transfer learning execution failed with exit code: {transfer_result.returncode}")

    # Choose the model from transfer learning
    TL_model_filename = "TL_reinvent.model.1.chkpt"

    stage2_parameters = re.sub("stage1", "stage2", stage1_parameters)
    stage2_parameters = re.sub("agent_file.*\n", f"agent_file = '{TL_model_filename}'\n", stage2_parameters)
    stage2_parameters = re.sub("max_steps.*\n", "max_steps = 5\n", stage2_parameters)

    # Stage 2 RL
    # Predictive model (ChemProp)
    chemprop_path = "../chemprop/"
    pred_model_parameters = f"""
[[stage.scoring.component]]
[stage.scoring.component.ChemProp]
[[stage.scoring.component.ChemProp.endpoint]]
name = "ChemProp"
weight = 0.6
params.checkpoint_dir = "{chemprop_path}"
params.rdkit_2d_normalized = true
params.target_column = "DG"
params.features = "rdkit_2d_normalized"
transform.type = "reverse_sigmoid"
transform.high = 0.0
transform.low = -50.0
transform.k = 0.4
"""

    # Combine parameters and write to file
    full_stage2_parameters = stage2_parameters + pred_model_parameters
    df_parameters = """
[diversity_filter]
type = "IdenticalMurckoScaffold"
bucket_size = 10
minscore = 0.7
"""
    inception_parameters = """
[inception]
smiles_file = ""  # no seed SMILES
memory_size = 50
sample_size = 10
"""

    full_stage2_parameters += df_parameters + inception_parameters
    stage2_config_filename = "stage2.toml"
    with open(stage2_config_filename, "w") as tf:
        tf.write(full_stage2_parameters)

    # Run stage2 using subprocess
    print("Starting Stage 2 Reinforcement Learning...")
    stage2_result = subprocess.run(f"reinvent {stage2_config_filename} 2>&1 | tee stage2.log", shell=True, text=True)
    if stage2_result.returncode == 0:
        print("Stage 2 completed.")
    else:
        raise RuntimeError(f"Stage 2 execution failed with exit code: {stage2_result.returncode}")

    # Inspect results with TensorBoard
    # Run TensorBoard separately after REINVENT finishes
    # subprocess.run(["tensorboard", "--bind_all", "--logdir", f"{wd}/tb_stage2_0"])

    # Process the results for good binders
    # csv_file = os.path.join(wd, "stage2_1.csv")
    csv_file = "stage2_1.csv"
    df = pd.read_csv(csv_file)
    good_QED = df["QED"] > 0.8
    good_dG = df["ChemProp (raw)"] < -25.0  # kcal/mol
    good_binders = df[good_QED & good_dG]
    print(len(good_binders))

    # Duplicate removal
    good_binders = good_binders.drop_duplicates(subset=["SMILES"])
    print(len(good_binders))

    # Displaying good binders
    # grid = create_mol_grid(good_binders)
    # display(grid)


if __name__ == "__main__":
    main()