Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
73 commits
Select commit Hold shift + click to select a range
ebd0e59
Update _version.py (#86)
YaphetKG Oct 5, 2023
b66eb64
Update _version.py
YaphetKG Oct 5, 2023
e7e5f7c
Rti merge (#84)
YaphetKG Oct 31, 2023
96c75df
Merge branch 'main' into develop
YaphetKG Oct 31, 2023
fee790d
adding v5.0
YaphetKG Nov 1, 2023
e15a373
cde-links branch
YaphetKG Nov 6, 2023
6aac2fd
pin linkml
YaphetKG Nov 6, 2023
487b2c1
Update config.yaml
YaphetKG Nov 7, 2023
bb61ab9
pop total items before result
YaphetKG Nov 8, 2023
48650b8
print extracted elements
YaphetKG Nov 8, 2023
1413e92
Merge pull request #92 from helxplatform/robokop-v5.0
HowardLander Nov 29, 2023
3cfe3f5
Update requirements.txt
YaphetKG Dec 21, 2023
21889ed
Keep edge provenance (#94)
YaphetKG Jan 4, 2024
85cbead
Merge branch 'main' into develop
YaphetKG Feb 19, 2024
2eb136a
Pipeline parameterize restructure (#95)
YaphetKG Apr 18, 2024
2aee7f3
Merge branch 'main' into develop
YaphetKG Apr 18, 2024
f671194
pin avalon
YaphetKG Apr 18, 2024
473816a
deleted jenkins and added workflows
pchachicho Jun 6, 2024
98257d9
unlinked helx-actions
pchachicho Jun 7, 2024
d7b1f7e
testing paths
pchachicho Jun 11, 2024
b0d8c92
testing again
pchachicho Jun 11, 2024
9bee088
d
pchachicho Jun 11, 2024
2b44c22
tests
pchachicho Jun 11, 2024
7a8cc6d
commented out pytest
pchachicho Jun 13, 2024
94253c9
try again for bandit
pchachicho Jun 13, 2024
7f37f91
commented out bandit
pchachicho Jun 13, 2024
e696854
changed dag to dags
pchachicho Jun 14, 2024
b909274
Added fixes
pchachicho Jun 21, 2024
44ea4df
Merge pull request #101 from helxplatform/gh_actions
pchachicho Jun 21, 2024
4532bda
Bagel (#103)
YaphetKG Aug 7, 2024
af41d8f
Dbgap programs (#104)
YaphetKG Aug 7, 2024
99a6e7b
adding bagel config parse to bool
YaphetKG Sep 19, 2024
05b115e
Dev sync main (#106)
YaphetKG Sep 19, 2024
2058584
Updated docker image to 2.10.2
mbacon-renci Oct 8, 2024
6dec958
fix kgx path error
YaphetKG Oct 8, 2024
d3f03ab
fix kgx path error
YaphetKG Oct 8, 2024
9ba7498
Merge branch 'develop' into new_image_2-10-2
mbacon-renci Oct 10, 2024
745cf79
Trying out the slim apache images to reduce vulnerability footprint
mbacon-renci Oct 10, 2024
b86807e
Building general package update into dockerfile
mbacon-renci Oct 10, 2024
5dbe502
Trying more rigorous dist-upgrade to try to get rid of vulnerabilities…
mbacon-renci Oct 10, 2024
3f6e987
dist-upgrade also needs -y flag
mbacon-renci Oct 10, 2024
a1673b6
Rolling back from slim image
mbacon-renci Oct 11, 2024
0464da1
Fixed apt-get syntax error
mbacon-renci Oct 11, 2024
47df70d
Reverting develop to 2.7.2 for stability's sake, will revisit after u…
mbacon-renci Oct 11, 2024
b721976
Update Dockerfile
YaphetKG Oct 11, 2024
6f505d8
Update Dockerfile
YaphetKG Oct 11, 2024
be2b07d
Removed post-install package cleanup
mbacon-renci Oct 11, 2024
be35f9c
spurious merge
mbacon-renci Oct 11, 2024
73812a1
Update requirements.txt
YaphetKG Oct 12, 2024
6d5194c
test merge issue
YaphetKG Oct 21, 2024
6db88f8
fix merge issue
YaphetKG Oct 21, 2024
dec80b9
Merge branch 'main' into develop
YaphetKG Oct 22, 2024
5794dc1
modify radx pipeline (#111)
YaphetKG Feb 18, 2025
dbfb5b2
Heal ingest 2 18 (#112)
YaphetKG Feb 26, 2025
06a4ed1
test node type
YaphetKG Feb 26, 2025
faf4975
Heal ingest 2 18 (#114)
YaphetKG Mar 24, 2025
4d8cf1a
Merge branch 'main' into develop
YaphetKG Mar 25, 2025
048e15f
Bitnami (#117)
YaphetKG Mar 26, 2025
ddb3663
remove clear index
YaphetKG May 19, 2025
27cc880
Update requirements.txt
yskale May 19, 2025
2cea207
fix study name in graph (#121)
YaphetKG May 27, 2025
6e4ad05
Merge branch 'main' into develop
YaphetKG May 28, 2025
30c4e8d
comment clear index. rolledback during merge
YaphetKG May 28, 2025
3a81811
Picsure data ingest test (#118)
yskale Jun 3, 2025
0182738
fix dug version
YaphetKG Jun 3, 2025
4b59d18
Merge branch 'main' into develop
YaphetKG Jun 3, 2025
14d09f3
Picsure data ingest test (#127)
YaphetKG Sep 9, 2025
6917538
DUG-529 Added support to process diffs in annotate_and_index (#110)
vladimir2217 Sep 9, 2025
23ab7a1
Picsure data ingest test (#128)
YaphetKG Sep 19, 2025
faccf86
Dug 547 (#129)
waTeim Nov 3, 2025
c6ab5e0
fix recover (#131)
YaphetKG Nov 12, 2025
086ef6a
Airflow 3.1.1 (#130)
YaphetKG Nov 14, 2025
58493b3
Merge branch 'main' into develop
YaphetKG Nov 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 11 additions & 20 deletions .env
Original file line number Diff line number Diff line change
@@ -1,23 +1,14 @@
AIRFLOW_UID=502
AIRFLOW_GID=0

API_WORKERS=4
API_PORT=5551
API_TIMEOUT=10
KGX_DATA_SETS="bdc-studies-kgx:v1.0"
INPUT_DATA_SETS="heal-mds-studies:v1.0"

DATA_DIR=./local_storage
LAKEFS_ACCESS_KEY=""
LAKEFS_SECRET_KEY=""
LAKEFS_REPO=""
LAKEFS_BRANCH=""
LAKEFS_URL="https://lakefs.apps.renci.org"

DUG_LOG_LEVEL=INFO

ELASTICSEARCH_PASSWORD=12345
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_USERNAME=elastic

NBOOST_API_HOST=nboost

REDIS_PASSWORD=weak
REDIS_HOST=merge-redis-master
REDIS_PORT=6379
TRANQL_ACCESS_LOG=access.log
TRANQL_ERROR_LOG=error.log
ROGER_DUG__INPUTS_DATA__SETS=topmed:v1.0
BIOMEGATRON_URL="https://med-nemo.apps.renci.org/annotate"
SAPBERT_URL="https://sap-qdrant.apps.renci.org/annotate"
NODE_NORM_URL="https://nodenormalization-sri.renci.org/get_normalized_nodes?conflate=false&description=true&curie="
NAME_RES_URL="https://name-resolution-sri.renci.org/reverse_lookup"
2 changes: 1 addition & 1 deletion .github/workflows/trivy-pr-scan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
# We still fail the job if results are found, so below will always run
# unless manually canceled.
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
uses: github/codeql-action/upload-sarif@v3
if: '!cancelled()'
with:
sarif_file: 'trivy-results.sarif'
103 changes: 72 additions & 31 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,33 +1,74 @@
FROM bitnami/airflow:2.10.5-debian-12-r7

USER root
RUN apt-get update && apt-get install -y git nano vim gcc rustc cargo
#RUN useradd -u 1001 -ms /bin/bash airflow && chown -R airflow /home/airflow
COPY requirements.txt requirements.txt
RUN source /opt/bitnami/airflow/venv/bin/activate && CARGO_HOME=/tmp/.cargo && \
pip install setuptools wheel && \
pip install -r requirements.txt

RUN rm -f requirements.txt

## Vul patches
## Python lib patches on airflow python env
RUN source /opt/bitnami/airflow/venv/bin/activate pip install --upgrade \
flask-appbuilder==4.5.3 \
cryptography==44.0.1 \
werkzeug==3.0.6 \
urllib3==2.2.2
RUN source /opt/bitnami/airflow/venv/bin/activate pip uninstall -y \
apache-airflow-providers-mysql==6.2.0

# Upgrade these in the non-airflow (system) Python env as well
RUN pip install --upgrade \
flask-appbuilder==4.5.3 \
cryptography==44.0.1 \
werkzeug==3.0.6 \
urllib3==2.2.2
RUN apt-get autoremove -y vim
RUN apt-get autoremove -y binutils
RUN apt-get autoremove -y linux-libc-dev
# Airflow image built on the slim Debian (trixie) Python base image.
FROM python:3.11.14-slim-trixie

# Airflow version to install and the directory Airflow runs from.
ARG AIRFLOW_VERSION=3.1.1
ARG AIRFLOW_HOME=/opt/airflow

# Runtime configuration.
# NOTE(review): the SQL_ALCHEMY_CONN default below bakes database credentials
# into the image; presumably it is overridden at deploy time -- confirm.
ENV AIRFLOW_HOME=${AIRFLOW_HOME}
ENV AIRFLOW__CORE__LOAD_EXAMPLES=False
ENV AIRFLOW__CORE__EXECUTOR=LocalExecutor
ENV AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
ENV PYTHONUNBUFFERED=1

# Create the airflow user (uid 50000) and the standard Airflow directories.
RUN useradd --uid 50000 --home-dir ${AIRFLOW_HOME} --create-home airflow && \
    mkdir -p ${AIRFLOW_HOME}/dags ${AIRFLOW_HOME}/logs ${AIRFLOW_HOME}/plugins ${AIRFLOW_HOME}/config

# Install system dependencies (build toolchain and headers are purged again
# further down, once the Python packages have been installed).
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libpq-dev \
    libffi-dev \
    libssl-dev \
    curl \
    tini \
    tzdata \
    git \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip tools
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Install Airflow with the postgres, celery, redis and fab extras plus the
# Kubernetes provider, pinned by the official constraints file for this
# Airflow version / Python 3.11.
RUN pip install --no-cache-dir \
    "apache-airflow[postgres,celery,redis,fab]==${AIRFLOW_VERSION}" \
    "apache-airflow-providers-cncf-kubernetes" \
    --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.11.txt"

# Optional: install extra packages
RUN pip install --no-cache-dir psycopg2-binary redis

# Install the project's own Python requirements, then drop the temporary copy.
COPY ./requirements.txt /tmp/requirements.txt

RUN pip install --no-cache-dir -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt

# Remove build-time packages so they are absent from the final filesystem.
RUN apt-get purge -y --auto-remove \
    build-essential \
    libpq-dev \
    libffi-dev \
    libssl-dev \
    curl \
    git && \
    apt-get clean

# Set ownership
RUN chown -R airflow:airflow ${AIRFLOW_HOME}

# Switch to airflow user
USER airflow
WORKDIR ${AIRFLOW_HOME}

# Expose the Airflow UI / API port
EXPOSE 8080

# Use tini for signal handling
ENTRYPOINT ["/usr/bin/tini", "--"]

# Default command. Airflow 3 replaced the `webserver` CLI command with
# `api-server`, which serves the UI and the REST API.
CMD ["airflow", "api-server"]
46 changes: 44 additions & 2 deletions dags/annotate_and_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,28 @@

from airflow.models import DAG
from airflow.operators.empty import EmptyOperator
from roger.tasks import default_args, create_pipeline_taskgroup
from airflow.operators.python import PythonOperator
from roger.tasks import default_args, create_pipeline_taskgroup, logger, create_python_task

env_enabled_datasets = os.getenv(
"ROGER_DUG__INPUTS_DATA__SETS", "topmed,anvil").split(",")

with DAG(
dag_id='annotate_and_index',
default_args=default_args,
schedule_interval=None
params=
{
"repository_id": None,
"branch_name": None,
"commitid_from": None,
"commitid_to": None
},
# schedule_interval=None
) as dag:
init = EmptyOperator(task_id="init", dag=dag)
finish = EmptyOperator(task_id="finish", dag=dag)


from roger import pipelines
from roger.config import config
envspec = os.getenv("ROGER_DUG__INPUTS_DATA__SETS","topmed:v2.0")
Expand All @@ -42,3 +51,36 @@
# . . .

init >> create_pipeline_taskgroup(dag, pipeline_class, config) >> finish




with DAG(
    dag_id='dag_test',
    default_args=default_args,
    params={
        "repository_id": None,
        "branch_name": None,
        "commitid_from": None,
        "commitid_to": None
    },
    # schedule_interval=None
) as dag:
    # Small DAG that wires init -> one python task -> finish; the python task
    # only prints the arguments it is invoked with.

    def print_context(ds=None, **kwargs):
        """Print every keyword argument received, followed by ``ds``."""
        for banner, payload in ((">>>All kwargs", kwargs), (">>>All ds", ds)):
            print(banner)
            print(payload)

    init = EmptyOperator(task_id="init", dag=dag)
    finish = EmptyOperator(task_id="finish", dag=dag)
    context_task = create_python_task(dag, "get_from_lakefs", print_context)

    init >> context_task >> finish

#run_this = PythonOperator(task_id="print_the_context", python_callable=print_context)

if __name__ == "__main__":
dag.test()
2 changes: 1 addition & 1 deletion dags/knowledge_graph_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
with DAG(
dag_id='knowledge_graph_build',
default_args=default_args,
schedule_interval=None
# schedule_interval=None
) as dag:

""" Build the workflow tasks. """
Expand Down
8 changes: 4 additions & 4 deletions dags/roger/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import warnings
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Optional, List
from typing import Dict, Optional, List, Union

import yaml
from dug.config import Config as DugConfig
Expand Down Expand Up @@ -33,7 +33,7 @@ class LakefsConfig(DictLike):
secret_access_key: str
branch: str
repo: str
enabled: bool = False
enabled: Union[bool, str] = False

def __post_init__(self):
if isinstance(self.enabled, str):
Expand Down Expand Up @@ -135,8 +135,8 @@ class AnnotationConfig(DictLike):
])

def __post_init__(self):
self.annotator_args["sapbert"]["bagel"]["enabled"] = self.annotator_args["sapbert"]["bagel"][
"enabled"].lower() == "true"
self.annotator_args["sapbert"]["bagel"]["enabled"] = str(self.annotator_args["sapbert"]["bagel"][
"enabled"]).lower() == "true"


@dataclass
Expand Down
3 changes: 3 additions & 0 deletions dags/roger/core/bulkload.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def create_nodes_csv_file(self, input_data_path=None, output_data_path=None):
merged_nodes_file = storage.merged_objects('nodes', input_data_path)
counter = 1
for node in storage.json_line_iter(merged_nodes_file):
if node.get('description'):
node['description'] = node['description'].replace('\n',
' ')
if not node.get('category'):
category_error_nodes.add(node['id'])
node['category'] = [BiolinkModel.root_type]
Expand Down
3 changes: 1 addition & 2 deletions dags/roger/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,8 +840,7 @@ def annotate(self, to_string=False, files=None, input_data_path=None,
"Annotate files with the appropriate parsers and crawlers"
if files is None:
files = self.get_objects(input_data_path=input_data_path)
self.annotate_files(parsable_files=files,
output_data_path=output_data_path)
self.annotate_files(parsable_files=files, output_data_path=output_data_path)
output_log = self.log_stream.getvalue() if to_string else ''
return output_log

Expand Down
Loading
Loading