From 69cbb746b54501db6128e8e4f655744f1f08c5e6 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Fri, 20 Nov 2020 09:16:54 -0500 Subject: [PATCH 001/332] Adds support for bulk reload to remote redis. --- roger/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/roger/core.py b/roger/core.py index 3d354071..aae6a121 100644 --- a/roger/core.py +++ b/roger/core.py @@ -541,7 +541,9 @@ def insert (self): if len(edges) > 0: args.extend (("-r " + " -r ".join (edges)).split ()) args.extend ([ "--separator=|" ]) - args.extend ([ redisgraph['graph'] ]) + args.extend([f"--host={redisgraph['host']}"]) + args.extend([f"--port={redisgraph['ports']['http']}"]) + args.extend([f"--password={redisgraph['password']}"]) """ standalone_mode=False tells click not to sys.exit() """ bulk_insert (args, standalone_mode=False) From c18632a7f69e2d69f77c44930cbb290f81d98ad6 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Fri, 20 Nov 2020 09:17:27 -0500 Subject: [PATCH 002/332] config for bitnami's redis chart --- bin/roger | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/bin/roger b/bin/roger index 5d96cde8..f1496948 100755 --- a/bin/roger +++ b/bin/roger @@ -2,9 +2,11 @@ set -e namespace=${NAMESPACE:-scox} -release=redisgraph -image_repository=redislabs/redisgraph -image_tag=edge +release=${RELEASE:-redisgraph-test} +image_repository=${REDIS_IMAGE:-redislabs/redisgraph} +image_tag=${REDIS_IMAGE_TAG:-edge} +cluster_domain=${CLUSTER_DOMAIN:-cluster.local} +redis_worker_count=${REDIS_WORKER_COUNT:-1} # https://github.com/bitnami/charts/tree/master/bitnami/redis init () { @@ -15,16 +17,13 @@ start () { --set image.repository=$image_repository \ --set image.tag=$image_tag \ --set redis.command="redis-server" \ - --set redis.args="--loadmodule /usr/lib/redis/modules/redisgraph.so" \ - --set master.command="redis-server --loadmodule /usr/lib/redis/modules/redisgraph.so" \ - --set slave.command="redis-server --loadmodule 
/usr/lib/redis/modules/redisgraph.so" \ - --namespace=$namespace \ - bitnami/redis -} -start () { - helm install $release \ - --set image.repository=$image_repository \ - --set image.tag=$image_tag \ + --set clusterDomain=$cluster_domain \ + --set cluster.slaveCount=$redis_worker_count \ + --set usePassword=false \ + --set master.command="" \ + --set slave.command="" \ + --set master.extraFlags[0]="--loadmodule /usr/lib/redis/modules/redisgraph.so" \ + --set slave.extraFlags[0]="--loadmodule /usr/lib/redis/modules/redisgraph.so" \ --namespace=$namespace \ bitnami/redis } From c05a4a5a2c2ce6c95a73ddf6e1ecca0b262486a8 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 21 Dec 2020 10:03:31 -0500 Subject: [PATCH 003/332] adds password to redis DB interface --- roger/roger_db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roger/roger_db.py b/roger/roger_db.py index 75b94e6b..5c1f458c 100644 --- a/roger/roger_db.py +++ b/roger/roger_db.py @@ -9,9 +9,9 @@ class RedisGraph: """ Graph abstraction over RedisGraph. A thin wrapper but provides us some options. """ - def __init__(self, host='localhost', port=6379, graph='default'): + def __init__(self, host='localhost', port=6379, graph='default', password=''): """ Construct a connection to Redis Graph. """ - self.r = redis.Redis(host=host, port=port) + self.r = redis.Redis(host=host, port=port, password=password) self.redis_graph = Graph(graph, self.r) def add_node (self, identifier=None, label=None, properties=None): From 04752e31a37f1f248ca97a832d7c0d36822c2695 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 21 Dec 2020 10:40:00 -0500 Subject: [PATCH 004/332] adds config as parameter for tasks, override default config with json passed when starting task. 
--- roger/config.yaml | 3 +- roger/core.py | 76 ++++++++++++++++++++++++++------------------- tranql_translate.py | 39 +++++++++++++++++++++-- 3 files changed, 81 insertions(+), 37 deletions(-) diff --git a/roger/config.yaml b/roger/config.yaml index f1377a3c..068d2160 100644 --- a/roger/config.yaml +++ b/roger/config.yaml @@ -3,8 +3,7 @@ redisgraph: password: "" host: localhost graph: test - ports: - http: 6379 + port: 6379 logging: level: DEBUG diff --git a/roger/core.py b/roger/core.py index aae6a121..2cdd2797 100644 --- a/roger/core.py +++ b/roger/core.py @@ -204,9 +204,12 @@ def is_up_to_date (source, targets): class KGXModel: """ Abstractions for transforming Knowledge Graph Exchange formatted data. """ - def __init__(self, biolink): + def __init__(self, biolink, config=None): + if not config: + config = get_config() + self.config = config self.biolink = biolink - + def get (self, dataset_version = "v0.1"): """ Read metadata for edge and node files, then join them into whole KGX objects containing both nodes and edges. @@ -383,9 +386,9 @@ def format_keys (self, keys, schema_type : SchemaType): def load (self): """ Use KGX to load a data set into Redisgraph """ input_format = "json" - uri = f"redis://{config['redisgraph']['host']}:{config['redisgraph']['ports']['http']}/" - username = config['redisgraph']['username'] - password = config['redisgraph']['password'] + uri = f"redis://{self.config['redisgraph']['host']}:{self.config['redisgraph']['port']}/" + username = self.config['redisgraph']['username'] + password = self.config['redisgraph']['password'] log.info (f"connecting to redisgraph: {uri}") for subgraph in glob.glob (f"{kgx_repo}/**.json"): redisgraph_upload(inputs=[ subgraph ], @@ -423,8 +426,11 @@ def get_leaf_class (self, names): class BulkLoad: """ Tools for creating a Redisgraph bulk load dataset. 
""" - def __init__(self, biolink): + def __init__(self, biolink, config=None): self.biolink = biolink + if not config: + config = get_config() + self.config = config def tables_up_to_date (self): return Util.is_up_to_date ( @@ -519,13 +525,12 @@ def write_bulk (self, bulk_path, obj_map, schema, state={}, f=None): stream.write ("\n") def insert (self): - redisgraph = config.get('redisgraph', {}) - bulk_loader = config.get('bulk_loader', {}) + redisgraph = self.config.get('redisgraph', {}) + bulk_loader = self.config.get('bulk_loader', {}) nodes = sorted(glob.glob (Util.bulk_path ("nodes/**.csv"))) edges = sorted(glob.glob (Util.bulk_path ("edges/**.csv"))) graph = redisgraph['graph'] - log.info (f"bulk loading \n nodes: {nodes} \n edges: {edges}") - print (f"bulk loading \n nodes: {nodes} \n edges: {edges}") + log.info(f"bulk loading \n nodes: {nodes} \n edges: {edges}") try: log.info (f"deleting graph {graph} in preparation for bulk load.") @@ -537,23 +542,25 @@ def insert (self): log.info (f"bulk loading graph: {graph}") args = [] if len(nodes) > 0: - args.extend (("-n " + " -n ".join (nodes)).split ()) + args.extend(("-n " + " -n ".join(nodes)).split()) if len(edges) > 0: - args.extend (("-r " + " -r ".join (edges)).split ()) - args.extend ([ "--separator=|" ]) + args.extend(("-r " + " -r ".join(edges)).split()) + args.extend(["--separator=|"]) args.extend([f"--host={redisgraph['host']}"]) - args.extend([f"--port={redisgraph['ports']['http']}"]) + args.extend([f"--port={redisgraph['port']}"]) args.extend([f"--password={redisgraph['password']}"]) + args.extend([f"{redisgraph['graph']}"]) """ standalone_mode=False tells click not to sys.exit() """ bulk_insert (args, standalone_mode=False) def get_redisgraph (self, redisgraph): return RedisGraph (host=redisgraph['host'], - port=redisgraph['ports']['http'], + port=redisgraph['port'], + password=redisgraph.get('password', ''), graph=redisgraph['graph']) def validate (self): - redisgraph = config.get('redisgraph', {}) 
+ redisgraph = self.config.get('redisgraph', {}) print (f"config:{json.dumps(redisgraph, indent=2)}") db = self.get_redisgraph (redisgraph) validation_queries = config.get('validation', {}).get('queries', []) @@ -571,20 +578,24 @@ def validate (self): class Roger: """ Consolidate Roger functionality for a cleaner interface. """ - def __init__(self, to_string=False): + def __init__(self, to_string=False, config=None): """ Initialize. :param to_string: Log messages to a string, available as self.log_stream.getvalue() after execution completes. """ import logging + self.has_string_handler = to_string + if not config: + config = get_config() + self.config = config if to_string: """ Add a stream handler to enable to_string. """ self.log_stream = StringIO() self.string_handler = logging.StreamHandler (self.log_stream) log.addHandler (self.string_handler) self.biolink = BiolinkModel () - self.kgx = KGXModel (self.biolink) - self.bulk = BulkLoad (self.biolink) + self.kgx = KGXModel (self.biolink, config=config) + self.bulk = BulkLoad (self.biolink, config=config) def __enter__(self): """ Implement Python's Context Manager interface. """ @@ -599,55 +610,56 @@ def __exit__(self, exception_type, exception_value, traceback): """ if exception_type or exception_value or traceback: log.error ("{} {} {}".format (exception_type, exception_value, traceback)) - log.removeHandler (self.string_handler) + if self.has_string_handler: + log.removeHandler (self.string_handler) class RogerUtil: """ An interface abstracting Roger's inner workings to make it easier to incorporate into external tools like workflow engines. 
""" @staticmethod - def get_kgx (to_string=False): + def get_kgx (to_string=False, config=None): output = None - with Roger (to_string) as roger: + with Roger (to_string, config=config) as roger: roger.kgx.get () output = roger.log_stream.getvalue () if to_string else None return output @staticmethod - def create_schema (to_string=False): + def create_schema (to_string=False, config=None): output = None - with Roger (to_string) as roger: + with Roger (to_string, config=config) as roger: roger.kgx.create_schema () output = roger.log_stream.getvalue () if to_string else None return output @staticmethod - def merge_nodes (to_string=False): + def merge_nodes (to_string=False, config=None): output = None - with Roger (to_string) as roger: + with Roger (to_string, config=config) as roger: roger.kgx.merge () output = roger.log_stream.getvalue () if to_string else None return output @staticmethod - def create_bulk_load (to_string=False): + def create_bulk_load (to_string=False, config=None): output = None - with Roger (to_string) as roger: + with Roger (to_string, config=config) as roger: roger.bulk.create () output = roger.log_stream.getvalue () if to_string else None return output @staticmethod - def bulk_load (to_string=False): + def bulk_load (to_string=False, config=None): output = None - with Roger (to_string) as roger: + with Roger (to_string, config=config) as roger: roger.bulk.insert () output = roger.log_stream.getvalue () if to_string else None return output @staticmethod - def validate (to_string=False): + def validate (to_string=False, config=None): output = None - with Roger (to_string) as roger: + with Roger (to_string, config=config) as roger: roger.bulk.validate () output = roger.log_stream.getvalue () if to_string else None return output diff --git a/tranql_translate.py b/tranql_translate.py index e9fe67e6..210443f3 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -12,7 +12,9 @@ from airflow.models import DAG from 
airflow.operators.python_operator import PythonOperator from airflow.utils.dates import days_ago +from flatten_dict import flatten, unflatten from roger.core import RogerUtil +from roger.roger_util import get_config, get_logger default_args = { 'owner': 'RENCI', @@ -41,6 +43,32 @@ def get_executor_config (annotations=None): } return k8s_executor_config if at_k8s else None + def task_wrapper(python_callable, **kwargs): + """ + Overrides configuration with config from airflow. + :param python_callable: + :param kwargs: + :return: + """ + # get dag config provided + dag_run = kwargs.get('dag_run') + dag_conf = {} + logger = get_logger() + config = get_config() + if dag_run: + dag_conf = dag_run.conf + # remove this since to send every other argument to the python callable. + print(dag_conf) + del kwargs['dag_run'] + # overrides values + config_flat = flatten(config) + dag_conf_flat = flatten(dag_conf) + config_flat.update(dag_conf_flat) + config = unflatten(config_flat) + logger.info("Config") + logger.info(config) + return python_callable(to_string=True, config=config) + def create_python_task (name, a_callable): """ Create a python task. :param name: The name of the task. @@ -48,11 +76,16 @@ def create_python_task (name, a_callable): """ return PythonOperator( task_id=name, - python_callable=a_callable, - op_kwargs={ 'to_string' : True }, + python_callable=task_wrapper, + op_kwargs={ + "python_callable": a_callable, + "to_string": True + }, executor_config=get_executor_config (annotations={ "task_name" : name - }) + }), + dag=dag, + provide_context=True ) """ Build the workflow tasks. """ From 4fd5c40ccbdd377a2c3679baf31cfc08581b4147 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 21 Dec 2020 10:41:49 -0500 Subject: [PATCH 005/332] adds volumes and pip depedencies to airflow installation . 
--- bin/airk8s | 2 +- bin/custom-values.yaml | 39 +++++++++++++++++++++++++++++++++++++-- requirements.txt | 1 + 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/bin/airk8s b/bin/airk8s index c6b32aae..6a58ce61 100755 --- a/bin/airk8s +++ b/bin/airk8s @@ -11,7 +11,7 @@ init () { helm repo update } start () { - helm install "airflow" stable/airflow \ + helm upgrade --install "airflow" stable/airflow \ --version "$version" \ --namespace "$namespace" \ --values ./custom-values.yaml diff --git a/bin/custom-values.yaml b/bin/custom-values.yaml index e60f3297..c2ea3300 100644 --- a/bin/custom-values.yaml +++ b/bin/custom-values.yaml @@ -12,7 +12,10 @@ airflow: ## the airflow executor type to use ## - executor: CeleryExecutor + image: + repository: renciorg/apache-airflow-1.10.12-python-3.8-git + tag: latest + executor: CeleryExecutor # executor: KubernetesExecutor ## the fernet key used to encrypt the connections in the database @@ -34,6 +37,37 @@ airflow: ## Disable noisy "Handling signal: ttou" Gunicorn log messages GUNICORN_CMD_ARGS: "--log-level WARNING" + extraPipPackages: + - Babel==2.8.0 + - biolink-model==1.2.5 + - biolinkml==1.5.8 + - redisgraph==2.1.5 + - redisgraph-bulk-loader==0.9.3 + - flatten-dict + ################################### + # Volumes + ################################### + + ## EXAMPLE: + ## extraVolumeMounts: + ## - name: synchronised-dags + ## mountPath: /opt/airflow/dags + ## + extraVolumeMounts: + - name: roger-data + mountPath: /dags/roger/data + + ## extra volumes for the web/scheduler/worker Pods + ## + ## EXAMPLE: + ## extraVolumes: + ## - name: synchronised-dags + ## emptyDir: {} + ## + extraVolumes: + - name: roger-data + emptyDir: {} + ################################### # Airflow - Scheduler Configs ################################### @@ -94,7 +128,7 @@ dags: ## #url: "ssh://git@repo.example.com/my-airflow-dags.git" #url: "ssh://git@github.com/stevencox/airflow.git" - url: "ssh://git@github.com/stevencox/roger.git" + 
url: "ssh://git@github.com/helxplatform/roger.git" ## the branch/tag/sha1 which we clone ## @@ -143,6 +177,7 @@ dags: ## the git sync interval in seconds ## refreshTime: 60 + installRequirments: true ################################### # Database - PostgreSQL Chart diff --git a/requirements.txt b/requirements.txt index 0fe342a6..9a4015af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -46,6 +46,7 @@ Flask-OpenID==1.2.5 Flask-SQLAlchemy==2.4.4 flask-swagger==0.2.14 Flask-WTF==0.14.3 +flatten-dict funcsigs==1.0.2 future==0.18.2 graphviz==0.14.1 From f0ab6d5f1cd5317a17f04de839421df60f4fc863 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 21 Dec 2020 13:59:20 -0500 Subject: [PATCH 006/332] airflow chart moved from helm repo to airflow repo, and bumped chart version to latest, adds custom docker image for airflow that allows pip install --- bin/airk8s | 6 +++--- bin/custom-values.yaml | 3 ++- docker/airflow/DockerFile | 4 ++++ 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 docker/airflow/DockerFile diff --git a/bin/airk8s b/bin/airk8s index 6a58ce61..b75cbf25 100755 --- a/bin/airk8s +++ b/bin/airk8s @@ -4,14 +4,14 @@ set -x set -e namespace=${NAMESPACE:-scox} -version=v7.11.0 +version=v7.15.0 init () { - helm repo add stable https://kubernetes-charts.storage.googleapis.com + helm repo add airflow-stable https://airflow-helm.github.io/charts helm repo update } start () { - helm upgrade --install "airflow" stable/airflow \ + helm install "airflow" airflow-stable/airflow \ --version "$version" \ --namespace "$namespace" \ --values ./custom-values.yaml diff --git a/bin/custom-values.yaml b/bin/custom-values.yaml index c2ea3300..6c510d53 100644 --- a/bin/custom-values.yaml +++ b/bin/custom-values.yaml @@ -44,6 +44,7 @@ airflow: - redisgraph==2.1.5 - redisgraph-bulk-loader==0.9.3 - flatten-dict + - git+https://github.com/stevencox/kgx.git ################################### # Volumes ################################### @@ -132,7 +133,7 @@ dags: 
## the branch/tag/sha1 which we clone ## - ref: main + ref: redis-helm ## the name of a pre-created secret containing files for ~/.ssh/ ## diff --git a/docker/airflow/DockerFile b/docker/airflow/DockerFile new file mode 100644 index 00000000..ed7d953d --- /dev/null +++ b/docker/airflow/DockerFile @@ -0,0 +1,4 @@ +FROM apache/airflow:1.10.12-python3.8 +USER root +RUN apt-get update && apt-get install -y git +USER airflow From 8490884dcb416c65e40e4472aac8f858b216b05b Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 4 Jan 2021 10:45:52 -0500 Subject: [PATCH 007/332] config singleton implementation that enable overriding values from file at runtime. --- roger/Config.py | 84 +++++++++++++++++++++++++++++++++++++++++++++++ roger/config.yaml | 2 +- roger/core.py | 4 ++- 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 roger/Config.py diff --git a/roger/Config.py b/roger/Config.py new file mode 100644 index 00000000..d82973ad --- /dev/null +++ b/roger/Config.py @@ -0,0 +1,84 @@ +import yaml +from flatten_dict import flatten, unflatten +from os import path +from typing import Dict, Optional + + +CONFIG_FILENAME = path.join(path.dirname(path.abspath(__file__)), 'config.yaml') + + +class Config: + """ + Singleton config wrapper + """ + __instance__: Optional[Dict] = None + + def __init__(self, file_name: str): + if not Config.__instance__: + Config.__instance__ = Config.read_config_file(file_name=file_name) + + @staticmethod + def read_config_file(file_name: str): + return yaml.load(open(file_name), Loader=yaml.FullLoader) + + def __getattr__(self, item): + """ + Proxies calls to instance dict. + Note: dict.update is overridden to do partial updates. + Refer to Config.update method. 
+ :param item: method called + :return: proxied method + """ + if item == 'update': + # overrides default dict update method + return self.update + return getattr(Config.__instance__, item) + + def __getitem__(self, item): + """ + Makes config object subscriptable + :param item: key to lookup + :return: value stored in key + """ + return self.__instance__.get(item) + + @staticmethod + def update(new_value: Dict): + """ + Updates dictionary partially. + Given a config {'name': {'first': 'name', 'last': 'name'}} + and a partial update {'name': {'first': 'new name'} } + result would be {'name': {'first': 'new name', 'last': 'name'}} + :param new_value: parts to update + :return: updated dict + """ + config_flat = flatten(Config.__instance__) + new_value_flat = flatten(new_value) + config_flat.update(new_value_flat) + Config.__instance__ = unflatten(config_flat) + return Config.__instance__ + + def __str__(self): + return Config.__instance__.__str__() + + +def get_default_config(file_name: str = CONFIG_FILENAME) -> Config: + """ + Get config as a dictionary + + Parameters + ---------- + file_name: str + The filename with all the configuration + + Returns + ------- + dict + A dictionary containing all the entries from the config YAML + + """ + config_instance = Config(file_name) + return config_instance + + +config: Config = get_default_config() diff --git a/roger/config.yaml b/roger/config.yaml index 068d2160..2a7513bf 100644 --- a/roger/config.yaml +++ b/roger/config.yaml @@ -14,7 +14,7 @@ base_data_uri: https://stars.renci.org/var/kgx_data #https://github.com/RedisGraph/redisgraph-bulk-loader/blob/master/redisgraph_bulk_loader/bulk_insert.py#L43 bulk_loader: - separator: "|" + separator: 0x1E enforce_schema: False skip_invalid_nodes: False skip_invalid_edges: False diff --git a/roger/core.py b/roger/core.py index 2cdd2797..8d52f751 100644 --- a/roger/core.py +++ b/roger/core.py @@ -682,7 +682,9 @@ def validate (to_string=False, config=None): kgx = KGXModel (biolink) 
bulk = BulkLoad (biolink) if args.data_root is not None: - data_root = get_config()['data_root'] = args.data_root + config = get_config() + data_root = args.data_root + config.update({'data_root': data_root}) log.info (f"data root:{data_root}") if args.get_kgx: kgx.get (dataset_version=args.dataset_version) From 2276c1be8907217e360bdfa9bdc7142523792143 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 4 Jan 2021 10:51:16 -0500 Subject: [PATCH 008/332] enforcing redis schema for bulkloader derived from type we get while parsing kgx json. Uses `recordseparator` ascii char as csv separator. --- roger/core.py | 222 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 169 insertions(+), 53 deletions(-) diff --git a/roger/core.py b/roger/core.py index 8d52f751..2a2d6322 100644 --- a/roger/core.py +++ b/roger/core.py @@ -8,13 +8,14 @@ import time import yaml import sys -import traceback +# import traceback from biolink import model from collections import defaultdict from enum import Enum from io import StringIO -from kgx.cli import redisgraph_upload -from roger.roger_util import get_logger, get_config +from roger.Config import get_default_config as get_config +from roger.roger_util import get_logger +from roger.components.data_conversion_utils import TypeConversionUtil from redisgraph_bulk_loader.bulk_insert import bulk_insert from roger.roger_db import RedisGraph from string import Template @@ -248,7 +249,7 @@ def create_schema (self): if self.schema_up_to_date(): log.info (f"schema is up to date.") return - + predicate_schemas = defaultdict(lambda:None) category_schemas = defaultdict(lambda:None) for subgraph in Util.kgx_objects (): @@ -259,25 +260,27 @@ def create_schema (self): """ Infer predicate schemas. 
""" for edge in graph['edges']: predicate = edge['edge_label'] - if not predicate in predicate_schemas: - predicate_schemas[predicate] = edge - for k in edge.keys (): - edge[k] = '' - else: - for k in edge.keys (): - if not k in predicate_schemas[predicate]: - predicate_schemas[predicate][k] = '' + predicate_schemas[predicate] = predicate_schemas.get(predicate, {}) + for k in edge.keys (): + current_type = type(edge[k]).__name__ + if not k in predicate_schemas[predicate]: + predicate_schemas[predicate][k] = current_type + else: + previous_type = predicate_schemas[predicate][k] + # prefering lists over strings over bools over floats over ints + predicate_schemas[predicate][k] = TypeConversionUtil.compare_types(previous_type, current_type) """ Infer node schemas. """ for node in graph['nodes']: node_type = self.biolink.get_leaf_class (node['category']) - if not node_type in category_schemas: - category_schemas[node_type] = node - for k in node.keys (): - node[k] = '' - else: - for k in node.keys (): - if not k in category_schemas[node_type]: - category_schemas[node_type][k] = '' + category_schemas[node_type] = category_schemas.get(node_type, {}) + for k in node.keys (): + current_type = type(node[k]).__name__ + if not k in category_schemas[node_type]: + category_schemas[node_type][k] = current_type + else: + previous_type = category_schemas[node_type][k] + category_schemas[node_type][k] = TypeConversionUtil.compare_types(previous_type, current_type) + """ Write node and predicate schemas. """ self.write_schema (predicate_schemas, SchemaType.PREDICATE) self.write_schema (category_schemas, SchemaType.CATEGORY) @@ -296,7 +299,7 @@ def write_schema (self, schema, schema_type: SchemaType): :param schema_type: Type of schema to write. 
""" file_name = Util.schema_path (f"{schema_type.value}-schema.json") log.info (f"writing schema: {file_name}") - dictionary = { k : self.format_keys(v.keys(), schema_type) for k, v in schema.items () } + dictionary = { k : v for k, v in schema.items () } Util.write_object (dictionary, file_name) def merge_nodes (self, L, R): @@ -431,6 +434,8 @@ def __init__(self, biolink, config=None): if not config: config = get_config() self.config = config + separator = self.config.get('bulk_loader',{}).get('separator', '|') + self.separator = chr(separator) if isinstance(separator, int) else separator def tables_up_to_date (self): return Util.is_up_to_date ( @@ -465,15 +470,13 @@ def create (self): index = self.biolink.get_leaf_class (node['category']) categories[index].append (node) self.write_bulk (Util.bulk_path("nodes"), categories, categories_schema, - state=state, f=subgraph) + state=state, f=subgraph, is_relation=False) """ Write predicate data for bulk load. """ predicates = defaultdict(lambda: []) for edge in graph['edges']: predicates[edge['edge_label']].append (edge) - edge['src'] = edge.pop ('subject') - edge['dest'] = edge.pop ('object') - self.write_bulk (Util.bulk_path("edges"), predicates, predicates_schema) + self.write_bulk (Util.bulk_path("edges"), predicates, predicates_schema, is_relation=True) def cleanup (self, v): """ Filter problematic text. 
@@ -487,8 +490,70 @@ def cleanup (self, v): v = v.replace ("[", "@").replace ("]", "@") #f" {v}" v = v.replace ("|","^") return v - - def write_bulk (self, bulk_path, obj_map, schema, state={}, f=None): + + @staticmethod + def create_redis_schema_header(attributes: dict, is_relation=False): + """ + Creates col headers for csv to be used by redis bulk loader by assigning redis types + :param attributes: dictionary of data labels with values as python type strings + :param separator: CSV separator + :return: list of attributes where each item is attributeLabel:redisGraphDataType + """ + redis_type_conversion_map = { + 'str': 'STRING', + 'float': 'FLOAT', # Do we need to handle double + 'int': 'INT', + 'bool': 'BOOL', + 'list': 'ARRAY' + } + col_headers = [] + format_for_redis = lambda label, typ: f'{label}:{typ}' + for attribute, attribute_type in attributes.items(): + col_headers.append(format_for_redis(attribute, redis_type_conversion_map[attribute_type])) + # Note this two fields are only important to bulk loader + # they will not be members of the graph + # https://github.com/RedisGraph/redisgraph-bulk-loader/tree/master#input-schemas + if is_relation: + col_headers.append('internal_start_id:START_ID') + col_headers.append('internal_end_id:END_ID') + # replace id:STRING with id:ID + col_headers.append('id:ID') + col_headers = list(filter(lambda x: x != 'id:STRING', col_headers)) + return col_headers + + def group_items_by_attributes_set(self, objects: list, processed_object_ids: set): + """ + Groups items into a dictionary where the keys are sets of attributes set for all + items accessed in that key. + Eg. { set(id,name,category): [{id:'xx0',name:'bbb', 'category':['type']}.... + {id:'xx1', name:'bb2', category: ['type1']}] } + :param objects: list of nodes or edges + :param processed_object_ids: ids of object to skip since they are processed. 
+ :return: dictionary grouping based on set attributes + """ + clustered_by_set_values = {} + improper_redis_keys = set() + for obj in objects: + # redis bulk loader needs columns not to include ':' + # till backticks are implemented we should avoid these. + key_filter = lambda k: ':' not in k + keys_with_values = frozenset([k for k in obj.keys() if obj[k] and key_filter(k)]) + for key in [k for k in obj.keys() if obj[k] and not key_filter(k)]: + improper_redis_keys.add(key) + # group by attributes that have values. # Why? + # Redis bulk loader has one issue + # imagine we have {'name': 'x'} , {'name': 'y', 'is_metabolite': true} + # we have a common schema name:STRING,is_metabolite:BOOL + # values `x, ` and `y,true` but x not having value for is_metabolite is not handled + # well, redis bulk loader says we should give it default if we were to enforce schema + # but due to the nature of the data assigning defaults is very not an option. + # hence grouping data into several csv's might be the right way (?) + if obj['id'] not in processed_object_ids: + clustered_by_set_values[keys_with_values] = clustered_by_set_values.get(keys_with_values, []) + clustered_by_set_values[keys_with_values].append(obj) + return clustered_by_set_values, improper_redis_keys + + def write_bulk(self, bulk_path, obj_map, schema, state={}, is_relation=False, f=None): """ Write a bulk load group of objects. :param bulk_path: Path to the bulk loader object to write. :param obj_map: A map of biolink type to list of objects. @@ -496,39 +561,89 @@ def write_bulk (self, bulk_path, obj_map, schema, state={}, f=None): :param state: Track state of already written objects to avoid duplicates. 
""" os.makedirs (bulk_path, exist_ok=True) + processed_objects_id = state.get('processed_id', set()) + called_x_times = state.get('called_times', 0) + called_x_times += 1 for key, objects in obj_map.items (): - out_file = f"{bulk_path}/{key}.csv" if len(objects) == 0: continue - new_file = not os.path.exists (out_file) all_keys = schema[key] - with open (out_file, "a") as stream: - if new_file: - log.info (f" --creating {out_file}") - stream.write ("|".join (all_keys)) - stream.write ("\n") - """ Make all objects conform to the schema. """ - for obj in objects: - for akey in all_keys: - if not akey in obj: - obj[akey] = "" - """ Write fields, skipping duplicate objects. """ - for obj in objects: - oid = str(obj['id']) - if oid in state: - continue - state[oid] = oid - values = [ self.cleanup(obj[k]) for k in all_keys if not 'smiles' in k ] - clean = list(map(str, values)) - s = "|".join (clean) - stream.write (s) - stream.write ("\n") + """ Make all objects conform to the schema. """ + clustered_by_set_values, improper_redis_keys = self.group_items_by_attributes_set(objects, + processed_objects_id) + + if len(improper_redis_keys): + log.warning(f"The following keys were skipped since they include conflicting `:`" + f" that would cause errors while bulk loading to redis." 
+ f"{improper_redis_keys}") + for index, set_attributes in enumerate(clustered_by_set_values.keys()): + items = clustered_by_set_values[set_attributes] + # When parted files are saved let the file names be collected here + state['file_paths'] = state.get('file_paths', {}) + if key not in state['file_paths']: + state['file_paths'][key] = {set_attributes: ''} + out_file = state['file_paths'].get(key, {}).get(set_attributes, '') + + # When calling write bulk , lets say we have processed some + # chemicals from file 1 and we start processing file 2 + # if we are using just index then we might (rather will) end up adding + # records to the wrong file so we need this to be unique as possible + # by adding called_x_times , if we already found out-file from state obj + # we are sure that the schemas match. + out_file = f"{bulk_path}/{key}.csv-{index}-{called_x_times}" if not out_file else out_file + new_file = not os.path.exists(out_file) + keys_for_header = {x: all_keys[x] for x in all_keys if x in set_attributes} + redis_schema_header = self.create_redis_schema_header(keys_for_header, is_relation) + with open(out_file, "a") as stream: + if new_file: + state['file_paths'][key][set_attributes] = out_file + log.info(f" --creating {out_file}") + stream.write(self.separator.join(redis_schema_header)) + stream.write("\n") + else: + log.info(f" --appending to {out_file}") + """ Write fields, skipping duplicate objects. """ + for obj in items: + oid = str(obj['id']) + if oid in processed_objects_id: + continue + processed_objects_id.add(oid) + """ Add ID / START_ID / END_ID depending""" + internal_id_fields = { + 'internal_id': obj['id'] + } + if is_relation: + internal_id_fields.update({ + 'internal_start_id': obj['subject'], + 'internal_end_id': obj['object'] + }) + obj.update(internal_id_fields) + values = [] + # uses redis schema header to preserve order when writing lines out. 
+ for column_name in redis_schema_header: + # last key is the type + obj_key = ':'.join(column_name.split(':')[:-1]) + value = obj[obj_key] + + if obj_key not in internal_id_fields: + current_type = type(value).__name__ + expected_type = all_keys[obj_key] + # cast it if it doesn't match type in schema keys i.e all_keys + value = TypeConversionUtil.cast(obj[obj_key], all_keys[obj_key]) \ + if expected_type != current_type else value + + values.append(str(value)) + s = self.separator.join(values) + stream.write(s) + stream.write("\n") + state['processed_id'] = processed_objects_id + state['called_times'] = called_x_times def insert (self): redisgraph = self.config.get('redisgraph', {}) bulk_loader = self.config.get('bulk_loader', {}) - nodes = sorted(glob.glob (Util.bulk_path ("nodes/**.csv"))) - edges = sorted(glob.glob (Util.bulk_path ("edges/**.csv"))) + nodes = sorted(glob.glob (Util.bulk_path ("nodes/**.csv*"))) + edges = sorted(glob.glob (Util.bulk_path ("edges/**.csv*"))) graph = redisgraph['graph'] log.info(f"bulk loading \n nodes: {nodes} \n edges: {edges}") @@ -545,10 +660,11 @@ def insert (self): args.extend(("-n " + " -n ".join(nodes)).split()) if len(edges) > 0: args.extend(("-r " + " -r ".join(edges)).split()) - args.extend(["--separator=|"]) + args.extend([f"--separator={self.separator}"]) args.extend([f"--host={redisgraph['host']}"]) args.extend([f"--port={redisgraph['port']}"]) args.extend([f"--password={redisgraph['password']}"]) + args.extend(['--enforce-schema']) args.extend([f"{redisgraph['graph']}"]) """ standalone_mode=False tells click not to sys.exit() """ bulk_insert (args, standalone_mode=False) From ff058fc1f5c8f51da4f59d35172ed9a49eb0d88b Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 4 Jan 2021 10:52:41 -0500 Subject: [PATCH 009/332] adds type casting --- roger/components/data_conversion_utils.py | 54 +++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 roger/components/data_conversion_utils.py diff --git 
a/roger/components/data_conversion_utils.py b/roger/components/data_conversion_utils.py new file mode 100644 index 00000000..7d85c2b7 --- /dev/null +++ b/roger/components/data_conversion_utils.py @@ -0,0 +1,54 @@ +from typing import Any + + +class TypeConversionUtil: + + type_map = { + list.__name__: { + 'priority': 0, + 'constructor': lambda x: list([x]) + }, + str.__name__: { + 'priority': 1, + 'constructor': lambda x: str(x) + }, + bool.__name__: { + 'priority': 2, + 'constructor': lambda x: True if x else False + }, + float.__name__: { + 'priority': 3, + 'constructor': lambda x: float(x), + }, + int.__name__: { + 'priority': 4, + 'constructor': lambda x: int(x) + } + } + + @staticmethod + def cast(value: Any, to_type: str): + """ + Parses a value to dest type. + :param value: value to parse + :param to_type: destination type + :return: parsed value + """ + if to_type not in TypeConversionUtil.type_map: + raise TypeError(f'Type {to_type} not found in conversion map. Available types are {TypeConversionUtil.type_map.keys()}') + dest_type_constructor = TypeConversionUtil.type_map[to_type]['constructor'] + return dest_type_constructor(value) + + @staticmethod + def compare_types(data_type: str, data_type_2: str): + """ + Of two python types selects the one we would like to upcast to. 
+ :param data_type: + :param data_type_2: + :return: + """ + assert data_type in TypeConversionUtil.type_map, f"Unrecognised type {data_type} for list of types {list(type_map.keys())}" + assert data_type_2 in TypeConversionUtil.type_map, f"Unrecognised type {data_type} for list of types {list(type_map.keys())}" + d1_val = TypeConversionUtil.type_map[data_type]['priority'] + d2_val = TypeConversionUtil.type_map[data_type_2]['priority'] + return data_type if d1_val < d2_val else data_type_2 From b3f0d179cc6bbb62a7b43100e7263d697a2ec68a Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 4 Jan 2021 10:54:06 -0500 Subject: [PATCH 010/332] adds some more kgx files, roger_util refers to Config.py and import fix for roger_db --- roger/metadata.yaml | 12 ++++++------ roger/roger_db.py | 2 +- roger/roger_util.py | 30 +++--------------------------- 3 files changed, 10 insertions(+), 34 deletions(-) diff --git a/roger/metadata.yaml b/roger/metadata.yaml index 1b56206e..597111a5 100644 --- a/roger/metadata.yaml +++ b/roger/metadata.yaml @@ -1,16 +1,16 @@ versions: - edgeFiles: - # - biolink_kgx-edge-v0.1.json +# - biolink_kgx-edge-v0.1.json - chembio_kgx-edge-v0.1.json - chemical_normalization-edge-v0.1.json - cord19-phenotypes-edge-v0.1.json -# - cord19-scibite-edge-v0.1.json -# - cord19-scigraph-edge-v0.1.json + - cord19-scibite-edge-v0.1.json + - cord19-scigraph-edge-v0.1.json - ctd-edge-v0.1.json - foodb-edge-v0.1.json -# - kegg-edge-v0.1.json + - kegg-edge-v0.1.json - mychem-edge-v0.1.json -# - panther-edge-v0.1.json + - panther-edge-v0.1.json - pharos-edge-v0.1.json - topmed-edge-v0.1.json nodeFiles: @@ -22,7 +22,7 @@ versions: - cord19-scigraph-node-v0.1.json - ctd-node-v0.1.json - foodb-node-v0.1.json - # - kegg-node-v0.1.json + - kegg-node-v0.1.json - mychem-node-v0.1.json - panther-node-v0.1.json - pharos-node-v0.1.json diff --git a/roger/roger_db.py b/roger/roger_db.py index 5c1f458c..ef01c486 100644 --- a/roger/roger_db.py +++ b/roger/roger_db.py @@ -2,7 +2,7 @@ 
import redis from redisgraph import Node, Edge, Graph from redis.exceptions import ResponseError -from roger.roger_util import get_config, get_logger +from roger.roger_util import get_logger logger = get_logger () diff --git a/roger/roger_util.py b/roger/roger_util.py index 35c5f3f7..00bacc66 100644 --- a/roger/roger_util.py +++ b/roger/roger_util.py @@ -1,34 +1,10 @@ import logging -import requests import sys -import yaml -from os import path -from typing import Dict, Any, Optional +from typing import Optional +from roger.Config import get_default_config -config: Optional[Dict[str, Any]] = None logger: Optional[logging.Logger] = None -CONFIG_FILENAME = path.join(path.dirname(path.abspath(__file__)), 'config.yaml') - -def get_config(filename: str = CONFIG_FILENAME) -> dict: - """ - Get config as a dictionary - - Parameters - ---------- - filename: str - The filename with all the configuration - - Returns - ------- - dict - A dictionary containing all the entries from the config YAML - - """ - global config - if config is None: - config = yaml.load(open(filename), Loader=yaml.FullLoader) - return config def get_logger(name: str = 'roger') -> logging.Logger: """ @@ -47,7 +23,7 @@ def get_logger(name: str = 'roger') -> logging.Logger: """ global logger if logger is None: - config = get_config() + config = get_default_config() logger = logging.getLogger(name) handler = logging.StreamHandler(sys.stdout) formatter = logging.Formatter(config['logging']['format']) From 3b1615f3658657ea49ae5bb1281b7d20a15b9d41 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 4 Jan 2021 10:56:25 -0500 Subject: [PATCH 011/332] allow config when running the dag to override config.yaml values; add shebang to roger. Was causing bad substitution error on debian when running `make`. 
--- bin/roger | 1 + tranql_translate.py | 10 +++------- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/bin/roger b/bin/roger index f1496948..fd0df7d7 100755 --- a/bin/roger +++ b/bin/roger @@ -1,3 +1,4 @@ +#!/usr/bin/env bash #set -x set -e diff --git a/tranql_translate.py b/tranql_translate.py index 210443f3..fc5ee36c 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -12,9 +12,9 @@ from airflow.models import DAG from airflow.operators.python_operator import PythonOperator from airflow.utils.dates import days_ago -from flatten_dict import flatten, unflatten from roger.core import RogerUtil -from roger.roger_util import get_config, get_logger +from roger.Config import get_default_config as get_config +from roger.roger_util import get_logger default_args = { 'owner': 'RENCI', @@ -58,13 +58,9 @@ def task_wrapper(python_callable, **kwargs): if dag_run: dag_conf = dag_run.conf # remove this since to send every other argument to the python callable. - print(dag_conf) del kwargs['dag_run'] # overrides values - config_flat = flatten(config) - dag_conf_flat = flatten(dag_conf) - config_flat.update(dag_conf_flat) - config = unflatten(config_flat) + config.update(dag_conf) logger.info("Config") logger.info(config) return python_callable(to_string=True, config=config) From 318f53fc0eaf71f2ae070826e1a0f7d21704de86 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 4 Jan 2021 11:02:56 -0500 Subject: [PATCH 012/332] chaning airflow git branch back to main --- bin/custom-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/custom-values.yaml b/bin/custom-values.yaml index 6c510d53..3b56b317 100644 --- a/bin/custom-values.yaml +++ b/bin/custom-values.yaml @@ -133,7 +133,7 @@ dags: ## the branch/tag/sha1 which we clone ## - ref: redis-helm + ref: main ## the name of a pre-created secret containing files for ~/.ssh/ ## From 50928ace560224f1c1c1557b083c00f08dcbcbd2 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 7 Jan 2021 
13:27:11 -0500 Subject: [PATCH 013/332] equalize float, int and bool in priority. If we have two attribute bool and int convert them to strings --- roger/components/data_conversion_utils.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/roger/components/data_conversion_utils.py b/roger/components/data_conversion_utils.py index 7d85c2b7..94594885 100644 --- a/roger/components/data_conversion_utils.py +++ b/roger/components/data_conversion_utils.py @@ -17,11 +17,11 @@ class TypeConversionUtil: 'constructor': lambda x: True if x else False }, float.__name__: { - 'priority': 3, + 'priority': 2, 'constructor': lambda x: float(x), }, int.__name__: { - 'priority': 4, + 'priority': 2, 'constructor': lambda x: int(x) } } @@ -47,8 +47,19 @@ def compare_types(data_type: str, data_type_2: str): :param data_type_2: :return: """ - assert data_type in TypeConversionUtil.type_map, f"Unrecognised type {data_type} for list of types {list(type_map.keys())}" - assert data_type_2 in TypeConversionUtil.type_map, f"Unrecognised type {data_type} for list of types {list(type_map.keys())}" + assert data_type in TypeConversionUtil.type_map, f"Unrecognised type {data_type} From types:" \ + f"{list(TypeConversionUtil.type_map.keys())}" + + assert data_type_2 in TypeConversionUtil.type_map, f"Unrecognised type {data_type} From types: " \ + f"{list(TypeConversionUtil.type_map.keys())}" + d1_val = TypeConversionUtil.type_map[data_type]['priority'] d2_val = TypeConversionUtil.type_map[data_type_2]['priority'] + + if data_type != data_type_2 and d1_val == d2_val: + # For float int and bool have same priority + # treat them as strings. 
+ d1_val = (d1_val - 1) + data_type = str.__name__ + return data_type if d1_val < d2_val else data_type_2 From 1ce1584778c4b3f872ba946654a4494aee33f21d Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 7 Jan 2021 13:29:07 -0500 Subject: [PATCH 014/332] till redis-bulk next version is released using main, --- bin/custom-values.yaml | 4 ++-- requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bin/custom-values.yaml b/bin/custom-values.yaml index 3b56b317..5e45669d 100644 --- a/bin/custom-values.yaml +++ b/bin/custom-values.yaml @@ -42,7 +42,7 @@ airflow: - biolink-model==1.2.5 - biolinkml==1.5.8 - redisgraph==2.1.5 - - redisgraph-bulk-loader==0.9.3 + - git+https://github.com/RedisGraph/redisgraph-bulk-loader.git - flatten-dict - git+https://github.com/stevencox/kgx.git ################################### @@ -133,7 +133,7 @@ dags: ## the branch/tag/sha1 which we clone ## - ref: main + ref: redis-helm ## the name of a pre-created secret containing files for ~/.ssh/ ## diff --git a/requirements.txt b/requirements.txt index 9a4015af..ec461207 100644 --- a/requirements.txt +++ b/requirements.txt @@ -141,7 +141,8 @@ rdflib==5.0.0 rdflib-jsonld==0.5.0 redis==3.5.3 redisgraph==2.1.5 -redisgraph-bulk-loader==0.9.3 +#redisgraph-bulk-loader==0.9.3 , @TODO select next version once its released +https://github.com/RedisGraph/redisgraph-bulk-loader.git requests==2.24.0 Send2Trash==1.5.0 setproctitle==1.1.10 From 00b5724769690069a6781869fb0c924f5ab5e25d Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 7 Jan 2021 13:33:39 -0500 Subject: [PATCH 015/332] Disabling probes in redis chart. 
--- bin/roger | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/roger b/bin/roger index fd0df7d7..039e501a 100755 --- a/bin/roger +++ b/bin/roger @@ -23,6 +23,10 @@ start () { --set usePassword=false \ --set master.command="" \ --set slave.command="" \ + --set master.readinessProbe.enabled="false" \ + --set master.livenessProbe.enabled="false" \ + --set slave.readinessProbe.enabled="false" \ + --set slave.livenessProbe.enabled="false" \ --set master.extraFlags[0]="--loadmodule /usr/lib/redis/modules/redisgraph.so" \ --set slave.extraFlags[0]="--loadmodule /usr/lib/redis/modules/redisgraph.so" \ --namespace=$namespace \ From 4622a5ac23861c28967a1d97304699dabb2c497e Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 7 Jan 2021 13:36:05 -0500 Subject: [PATCH 016/332] Unit tests and validation --- roger/config.yaml | 23 ++++ roger/core.py | 26 +++-- roger/test/data/bulk/named_thing.csv-0-1 | 4 + ...nflicting_prop_types.schema.edges.kgx.json | 52 +++++++++ ...nflicting_prop_types.schema.nodes.kgx.json | 52 +++++++++ ...non_conflicting_prop_types.schema.kgx.json | 91 +++++++++++++++ roger/test/mocks.py | 71 +++++++++++ roger/test/test_KGX_Model.py | 47 ++++++++ roger/test/test_bulk_loader.py | 110 ++++++++++++++++++ roger/test/test_type_conversion_util.py | 49 ++++++++ 10 files changed, 513 insertions(+), 12 deletions(-) create mode 100644 roger/test/data/bulk/named_thing.csv-0-1 create mode 100644 roger/test/data/conflicting_prop_types.schema.edges.kgx.json create mode 100644 roger/test/data/conflicting_prop_types.schema.nodes.kgx.json create mode 100644 roger/test/data/non_conflicting_prop_types.schema.kgx.json create mode 100644 roger/test/mocks.py create mode 100644 roger/test/test_KGX_Model.py create mode 100644 roger/test/test_bulk_loader.py create mode 100644 roger/test/test_type_conversion_util.py diff --git a/roger/config.yaml b/roger/config.yaml index 2a7513bf..0b8f4da2 100644 --- a/roger/config.yaml +++ b/roger/config.yaml @@ -53,3 +53,26 @@ 
validation: query_by_type: name: Query by Type query: "MATCH (a:gene)-[e]-(b) WHERE 'chemical_substance' IN b.category RETURN count(distinct(a)), count(distinct(b))" + smiles_values: + name: Query Chemicals with smiles that look like arrays + query: "Match (a: chemical_substance { simple_smiles: '$var' }) RETURN a.id" + args: + - var: "[Os+6]" + - var: "[SiH2]" + - var: "[CH]" + - var: "[S-2]" + - var: "[Ti+4]" + - var: "[P-3]" + - var: "[Ca+2]" + - var: "[Au+3]" + - var: "[TeH2]" + - var: "[Pb]" + - var: "[B+]" + - var: "[AsH]" + - var: "[O-][I+2]([O-])[O-]" + - var: "[He+]" + - var: "[Mo+6]" + - var: "[N-]=[N+]=[N-]" + - var: "[Ag+]" + - var: "[Zn+2]" + - var: "[C-]#[O+]" \ No newline at end of file diff --git a/roger/core.py b/roger/core.py index 2a2d6322..7d4edc3e 100644 --- a/roger/core.py +++ b/roger/core.py @@ -263,11 +263,10 @@ def create_schema (self): predicate_schemas[predicate] = predicate_schemas.get(predicate, {}) for k in edge.keys (): current_type = type(edge[k]).__name__ - if not k in predicate_schemas[predicate]: + if k not in predicate_schemas[predicate]: predicate_schemas[predicate][k] = current_type else: previous_type = predicate_schemas[predicate][k] - # prefering lists over strings over bools over floats over ints predicate_schemas[predicate][k] = TypeConversionUtil.compare_types(previous_type, current_type) """ Infer node schemas. 
""" for node in graph['nodes']: @@ -275,7 +274,7 @@ def create_schema (self): category_schemas[node_type] = category_schemas.get(node_type, {}) for k in node.keys (): current_type = type(node[k]).__name__ - if not k in category_schemas[node_type]: + if k not in category_schemas[node_type]: category_schemas[node_type][k] = current_type else: previous_type = category_schemas[node_type][k] @@ -521,7 +520,8 @@ def create_redis_schema_header(attributes: dict, is_relation=False): col_headers = list(filter(lambda x: x != 'id:STRING', col_headers)) return col_headers - def group_items_by_attributes_set(self, objects: list, processed_object_ids: set): + @staticmethod + def group_items_by_attributes_set(objects: list, processed_object_ids: set): """ Groups items into a dictionary where the keys are sets of attributes set for all items accessed in that key. @@ -532,14 +532,15 @@ def group_items_by_attributes_set(self, objects: list, processed_object_ids: set :return: dictionary grouping based on set attributes """ clustered_by_set_values = {} - improper_redis_keys = set() + improper_keys = set() + value_set_test = lambda x: True if (x is not None and x != [] and x != '') else False for obj in objects: # redis bulk loader needs columns not to include ':' # till backticks are implemented we should avoid these. key_filter = lambda k: ':' not in k - keys_with_values = frozenset([k for k in obj.keys() if obj[k] and key_filter(k)]) + keys_with_values = frozenset([k for k in obj.keys() if value_set_test(obj[k]) and key_filter(k)]) for key in [k for k in obj.keys() if obj[k] and not key_filter(k)]: - improper_redis_keys.add(key) + improper_keys.add(key) # group by attributes that have values. # Why? 
# Redis bulk loader has one issue # imagine we have {'name': 'x'} , {'name': 'y', 'is_metabolite': true} @@ -551,7 +552,7 @@ def group_items_by_attributes_set(self, objects: list, processed_object_ids: set if obj['id'] not in processed_object_ids: clustered_by_set_values[keys_with_values] = clustered_by_set_values.get(keys_with_values, []) clustered_by_set_values[keys_with_values].append(obj) - return clustered_by_set_values, improper_redis_keys + return clustered_by_set_values, improper_keys def write_bulk(self, bulk_path, obj_map, schema, state={}, is_relation=False, f=None): """ Write a bulk load group of objects. @@ -580,10 +581,10 @@ def write_bulk(self, bulk_path, obj_map, schema, state={}, is_relation=False, f= items = clustered_by_set_values[set_attributes] # When parted files are saved let the file names be collected here state['file_paths'] = state.get('file_paths', {}) - if key not in state['file_paths']: - state['file_paths'][key] = {set_attributes: ''} - out_file = state['file_paths'].get(key, {}).get(set_attributes, '') - + state['file_paths'][key] = state['file_paths'].get(key, {}) + out_file = state['file_paths'][key][set_attributes] = state['file_paths']\ + .get(key, {})\ + .get(set_attributes, '') # When calling write bulk , lets say we have processed some # chemicals from file 1 and we start processing file 2 # if we are using just index then we might (rather will) end up adding @@ -591,6 +592,7 @@ def write_bulk(self, bulk_path, obj_map, schema, state={}, is_relation=False, f= # by adding called_x_times , if we already found out-file from state obj # we are sure that the schemas match. 
out_file = f"{bulk_path}/{key}.csv-{index}-{called_x_times}" if not out_file else out_file + state['file_paths'][key][set_attributes] = out_file # store back file name new_file = not os.path.exists(out_file) keys_for_header = {x: all_keys[x] for x in all_keys if x in set_attributes} redis_schema_header = self.create_redis_schema_header(keys_for_header, is_relation) diff --git a/roger/test/data/bulk/named_thing.csv-0-1 b/roger/test/data/bulk/named_thing.csv-0-1 new file mode 100644 index 00000000..511a1f01 --- /dev/null +++ b/roger/test/data/bulk/named_thing.csv-0-1 @@ -0,0 +1,4 @@ +str:STRING|list_attr:ARRAY|bool_attr:BOOL|float_attr:FLOAT|int_attr:INT|id:ID +name|['x']|False|0.1|0|ID:1 +name|['x']|False|0.1|0|ID:1 +name|['x']|False|0.1|0|ID:1 diff --git a/roger/test/data/conflicting_prop_types.schema.edges.kgx.json b/roger/test/data/conflicting_prop_types.schema.edges.kgx.json new file mode 100644 index 00000000..60ba1689 --- /dev/null +++ b/roger/test/data/conflicting_prop_types.schema.edges.kgx.json @@ -0,0 +1,52 @@ +{ + "nodes": [], + "edges":[ + { + "id": "edge_1", + "edge_label": "edge_type_1", + "list_vs_str": [], + "list_vs_int": [], + "list_vs_bool": [], + "list_vs_float": [], + "str_vs_float": "", + "str_vs_bool": "", + "str_vs_int": "", + "int_vs_bool": 0, + "int_vs_float": 0, + "float_vs_bool": 0 + },{ + "id": "edge_2", + "edge_label": "edge_type_1", + "list_vs_str": "", + "list_vs_int": 0, + "list_vs_bool": true, + "list_vs_float": 0.0, + "str_vs_float": 0.0, + "str_vs_bool": false, + "str_vs_int": 0, + "int_vs_bool": true, + "int_vs_float": 0.0, + "float_vs_bool": true + + } + ], + "expected_schema": { + "predicate-schema.json": { + "edge_type_1": { + "id": "str", + "edge_label": "str", + "list_vs_str": "list", + "list_vs_int": "list", + "list_vs_bool": "list", + "list_vs_float": "list", + "str_vs_float": "str", + "str_vs_bool": "str", + "str_vs_int": "str", + "int_vs_bool": "str", + "int_vs_float": "str", + "float_vs_bool": "str" + } + }, + 
"category-schema.json": {} + } +} \ No newline at end of file diff --git a/roger/test/data/conflicting_prop_types.schema.nodes.kgx.json b/roger/test/data/conflicting_prop_types.schema.nodes.kgx.json new file mode 100644 index 00000000..7816036b --- /dev/null +++ b/roger/test/data/conflicting_prop_types.schema.nodes.kgx.json @@ -0,0 +1,52 @@ +{ + "nodes":[ + { + "id": "node_1", + "category": ["named_thing"], + "list_vs_str": [], + "list_vs_int": [], + "list_vs_bool": [], + "list_vs_float": [], + "str_vs_float": "", + "str_vs_bool": "", + "str_vs_int": "", + "int_vs_bool": 0, + "int_vs_float": 0, + "float_vs_bool": 0 + },{ + "id": "node_1", + "category": ["named_thing"], + "list_vs_str": "", + "list_vs_int": 0, + "list_vs_bool": true, + "list_vs_float": 0.0, + "str_vs_float": 0.0, + "str_vs_bool": false, + "str_vs_int": 0, + "int_vs_bool": true, + "int_vs_float": 0.0, + "float_vs_bool": true + + } + ], + "edges":[], + "expected_schema": { + "category-schema.json": { + "named_thing": { + "id": "str", + "category": "list", + "list_vs_str": "list", + "list_vs_int": "list", + "list_vs_bool": "list", + "list_vs_float": "list", + "str_vs_float": "str", + "str_vs_bool": "str", + "str_vs_int": "str", + "int_vs_bool": "str", + "int_vs_float": "str", + "float_vs_bool": "str" + } + }, + "predicate-schema.json": {} + } +} \ No newline at end of file diff --git a/roger/test/data/non_conflicting_prop_types.schema.kgx.json b/roger/test/data/non_conflicting_prop_types.schema.kgx.json new file mode 100644 index 00000000..b7b5cd10 --- /dev/null +++ b/roger/test/data/non_conflicting_prop_types.schema.kgx.json @@ -0,0 +1,91 @@ +{ + "nodes": [ + { + "id": "ID1", + "category": ["named_thing"], + "list_attr": [], + "bool_attr": false, + "int_attr": 0 + }, + { + "id": "ID2", + "category": ["named_thing"], + "str_attr": "", + "float_attr": 0.0 + }, + { + "id": "Id3", + "category": ["some_other_type"], + "attr_1": "", + "attr_2": [], + "attr_3": true, + "attr_4": 1 + } + ], + "edges": [ + { + 
"edge_label": "edge_label_1", + "id": "x", + "bool_attr": false, + "int_attr": 0, + "float_attr": 0.0 + },{ + "edge_label": "edge_label_1", + "id": "x3", + "str_attr": "str", + "list_attr": [] + },{ + "edge_label": "edge_label_2", + "id": "x4", + "str_attr": "str" + }, { + "edge_label": "edge_label_2", + "id": "x3", + "bool_attr": true, + "float_attr": 2.33, + "int_attr": 3092, + "str_att": "name" + } + ], + "expected_schema": { + "category-schema.json": { + "named_thing": { + "str_attr": "str", + "list_attr": "list", + "bool_attr": "bool", + "int_attr": "int", + "float_attr": "float", + "id": "str", + "category": "list" + }, + "some_other_type": { + "id": "str", + "category": "list", + "attr_1": "str", + "attr_2": "list", + "attr_3": "bool", + "attr_4": "int" + } + }, + "predicate-schema.json": { + "edge_label_1": { + "id": "str", + "edge_label": "str", + "str_attr": "str", + "list_attr": "list", + "bool_attr": "bool", + "int_attr": "int", + "float_attr": "float" + }, + "edge_label_2": { + "id": "str", + "str_attr": "str", + "edge_label": "str", + "bool_attr": "bool", + "float_attr": "float", + "int_attr": "int", + "str_att": "str" + } + } + } +} \ No newline at end of file diff --git a/roger/test/mocks.py b/roger/test/mocks.py new file mode 100644 index 00000000..eac01ee1 --- /dev/null +++ b/roger/test/mocks.py @@ -0,0 +1,71 @@ +import os +from roger.core import SchemaType + + +class BiolinkMock: + def __init__(self): + self.leafs = [ + 'chemical_substance', + 'molecular_activity', + 'gene', + 'biological_process', + 'disease', + 'phenotypic_feature' + ] + + def get_leaf_class(self, class_names): + for y in self.leafs: + if y in class_names: + return y + return class_names[0] + + +class UtilMock: + category = None + predicates = None + file_content_assertions = {} + kgx_files = [] + merged_files = [] + schema = { + SchemaType.PREDICATE: {}, + SchemaType.CATEGORY: {} + } + + def __init__(self): + pass + + @staticmethod + def kgx_objects(): + return 
[os.path.join(*os.path.split(__file__)[:-1], 'data', file) for file in UtilMock.kgx_files] + + @staticmethod + def merged_objects(): + return [os.path.join(*os.path.split(__file__)[:-1], 'data', file) for file in UtilMock.merged_files] + + @staticmethod + def bulk_path(*args, **kwargs): + return os.path.join(*os.path.split(__file__)[:-1], 'data', 'bulk') + + @staticmethod + def is_up_to_date(*args, **kwargs): + return False + + @staticmethod + def schema_path(name, *args, **kwargs): + return name + + @staticmethod + def read_schema(schema_type: SchemaType, *args, **kwargs): + return UtilMock.schema[schema_type] + + @staticmethod + def read_object(path, *args, **kwargs): + import json + with open(path) as f: + return json.load(f) + + @staticmethod + def write_object(dictionary, file_name): + print(dictionary, file_name) + print(UtilMock.file_content_assertions) + assert UtilMock.file_content_assertions[file_name] == dictionary \ No newline at end of file diff --git a/roger/test/test_KGX_Model.py b/roger/test/test_KGX_Model.py new file mode 100644 index 00000000..b019a76a --- /dev/null +++ b/roger/test/test_KGX_Model.py @@ -0,0 +1,47 @@ +import pytest +import json +from unittest.mock import patch +from roger.core import KGXModel +from roger.test.mocks import BiolinkMock, UtilMock + + +@pytest.fixture +def kgx_model(): + biolink = BiolinkMock() + kgx_model = KGXModel(biolink=biolink, config={}) + return kgx_model + + +def setup_mock_and_run_create_schema(test_file_name, kgx_model: KGXModel): + with patch('roger.core.Util', UtilMock): + UtilMock.kgx_files = [test_file_name] + with open(UtilMock.kgx_objects()[0]) as f: + + expected = json.load(f)['expected_schema'] + UtilMock.file_content_assertions = expected + kgx_model.create_schema() + + + +def test_create_schema_plain(kgx_model: KGXModel): + file_name = 'non_conflicting_prop_types.schema.kgx.json' + setup_mock_and_run_create_schema(file_name, kgx_model=kgx_model) + + +def 
test_create_schema_conflicting_nodes(kgx_model: KGXModel): + file_name = 'conflicting_prop_types.schema.nodes.kgx.json' + setup_mock_and_run_create_schema(file_name, kgx_model=kgx_model) + + +def test_create_schema_conflicting_edges(kgx_model: KGXModel): + file_name = 'conflicting_prop_types.schema.edges.kgx.json' + setup_mock_and_run_create_schema(file_name, kgx_model=kgx_model) + + +def test_merge(kgx_model: KGXModel): + with patch('roger.core.Util', UtilMock): + UtilMock.kgx_files = [ + 'data_1.merge.kgx.json', + 'data_2.merge.kgx.json' + ] + #@TODO add tests for merge nodes \ No newline at end of file diff --git a/roger/test/test_bulk_loader.py b/roger/test/test_bulk_loader.py new file mode 100644 index 00000000..867ea843 --- /dev/null +++ b/roger/test/test_bulk_loader.py @@ -0,0 +1,110 @@ +import pytest, os +from roger.core import BulkLoad +from roger.test.mocks import BiolinkMock, UtilMock +from unittest.mock import patch + + +@pytest.fixture +def bulk_loader(): + biolink = BiolinkMock() + return BulkLoad(biolink=biolink, config={'separator': 30}) + + +def test_create_redis_schema(): + test_schema = { + 'concept': { + 'attribute0': 'list', + 'attribute1': 'str', + 'attribute2': 'int', + 'attribute3': 'bool' + } + } + redis_schema = BulkLoad.create_redis_schema_header(test_schema['concept'], is_relation=False) + assert 'attribute0:ARRAY' in redis_schema + assert 'attribute1:STRING' in redis_schema + assert 'attribute2:INT' in redis_schema + assert 'attribute3:BOOL' in redis_schema + + redis_schema = BulkLoad.create_redis_schema_header(test_schema['concept'], is_relation=True) + assert 'attribute0:ARRAY' in redis_schema + assert 'attribute1:STRING' in redis_schema + assert 'attribute2:INT' in redis_schema + assert 'attribute3:BOOL' in redis_schema + + # should add these columns to relationships + assert 'internal_start_id:START_ID' in redis_schema + assert 'internal_end_id:END_ID' in redis_schema + + +def test_group_by_set_attr(): + items = [ + { # we need to 
make sure that empty values are the only ones ignored + # not values that evaluate to false. + 'id': 0, + 'attr_1': '', + 'attr_2': 2, + 'attr_3': [], + 'attr_4': False, + 'attr_5': None + }, + { + 'id': 1, + 'attr_1': 'a', + 'attr_2': 'b', + 'attr_3': 'c', + 'attr_4': '' + } + ] + # first group is attr_2, attr_4, 'id' + group_1 = frozenset(['attr_2', 'attr_4', 'id']) + # second group is attr_1, attr_2, attr_3 , 'id' + group_2 = frozenset(['attr_1', 'attr_2', 'attr_3', 'id']) + grouping, invalid_keys = BulkLoad.group_items_by_attributes_set(objects=items, + processed_object_ids=set()) + assert group_1 in grouping + assert group_2 in grouping + + assert items[0] in grouping[group_1] + assert items[1] in grouping[group_2] + + +def test_write_bulk_nodes(bulk_loader: BulkLoad): + nodes_schema = { + "named_thing": { + "id": "str", + "str": "str", + "list_attr": "list", + "bool_attr": "bool", + "float_attr": "float", + "int_attr": "int" + } + } + node_objects = { + "named_thing": [ + { + "id": "ID:1", + "str": "name", + "list_attr": ["x"], + "bool_attr": False, + "float_attr": 0.1, + "int_attr": 0 + } + ] + } + with patch('roger.core.Util', UtilMock): + bulk_path = UtilMock.bulk_path() + state = {} + bulk_loader.write_bulk(bulk_path=bulk_path, + obj_map=node_objects, + schema=nodes_schema, + state=state, + is_relation=False) + assert len(state['file_paths']) > 0 + # @TODO add assertions. 
+ # with open(os.path.join(bulk_path,'named_thing_csv-0-1')) + + + + + + diff --git a/roger/test/test_type_conversion_util.py b/roger/test/test_type_conversion_util.py new file mode 100644 index 00000000..ab4e122f --- /dev/null +++ b/roger/test/test_type_conversion_util.py @@ -0,0 +1,49 @@ +from roger.components.data_conversion_utils import TypeConversionUtil + + +def test_type_comparision(): + datatype_1 = list.__name__ + datatype_2 = str.__name__ + datatype_3 = bool.__name__ + datatype_4 = float.__name__ + datatype_5 = int.__name__ + # list should always come first + assert datatype_1 == TypeConversionUtil.compare_types(datatype_1, datatype_2) + assert datatype_1 == TypeConversionUtil.compare_types(datatype_1, datatype_3) + assert datatype_1 == TypeConversionUtil.compare_types(datatype_1, datatype_4) + assert datatype_1 == TypeConversionUtil.compare_types(datatype_1, datatype_5) + + # then string + assert datatype_2 == TypeConversionUtil.compare_types(datatype_2, datatype_3) + assert datatype_2 == TypeConversionUtil.compare_types(datatype_2, datatype_4) + assert datatype_2 == TypeConversionUtil.compare_types(datatype_2, datatype_5) + + # the rest should always be casted up to string + assert datatype_2 == TypeConversionUtil.compare_types(datatype_3, datatype_4) + assert datatype_2 == TypeConversionUtil.compare_types(datatype_4, datatype_5) + assert datatype_2 == TypeConversionUtil.compare_types(datatype_5, datatype_3) + + # should raise error when sent 'Unknown' data types + bogus_dt = "bogus" + try: + TypeConversionUtil.compare_types(bogus_dt, datatype_1) + except AssertionError as error: + exception_raised = True + assert exception_raised + try: + TypeConversionUtil.compare_types(datatype_1, bogus_dt) + except AssertionError as error: + exception_raised = True + assert exception_raised + + +def test_casting_values(): + castable = [ + ["True", bool.__name__, True], + [1 , bool.__name__, True], + [1.0, bool.__name__, True], + [[], bool.__name__, False] + ] + for 
items in castable: + assert items[-1] == TypeConversionUtil.cast(*items[:-1]) # cast (value, type) + From 47ff9f29d252e2b6026c0d232fe8542697ed3312 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 7 Jan 2021 14:19:19 -0500 Subject: [PATCH 017/332] testing out kubernetes executor --- tranql_translate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tranql_translate.py b/tranql_translate.py index fc5ee36c..1c5b5b62 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -29,7 +29,7 @@ ) as dag: """ Configure use of KubernetesExecutor. """ - at_k8s=False + at_k8s=True def get_executor_config (annotations=None): """ Get an executor configuration. @@ -38,7 +38,8 @@ def get_executor_config (annotations=None): """ k8s_executor_config = { "KubernetesExecutor": { - "annotations": annotations + "annotations": annotations, + "image": "renciorg/airflow:1.10.12-python3.8" } } return k8s_executor_config if at_k8s else None @@ -97,4 +98,3 @@ def create_python_task (name, a_callable): """ Build the DAG. """ intro >> get_kgx >> [ create_schema, merge_nodes ] >> create_bulk_load >> \ bulk_load >> validate >> finish - From 05c11faae28e61846ff9a84b68fdc895680cfe02 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 7 Jan 2021 16:58:27 -0500 Subject: [PATCH 018/332] adding exit code to bash operator --- tranql_translate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tranql_translate.py b/tranql_translate.py index 1c5b5b62..9dc60deb 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -86,7 +86,7 @@ def create_python_task (name, a_callable): ) """ Build the workflow tasks. 
""" - intro = BashOperator(task_id='Intro', bash_command='echo running tranql translator') + intro = BashOperator(task_id='Intro', bash_command='echo running tranql translator && exit 0') get_kgx = create_python_task ("GetSource", RogerUtil.get_kgx) create_schema = create_python_task ("CreateSchema", RogerUtil.create_schema) merge_nodes = create_python_task ("MergeNodes", RogerUtil.merge_nodes) From e9f533277a629cc6eae3058d3ef4eaa43da23586 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 7 Jan 2021 17:05:20 -0500 Subject: [PATCH 019/332] Skipping intro --- tranql_translate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tranql_translate.py b/tranql_translate.py index 9dc60deb..31046b89 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -96,5 +96,6 @@ def create_python_task (name, a_callable): finish = BashOperator (task_id='Finish', bash_command='echo finish') """ Build the DAG. """ - intro >> get_kgx >> [ create_schema, merge_nodes ] >> create_bulk_load >> \ + # intro >> \ + get_kgx >> [ create_schema, merge_nodes ] >> create_bulk_load >> \ bulk_load >> validate >> finish From 2a3ff1b22d8f24b8d405ec7b619921f7df3ca1b3 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 7 Jan 2021 17:09:00 -0500 Subject: [PATCH 020/332] adding back intro --- tranql_translate.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tranql_translate.py b/tranql_translate.py index 31046b89..9dc60deb 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -96,6 +96,5 @@ def create_python_task (name, a_callable): finish = BashOperator (task_id='Finish', bash_command='echo finish') """ Build the DAG. 
""" - # intro >> \ - get_kgx >> [ create_schema, merge_nodes ] >> create_bulk_load >> \ + intro >> get_kgx >> [ create_schema, merge_nodes ] >> create_bulk_load >> \ bulk_load >> validate >> finish From 544cf855ad262c6b20532b2f730d91d19dd2297c Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Thu, 7 Jan 2021 20:05:50 -0500 Subject: [PATCH 021/332] testing out with built image --- docker/airflow/{DockerFile => Dockerfile} | 0 tranql_translate.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename docker/airflow/{DockerFile => Dockerfile} (100%) diff --git a/docker/airflow/DockerFile b/docker/airflow/Dockerfile similarity index 100% rename from docker/airflow/DockerFile rename to docker/airflow/Dockerfile diff --git a/tranql_translate.py b/tranql_translate.py index 9dc60deb..fc0e23fe 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -39,7 +39,7 @@ def get_executor_config (annotations=None): k8s_executor_config = { "KubernetesExecutor": { "annotations": annotations, - "image": "renciorg/airflow:1.10.12-python3.8" + "image": "renciorg/airflow-roger:latest" } } return k8s_executor_config if at_k8s else None From 54a3c49739f38ea57f5cfc02892be0bedc7d69e7 Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Fri, 8 Jan 2021 08:55:11 -0500 Subject: [PATCH 022/332] testing out k8s setup with pod override --- tranql_translate_k8s.py | 118 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 tranql_translate_k8s.py diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py new file mode 100644 index 00000000..90944039 --- /dev/null +++ b/tranql_translate_k8s.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +# + +""" +An Airflow workflow for the Roger Translator KGX data pipeline. 
+""" + +import os +import subprocess +from airflow.operators.bash_operator import BashOperator +from airflow.contrib.example_dags.libs.helper import print_stuff +from airflow.models import DAG +from airflow.operators.python_operator import PythonOperator +from airflow.utils.dates import days_ago +from roger.core import RogerUtil +from roger.Config import get_default_config as get_config +from roger.roger_util import get_logger +from kubernetes.client import models as k8s + + +default_args = { + 'owner': 'RENCI', + 'start_date': days_ago(1) +} + +""" Build the workflow's tasks and DAG. """ +with DAG( + dag_id='tranql_translate', + default_args=default_args, + schedule_interval=None +) as dag: + + """ Configure use of KubernetesExecutor. """ + at_k8s=True + + def get_executor_config (annotations=None): + """ Get an executor configuration. + :param annotations: Annotations to attach to the executor. + :returns: Returns a KubernetesExecutor if K8s is configured and None otherwise. + """ + k8s_executor_config = { + "pod_override": k8s.V1Pod( + spec=k8s.V1PodSpec( + containers=[ + k8s.V1Container( + name="base", + volume_mounts=[ + k8s.V1VolumeMount( + mount_path="/foo/", name="example-kubernetes-test-volume" + ) + ], + ) + ], + volumes=[ + k8s.V1Volume( + name="example-kubernetes-test-volume", + host_path=k8s.V1HostPathVolumeSource(path="/tmp/"), + ) + ], + ) + ), + } + return k8s_executor_config if at_k8s else None + + def task_wrapper(python_callable, **kwargs): + """ + Overrides configuration with config from airflow. + :param python_callable: + :param kwargs: + :return: + """ + # get dag config provided + dag_run = kwargs.get('dag_run') + dag_conf = {} + logger = get_logger() + config = get_config() + if dag_run: + dag_conf = dag_run.conf + # remove this since to send every other argument to the python callable. 
+ del kwargs['dag_run'] + # overrides values + config.update(dag_conf) + logger.info("Config") + logger.info(config) + return python_callable(to_string=True, config=config) + + def create_python_task (name, a_callable): + """ Create a python task. + :param name: The name of the task. + :param a_callable: The code to run in this task. + """ + return PythonOperator( + task_id=name, + python_callable=task_wrapper, + op_kwargs={ + "python_callable": a_callable, + "to_string": True + }, + executor_config=get_executor_config (annotations={ + "task_name" : name + }), + dag=dag, + provide_context=True + ) + + """ Build the workflow tasks. """ + intro = BashOperator(task_id='Intro', bash_command='echo running tranql translator && exit 0') + get_kgx = create_python_task ("GetSource", RogerUtil.get_kgx) + create_schema = create_python_task ("CreateSchema", RogerUtil.create_schema) + merge_nodes = create_python_task ("MergeNodes", RogerUtil.merge_nodes) + create_bulk_load = create_python_task ("CreateBulkLoad", RogerUtil.create_bulk_load) + bulk_load = create_python_task ("BulkLoad", RogerUtil.bulk_load) + validate = create_python_task ("Validate", RogerUtil.validate) + finish = BashOperator (task_id='Finish', bash_command='echo finish') + + """ Build the DAG. """ + intro >> get_kgx >> [ create_schema, merge_nodes ] >> create_bulk_load >> \ + bulk_load >> validate >> finish From ba29a9343d4e60d629ec6ea91dc93306eb18205a Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Fri, 8 Jan 2021 08:56:53 -0500 Subject: [PATCH 023/332] testing out k8s setup with pod override --- tranql_translate_k8s.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 90944039..c45b125e 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -25,7 +25,7 @@ """ Build the workflow's tasks and DAG. 
""" with DAG( - dag_id='tranql_translate', + dag_id='tranql_translate_k8s_pod_overide_test', default_args=default_args, schedule_interval=None ) as dag: From 634698eabe773c14663aa29865e85e8a27b4fde8 Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Fri, 8 Jan 2021 09:08:08 -0500 Subject: [PATCH 024/332] testing out k8s setup with pod override --- tranql_translate_k8s.py | 46 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index c45b125e..ce400899 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -39,26 +39,32 @@ def get_executor_config (annotations=None): :returns: Returns a KubernetesExecutor if K8s is configured and None otherwise. """ k8s_executor_config = { - "pod_override": k8s.V1Pod( - spec=k8s.V1PodSpec( - containers=[ - k8s.V1Container( - name="base", - volume_mounts=[ - k8s.V1VolumeMount( - mount_path="/foo/", name="example-kubernetes-test-volume" - ) - ], - ) - ], - volumes=[ - k8s.V1Volume( - name="example-kubernetes-test-volume", - host_path=k8s.V1HostPathVolumeSource(path="/tmp/"), - ) - ], - ) - ), + "KubernetesExecutor": { + "annotations": annotations, + "image": "renciorg/airflow:1.10.12-python3.8" + } + + + # "pod_override": k8s.V1Pod( + # spec=k8s.V1PodSpec( + # containers=[ + # k8s.V1Container( + # name="base", + # volume_mounts=[ + # k8s.V1VolumeMount( + # mount_path="/foo/", name="example-kubernetes-test-volume" + # ) + # ], + # ) + # ], + # volumes=[ + # k8s.V1Volume( + # name="example-kubernetes-test-volume", + # host_path=k8s.V1HostPathVolumeSource(path="/tmp/"), + # ) + # ], + # ) + # ), } return k8s_executor_config if at_k8s else None From f6dde690064d76c903bbfd6f7143f47fbd0a6925 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Fri, 8 Jan 2021 09:24:16 -0500 Subject: [PATCH 025/332] testing pod override --- tranql_translate_k8s.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git 
a/tranql_translate_k8s.py b/tranql_translate_k8s.py index ce400899..edc0e22b 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -39,9 +39,15 @@ def get_executor_config (annotations=None): :returns: Returns a KubernetesExecutor if K8s is configured and None otherwise. """ k8s_executor_config = { - "KubernetesExecutor": { - "annotations": annotations, - "image": "renciorg/airflow:1.10.12-python3.8" + "pod_override": { + "containers": [ + {"name": "base", + "env": { + "name": "testing", + "value": "some_val" + } + } + ] } From c74e866924362bd1a31d0a47fc76559d4eb3c1e0 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 11 Jan 2021 14:17:20 -0500 Subject: [PATCH 026/332] git+ for bulkloader requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ec461207..e98fb19c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -142,7 +142,7 @@ rdflib-jsonld==0.5.0 redis==3.5.3 redisgraph==2.1.5 #redisgraph-bulk-loader==0.9.3 , @TODO select next version once its released -https://github.com/RedisGraph/redisgraph-bulk-loader.git +git+https://github.com/RedisGraph/redisgraph-bulk-loader.git requests==2.24.0 Send2Trash==1.5.0 setproctitle==1.1.10 From 4e6229260a404401a2f1c824840a306a627d6df2 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 11 Jan 2021 16:43:46 -0500 Subject: [PATCH 027/332] add psycopg2-binary to requirments --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index e98fb19c..dfaa8f30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -112,6 +112,7 @@ prologterms==0.0.6 prometheus-client==0.8.0 prompt-toolkit==3.0.8 psutil==5.7.2 +psycopg2-binary=2.8.6 PTable==0.9.2 ptyprocess==0.6.0 py==1.9.0 From 644dc15e8f020e3d60ffc77f9d394cf05aaea434 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 11 Jan 2021 16:48:16 -0500 Subject: [PATCH 028/332] add psycopg2-binary to requirments --- requirements.txt | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index dfaa8f30..8531614a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -112,7 +112,7 @@ prologterms==0.0.6 prometheus-client==0.8.0 prompt-toolkit==3.0.8 psutil==5.7.2 -psycopg2-binary=2.8.6 +psycopg2-binary==2.8.6 PTable==0.9.2 ptyprocess==0.6.0 py==1.9.0 From 2286a45fe730ceb25e9f5d3e402202eff47f6ec5 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Tue, 12 Jan 2021 08:55:59 -0500 Subject: [PATCH 029/332] changing image for test --- tranql_translate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tranql_translate.py b/tranql_translate.py index fc0e23fe..9989fde2 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -39,7 +39,7 @@ def get_executor_config (annotations=None): k8s_executor_config = { "KubernetesExecutor": { "annotations": annotations, - "image": "renciorg/airflow-roger:latest" + "image": "renciorg/roger-executor:latest" } } return k8s_executor_config if at_k8s else None From 54973b2f8b73af570e98c7d9f10ec401a9f2eb2e Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Tue, 12 Jan 2021 12:02:02 -0500 Subject: [PATCH 030/332] testing out pod k8s override --- tranql_translate_k8s.py | 43 +++++++++++------------------------------ 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index edc0e22b..44d2d4b0 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -39,38 +39,17 @@ def get_executor_config (annotations=None): :returns: Returns a KubernetesExecutor if K8s is configured and None otherwise. 
""" k8s_executor_config = { - "pod_override": { - "containers": [ - {"name": "base", - "env": { - "name": "testing", - "value": "some_val" - } - } - ] - } - - - # "pod_override": k8s.V1Pod( - # spec=k8s.V1PodSpec( - # containers=[ - # k8s.V1Container( - # name="base", - # volume_mounts=[ - # k8s.V1VolumeMount( - # mount_path="/foo/", name="example-kubernetes-test-volume" - # ) - # ], - # ) - # ], - # volumes=[ - # k8s.V1Volume( - # name="example-kubernetes-test-volume", - # host_path=k8s.V1HostPathVolumeSource(path="/tmp/"), - # ) - # ], - # ) - # ), + "pod_override": k8s.V1Pod( + spec=k8s.V1PodSpec( + containers=[ + k8s.V1Container( + name="base", + image="renciorg/roger-executor:0.1", + image_pull_policy="Always" + ) + ] + ) + ), } return k8s_executor_config if at_k8s else None From 31a8c29f834a4bcfe3ef846d2ae290608af2bc0e Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Tue, 12 Jan 2021 12:11:22 -0500 Subject: [PATCH 031/332] to_dict on pod def --- tranql_translate_k8s.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 44d2d4b0..d91a7222 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -49,7 +49,7 @@ def get_executor_config (annotations=None): ) ] ) - ), + ).to_dict(), } return k8s_executor_config if at_k8s else None From cab7b957c5362771f58401975a3bc08c991f913d Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Tue, 12 Jan 2021 22:15:31 -0500 Subject: [PATCH 032/332] testing volume override --- tranql_translate_k8s.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index d91a7222..c00c3f80 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -39,18 +39,17 @@ def get_executor_config (annotations=None): :returns: Returns a KubernetesExecutor if K8s is configured and None otherwise. 
""" k8s_executor_config = { - "pod_override": k8s.V1Pod( - spec=k8s.V1PodSpec( - containers=[ - k8s.V1Container( - name="base", - image="renciorg/roger-executor:0.1", - image_pull_policy="Always" - ) - ] - ) - ).to_dict(), + "KubernetesExecutor": { + "volume_mounts": [ + { + "name": "airflow-dags", + "readOnly": "true", + "mountPath": "/opt/airflow/dags", + "subPath": "dags" + }, + ] } + } return k8s_executor_config if at_k8s else None def task_wrapper(python_callable, **kwargs): From 22212537e665dbb5339d677ea3a29ae96a16f90b Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Tue, 12 Jan 2021 22:20:22 -0500 Subject: [PATCH 033/332] baseline testing volumes --- tranql_translate_k8s.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index c00c3f80..9b74004a 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -40,14 +40,7 @@ def get_executor_config (annotations=None): """ k8s_executor_config = { "KubernetesExecutor": { - "volume_mounts": [ - { - "name": "airflow-dags", - "readOnly": "true", - "mountPath": "/opt/airflow/dags", - "subPath": "dags" - }, - ] + "volume_mounts": [] } } return k8s_executor_config if at_k8s else None From bc3a30e4269d4175cc1ea44d3f0f3414fb7037cb Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Tue, 12 Jan 2021 22:47:01 -0500 Subject: [PATCH 034/332] baseline testing volumes --- tranql_translate_k8s.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 9b74004a..87d4465e 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -40,7 +40,18 @@ def get_executor_config (annotations=None): """ k8s_executor_config = { "KubernetesExecutor": { - "volume_mounts": [] + "volumes": [ + { + "name": "test-data", + "emptyDir": {}, + }, + ], + "volume_mounts": [ + { + "mountPath": "/opt/test", + "name": "test-data", + }, + ] } } return k8s_executor_config if at_k8s else None 
From c3c36a822a6462d32e6f28cb4c5673da2216c492 Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Tue, 12 Jan 2021 22:54:01 -0500 Subject: [PATCH 035/332] baseline testing volumes --- tranql_translate_k8s.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 87d4465e..a7f2213f 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -54,7 +54,7 @@ def get_executor_config (annotations=None): ] } } - return k8s_executor_config if at_k8s else None + return k8s_executor_config # if at_k8s else None def task_wrapper(python_callable, **kwargs): """ @@ -90,9 +90,25 @@ def create_python_task (name, a_callable): "python_callable": a_callable, "to_string": True }, - executor_config=get_executor_config (annotations={ - "task_name" : name - }), + executor_config={ + "KubernetesExecutor": { + "volumes": [ + { + "name": "test-data", + "emptyDir": {}, + }, + ], + "volume_mounts": [ + { + "mountPath": "/opt/test", + "name": "test-data", + }, + ] + } + }, + # get_executor_config (annotations={ + # "task_name" : name + # }), dag=dag, provide_context=True ) From 864e0ad5e10be1593772e13c7f69e4b02cc66908 Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Tue, 12 Jan 2021 22:58:25 -0500 Subject: [PATCH 036/332] baseline testing volumes --- tranql_translate_k8s.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index a7f2213f..16b2ece6 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -54,7 +54,7 @@ def get_executor_config (annotations=None): ] } } - return k8s_executor_config # if at_k8s else None + return k8s_executor_config if at_k8s else None def task_wrapper(python_callable, **kwargs): """ @@ -90,31 +90,17 @@ def create_python_task (name, a_callable): "python_callable": a_callable, "to_string": True }, - executor_config={ - "KubernetesExecutor": { - "volumes": [ - { - 
"name": "test-data", - "emptyDir": {}, - }, - ], - "volume_mounts": [ - { - "mountPath": "/opt/test", - "name": "test-data", - }, - ] - } - }, - # get_executor_config (annotations={ - # "task_name" : name - # }), + executor_config=get_executor_config (annotations={ + "task_name" : name + }), dag=dag, provide_context=True ) """ Build the workflow tasks. """ - intro = BashOperator(task_id='Intro', bash_command='echo running tranql translator && exit 0') + intro = BashOperator(task_id='Intro', + bash_command='echo running tranql translator && exit 0', + executor_config= get_executor_config()) get_kgx = create_python_task ("GetSource", RogerUtil.get_kgx) create_schema = create_python_task ("CreateSchema", RogerUtil.create_schema) merge_nodes = create_python_task ("MergeNodes", RogerUtil.merge_nodes) From 8977ae5fea3d6df85e0f4768348bea096fe4354f Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Tue, 12 Jan 2021 23:05:06 -0500 Subject: [PATCH 037/332] baseline testing volumes --- tranql_translate_k8s.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 16b2ece6..d12891d8 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -51,6 +51,10 @@ def get_executor_config (annotations=None): "mountPath": "/opt/test", "name": "test-data", }, + { + "name": "airflow-dags", + "mountPath": "/opt/airflow/dags" + } ] } } From 61176fc48bb11bd8b0eff6b791f8cabb77688de2 Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Wed, 13 Jan 2021 00:45:42 -0500 Subject: [PATCH 038/332] baseline testing volumes --- tranql_translate_k8s.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index d12891d8..09f68bb0 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -40,6 +40,10 @@ def get_executor_config (annotations=None): """ k8s_executor_config = { "KubernetesExecutor": { + "env": { + "name": "test", + "value": "val" + }, "volumes": [ { "name": "test-data", From 
3d4f8f7552f9ebd8360184385215b7cb679b7d5f Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Wed, 13 Jan 2021 01:01:00 -0500 Subject: [PATCH 039/332] baseline testing volumes --- tranql_translate_k8s.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 09f68bb0..555e9f45 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -40,10 +40,6 @@ def get_executor_config (annotations=None): """ k8s_executor_config = { "KubernetesExecutor": { - "env": { - "name": "test", - "value": "val" - }, "volumes": [ { "name": "test-data", @@ -59,7 +55,14 @@ def get_executor_config (annotations=None): "name": "airflow-dags", "mountPath": "/opt/airflow/dags" } - ] + ], + "pod_override": { + "containers": [ + { + "env": [{"name": "XX", "value": "test"}] + } + ] + } } } return k8s_executor_config if at_k8s else None From dc6b672c11a7a5eab469012ebda3f1a1a3ca06b7 Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Wed, 13 Jan 2021 01:06:25 -0500 Subject: [PATCH 040/332] baseline testing volumes --- tranql_translate_k8s.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 555e9f45..47ff7e03 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -59,7 +59,7 @@ def get_executor_config (annotations=None): "pod_override": { "containers": [ { - "env": [{"name": "XX", "value": "test"}] + "spec": {"env": [{"name": "XX", "value": "test"}]} } ] } From baeb05de2affc3028a409f7347b572de29dddd4d Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Wed, 13 Jan 2021 01:11:31 -0500 Subject: [PATCH 041/332] baseline testing volumes --- tranql_translate_k8s.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 47ff7e03..3ef30664 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -32,7 +32,7 @@ """ Configure use of KubernetesExecutor. 
""" at_k8s=True - + def get_executor_config (annotations=None): """ Get an executor configuration. :param annotations: Annotations to attach to the executor. @@ -55,14 +55,14 @@ def get_executor_config (annotations=None): "name": "airflow-dags", "mountPath": "/opt/airflow/dags" } - ], - "pod_override": { - "containers": [ - { - "spec": {"env": [{"name": "XX", "value": "test"}]} - } - ] - } + ] + }, + "pod_override": { + "containers": [ + { + "spec": {"env": [{"name": "XX", "value": "test"}]} + } + ] } } return k8s_executor_config if at_k8s else None From 0a1d17977cf16abfac0fa5d5f3febb458e49272b Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Wed, 13 Jan 2021 01:29:21 -0500 Subject: [PATCH 042/332] baseline testing volumes --- tranql_translate_k8s.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py index 3ef30664..575d9420 100644 --- a/tranql_translate_k8s.py +++ b/tranql_translate_k8s.py @@ -56,13 +56,6 @@ def get_executor_config (annotations=None): "mountPath": "/opt/airflow/dags" } ] - }, - "pod_override": { - "containers": [ - { - "spec": {"env": [{"name": "XX", "value": "test"}]} - } - ] } } return k8s_executor_config if at_k8s else None From 437c76672d4e5abdf375c749294150c87dd339bf Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Wed, 13 Jan 2021 08:42:37 -0500 Subject: [PATCH 043/332] data volume mount pod override. 
--- docker/airflow/Dockerfile | 3 +- tranql_translate.py | 34 ++++++++--- tranql_translate_k8s.py | 118 -------------------------------------- 3 files changed, 27 insertions(+), 128 deletions(-) delete mode 100644 tranql_translate_k8s.py diff --git a/docker/airflow/Dockerfile b/docker/airflow/Dockerfile index ed7d953d..8a98beb4 100644 --- a/docker/airflow/Dockerfile +++ b/docker/airflow/Dockerfile @@ -1,4 +1,5 @@ -FROM apache/airflow:1.10.12-python3.8 +FROM apache/airflow:1.10.14-python3.8 +#https://github.com/apache/airflow/blob/4aba9c5a8b89d2827683fb4c84ac481c89ebc2b3/CHANGELOG.txt#L2065 USER root RUN apt-get update && apt-get install -y git USER airflow diff --git a/tranql_translate.py b/tranql_translate.py index 9989fde2..d3ebb996 100644 --- a/tranql_translate.py +++ b/tranql_translate.py @@ -8,7 +8,6 @@ import os import subprocess from airflow.operators.bash_operator import BashOperator -from airflow.contrib.example_dags.libs.helper import print_stuff from airflow.models import DAG from airflow.operators.python_operator import PythonOperator from airflow.utils.dates import days_ago @@ -30,16 +29,28 @@ """ Configure use of KubernetesExecutor. """ at_k8s=True - - def get_executor_config (annotations=None): + + def get_executor_config (annotations=None, data_path='/opt/roger/data'): """ Get an executor configuration. :param annotations: Annotations to attach to the executor. :returns: Returns a KubernetesExecutor if K8s is configured and None otherwise. 
""" k8s_executor_config = { "KubernetesExecutor": { - "annotations": annotations, - "image": "renciorg/roger-executor:latest" + "volumes": [ + { + "name": "roger-data", + "persistentVolumeClaim": { + "claimName": "roger-data-pvc" + } + } + ], + "volume_mounts": [ + { + "mountPath": data_path, + "name": "roger-data", + } + ] } } return k8s_executor_config if at_k8s else None @@ -56,6 +67,7 @@ def task_wrapper(python_callable, **kwargs): dag_conf = {} logger = get_logger() config = get_config() + config.update({'data_root': kwargs.get('data_path')}) if dag_run: dag_conf = dag_run.conf # remove this since to send every other argument to the python callable. @@ -71,22 +83,26 @@ def create_python_task (name, a_callable): :param name: The name of the task. :param a_callable: The code to run in this task. """ + data_path = '/opt/roger/data' return PythonOperator( task_id=name, python_callable=task_wrapper, op_kwargs={ "python_callable": a_callable, - "to_string": True + "to_string": True, + "data_path": data_path, }, executor_config=get_executor_config (annotations={ - "task_name" : name - }), + "task_name": name + }, data_path=data_path), dag=dag, provide_context=True ) """ Build the workflow tasks. """ - intro = BashOperator(task_id='Intro', bash_command='echo running tranql translator && exit 0') + intro = BashOperator(task_id='Intro', + bash_command='echo running tranql translator && exit 0', + executor_config= get_executor_config()) get_kgx = create_python_task ("GetSource", RogerUtil.get_kgx) create_schema = create_python_task ("CreateSchema", RogerUtil.create_schema) merge_nodes = create_python_task ("MergeNodes", RogerUtil.merge_nodes) diff --git a/tranql_translate_k8s.py b/tranql_translate_k8s.py deleted file mode 100644 index 575d9420..00000000 --- a/tranql_translate_k8s.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# - -""" -An Airflow workflow for the Roger Translator KGX data pipeline. 
-""" - -import os -import subprocess -from airflow.operators.bash_operator import BashOperator -from airflow.contrib.example_dags.libs.helper import print_stuff -from airflow.models import DAG -from airflow.operators.python_operator import PythonOperator -from airflow.utils.dates import days_ago -from roger.core import RogerUtil -from roger.Config import get_default_config as get_config -from roger.roger_util import get_logger -from kubernetes.client import models as k8s - - -default_args = { - 'owner': 'RENCI', - 'start_date': days_ago(1) -} - -""" Build the workflow's tasks and DAG. """ -with DAG( - dag_id='tranql_translate_k8s_pod_overide_test', - default_args=default_args, - schedule_interval=None -) as dag: - - """ Configure use of KubernetesExecutor. """ - at_k8s=True - - def get_executor_config (annotations=None): - """ Get an executor configuration. - :param annotations: Annotations to attach to the executor. - :returns: Returns a KubernetesExecutor if K8s is configured and None otherwise. - """ - k8s_executor_config = { - "KubernetesExecutor": { - "volumes": [ - { - "name": "test-data", - "emptyDir": {}, - }, - ], - "volume_mounts": [ - { - "mountPath": "/opt/test", - "name": "test-data", - }, - { - "name": "airflow-dags", - "mountPath": "/opt/airflow/dags" - } - ] - } - } - return k8s_executor_config if at_k8s else None - - def task_wrapper(python_callable, **kwargs): - """ - Overrides configuration with config from airflow. - :param python_callable: - :param kwargs: - :return: - """ - # get dag config provided - dag_run = kwargs.get('dag_run') - dag_conf = {} - logger = get_logger() - config = get_config() - if dag_run: - dag_conf = dag_run.conf - # remove this since to send every other argument to the python callable. 
- del kwargs['dag_run'] - # overrides values - config.update(dag_conf) - logger.info("Config") - logger.info(config) - return python_callable(to_string=True, config=config) - - def create_python_task (name, a_callable): - """ Create a python task. - :param name: The name of the task. - :param a_callable: The code to run in this task. - """ - return PythonOperator( - task_id=name, - python_callable=task_wrapper, - op_kwargs={ - "python_callable": a_callable, - "to_string": True - }, - executor_config=get_executor_config (annotations={ - "task_name" : name - }), - dag=dag, - provide_context=True - ) - - """ Build the workflow tasks. """ - intro = BashOperator(task_id='Intro', - bash_command='echo running tranql translator && exit 0', - executor_config= get_executor_config()) - get_kgx = create_python_task ("GetSource", RogerUtil.get_kgx) - create_schema = create_python_task ("CreateSchema", RogerUtil.create_schema) - merge_nodes = create_python_task ("MergeNodes", RogerUtil.merge_nodes) - create_bulk_load = create_python_task ("CreateBulkLoad", RogerUtil.create_bulk_load) - bulk_load = create_python_task ("BulkLoad", RogerUtil.bulk_load) - validate = create_python_task ("Validate", RogerUtil.validate) - finish = BashOperator (task_id='Finish', bash_command='echo finish') - - """ Build the DAG. """ - intro >> get_kgx >> [ create_schema, merge_nodes ] >> create_bulk_load >> \ - bulk_load >> validate >> finish From 006c480de96f155988ee505dcebe6c8184fc4305 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Wed, 13 Jan 2021 09:44:09 -0500 Subject: [PATCH 044/332] data root to be resolved on method call, having it global causes issues. 
--- roger/core.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/roger/core.py b/roger/core.py index 7d4edc3e..6497e6b1 100644 --- a/roger/core.py +++ b/roger/core.py @@ -22,8 +22,7 @@ log = get_logger () config = get_config () -data_root = config['data_root'] - + class SchemaType(Enum): """ High level semantic metatdata concepts. Categories are classes in an ontological model like Biolink. @@ -133,6 +132,7 @@ def write_object (obj, path, key=None): def kgx_path (name): """ Form a KGX object path. :path name: Name of the KGX object. """ + data_root = get_config()['data_root'] return os.path.join (data_root, "kgx", name) @staticmethod @@ -145,6 +145,7 @@ def kgx_objects (): def merge_path (name): """ Form a merged KGX object path. :path name: Name of the merged KGX object. """ + data_root = get_config()['data_root'] return os.path.join (data_root, "merge", name) @staticmethod @@ -157,12 +158,14 @@ def merged_objects (): def schema_path (name): """ Path to a schema object. :param name: Name of the object to get a path for. """ + data_root = get_config()['data_root'] return os.path.join (data_root, "schema", name) @staticmethod def bulk_path (name): """ Path to a bulk load object. :param name: Name of the object. """ + data_root = get_config()['data_root'] return os.path.join (data_root, "bulk", name) @staticmethod From 86568890be06e97c8fff1aeda03a504faace64c4 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Wed, 13 Jan 2021 11:59:06 -0500 Subject: [PATCH 045/332] airflow k8s custom values and bash script minor edits for namespace and release env var usage. 
--- bin/airk8s | 17 ++++++++++++----- bin/custom-values.yaml | 18 ++++++++++++++---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/bin/airk8s b/bin/airk8s index b75cbf25..4c6f8fc8 100755 --- a/bin/airk8s +++ b/bin/airk8s @@ -3,28 +3,35 @@ set -x set -e -namespace=${NAMESPACE:-scox} +namespace=${NAMESPACE:-default} version=v7.15.0 +release=${RELEASE:-airflow} init () { helm repo add airflow-stable https://airflow-helm.github.io/charts helm repo update } start () { - helm install "airflow" airflow-stable/airflow \ + helm install $release airflow-stable/airflow \ + --version "$version" \ + --namespace "$namespace" \ + --values ./custom-values.yaml +} +update () { + helm upgrade --install $release airflow-stable/airflow \ --version "$version" \ --namespace "$namespace" \ --values ./custom-values.yaml } status () { - helm status "airflow" --namespace $namespace + helm status $release --namespace $namespace echo Scheduler: kubectl -n $namespace logs $(kubectl get pods | grep airflow-scheduler | awk '{ print $1 }') -c git-sync echo Worker: kubectl -n $namespace logs $(kubectl get pods | grep airflow-worker | awk '{ print $1 }') -c git-sync } stop () { - helm delete "airflow" --namespace $namespace + helm delete $release --namespace $namespace } connect () { kubectl exec -it \ @@ -34,7 +41,7 @@ connect () { /bin/bash } web () { - export NODE_PORT=$(kubectl get --namespace $namespace -o jsonpath="{.spec.ports[0].nodePort}" services airflow-web) + export NODE_PORT=$(kubectl get --namespace $namespace -o jsonpath="{.spec.ports[0].nodePort}" services ${release}-web) export NODE_IP=$(kubectl get nodes --namespace $namespace -o jsonpath="{.items[0].status.addresses[0].address}") echo http://$NODE_IP:$NODE_PORT/ export AIRFLOW_UI=http://$NODE_IP:$NODE_PORT/ diff --git a/bin/custom-values.yaml b/bin/custom-values.yaml index 5e45669d..84fbf2fb 100644 --- a/bin/custom-values.yaml +++ b/bin/custom-values.yaml @@ -13,10 +13,10 @@ airflow: ## the airflow executor type 
to use ## image: - repository: renciorg/apache-airflow-1.10.12-python-3.8-git + repository: renciorg/apache-airflow-1.10.14-python-3.8-git tag: latest - executor: CeleryExecutor -# executor: KubernetesExecutor +# executor: CeleryExecutor + executor: KubernetesExecutor ## the fernet key used to encrypt the connections in the database ## @@ -30,7 +30,17 @@ airflow: AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.deny_all" AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False" AIRFLOW__WEBSERVER__RBAC: "False" - + AIRFLOW__KUBERNETES__GIT_REPO: "ssh://git@github.com/helxplatform/roger.git" + # https://airflow.apache.org/docs/apache-airflow/1.10.12/configurations-ref.html#git-ssh-key-secret-name + AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME: "airflow-secrets" + # @TODO change this to master. + AIRFLOW__KUBERNETES__GIT_BRANCH: "develop" + AIRFLOW__KUBERNETES__GIT_DAGS_FOLDER_MOUNT_POINT: "/opt/airflow/dags" + AIRFLOW__KUBERNETES__GIT_SYNC_DEST: "roger" + AIRFLOW__KUBERNETES__DAGS_VOLUME_SUBPATH: "roger" + AIRFLOW__KUBERNETES__DELETE_WORKER_PODS: "TRUE" + AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY: "renciorg/roger-executor" + AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG: "0.26" # DAGS AIRFLOW__CORE__LOAD_EXAMPLES: "False" From 88579c7926b9ffdc437dccdca2e4e2bdf5f5c10f Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Wed, 13 Jan 2021 11:59:48 -0500 Subject: [PATCH 046/332] Docker file for Roger-executor. 
--- docker/airflow/roger-executor/Dockerfile | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 docker/airflow/roger-executor/Dockerfile diff --git a/docker/airflow/roger-executor/Dockerfile b/docker/airflow/roger-executor/Dockerfile new file mode 100644 index 00000000..92c325c4 --- /dev/null +++ b/docker/airflow/roger-executor/Dockerfile @@ -0,0 +1,16 @@ +FROM apache/airflow:1.10.12-python3.8 +USER root +RUN apt-get update && apt-get install -y git gcc python3-dev +USER airflow +RUN mkdir -p /home/airflow/git/ +WORKDIR /home/airflow/git +RUN git clone https://github.com/helxplatform/roger.git --single-branch --branch redis-helm /home/airflow/git/roger +USER root +RUN pip install -r /home/airflow/git/roger/requirements.txt +USER airflow +ENV PYTHONPATH=/home/airflow/git/roger/ + + + + + From aad82a0403cce74333f41520dfe4dbc42e809892 Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Wed, 13 Jan 2021 15:41:03 -0500 Subject: [PATCH 047/332] adds docs about bulk create and schema steps --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 0e7038aa..350375d6 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,32 @@ Fetches KGX files according to a data version selecting the set of files to use. Merges nodes duplicated across files aggregating properties from all nodes ### Schema Identify and record the schema (properties) of every edge and node type. +Schema records the type resolved for each property of a node/edge. The **Schema** step generates category +schema file for node schema and predicate schema for edges. In these files properties are collected and +scoped based on type of the edges and nodes found. For instances where properties do not have consistent data +type across a given scope, the following rule is used to resolve to final data type: + +* If the property has fluctuating type among a boolean, a float or an Integer in the same scope, +its final data type would be a string.
+* If conflicting property is ever a string but never a list in the scope, its final data type will be string. +* If conflicting property is ever a list, its final data type will be a list. + +Using this approach attributes will be cast based on the resolution set here when loading to the graph database +in subsequent steps. ### Bulk Create Create bulk load CSV files conforming to the Redisgraph Bulk Loader's requirements. +**Bulk create** uses the Schema generated in **Schema** step to generate csv headers +([redis csv headers](https://github.com/RedisGraph/redisgraph-bulk-loader#input-schemas)) with +the assumed types . Currently redis bulk loader requires every column to have a value. +To address this issue, this step groups the entities being processed (edges/nodes) +based on attributes that have values. Then these groups are written into separate csv files. Nodes +are written as csv(s) under `/bulk/nodes` for nodes and `/bulk/edges`. +Each csv with these folders has the following naming convention +`.csv--`. + +When populating the CSV with values, the appropriate casting is done on the properties to normalize +them to the data types defined in the **Schema** step. + ### Bulk Load Use the bulk loader to load Redisgraph logging statistics on each type of loaded object. ### Validate From e9fe119350f75e15763477e8a1a9ab8649fe3936 Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Wed, 13 Jan 2021 15:44:20 -0500 Subject: [PATCH 048/332] adds docs about bulk create and schema steps --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 350375d6..0983a18d 100644 --- a/README.md +++ b/README.md @@ -78,10 +78,9 @@ Create bulk load CSV files conforming to the Redisgraph Bulk Loader's requiremen the assumed types . Currently redis bulk loader requires every column to have a value. To address this issue, this step groups the entities being processed (edges/nodes) based on attributes that have values.
Then these groups are written into separate csv files. Nodes -are written as csv(s) under `/bulk/nodes` for nodes and `/bulk/edges`. +are written as csv(s) under `/bulk/nodes` and edges under `/bulk/edges`. Each csv with these folders has the following naming convention `.csv--`. - When populating the CSV with values, the appropriate casting is done on the properties to normalize them to the data types defined in the **Schema** step. From 6b21a6ac2642f33edd2b73f3bd27fe42f562bd88 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Wed, 13 Jan 2021 16:57:46 -0500 Subject: [PATCH 049/332] readme updates on k8s install --- README.md | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0983a18d..b40bd6b8 100644 --- a/README.md +++ b/README.md @@ -520,7 +520,90 @@ python tranql_translator.py The Airflow interface shows the workflow: ![image](https://user-images.githubusercontent.com/306971/97787955-b968f680-1b8b-11eb-86cc-4d93842eafd3.png) -Use the Trigger icon to run the workflow immediatley. +Use the Trigger icon to run the workflow immediately. + + +### Running Roger in Kubernetes + +#### 1. Setup Airflow + Roger supports installing airflow on kubernetes via [Helm](helm.sh). + + Create a pvc(roger-data-pvc) for storing roger Data with the following definition. + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: roger-data-pvc +spec: + storageClassName: + accessModes: + - ReadWriteMany + resources: + requests: + storage: +``` + +Then run : + +```shell script +kubectl -n create -f pvc.yaml +``` + +Navigate to `roger/bin` dir, and init airflow (adds [airflow helm repo](https://airflow-helm.github.io/charts)) +```shell script +cd bin/ +export NAMESPACE= +export RELEASE_NAME= +./airk8s init +``` + +Start airflow + +```shell script +./airk8s start +``` + +To get to airflow web interface +```shell script +./airk8s web +``` + +#### 2. 
Setup Redis + +Initialize Redis bitnami [helm chart](https://github.com/bitnami/charts/tree/master/bitnami/redis#redis). + +```shell script +export NAMESPACE= +export RELEASE=redisgraph +export REDIS_IMAGE=redislabs/redisgraph +export REDIS_IMAGE_TAG=edge +export CLUSTER_DOMAIN=cluster.local +export REDIS_WORKER_COUNT=1 + +./roger init +``` + +Start redis cluster + +```shell script +./roger start +``` + +#### 3. Run Roger + +The Airflow interface shows the workflow: +![image](https://user-images.githubusercontent.com/45075777/104513185-403f4400-55bd-11eb-9142-cbfd7879504b.png) + +Press Trigger to get to the following page: +![image](https://user-images.githubusercontent.com/45075777/104513451-b04dca00-55bd-11eb-837c-65d20d697fff.png) + +Enter the configuration parameters to get to Redis cluster installed in step 2: +```json +{"redisgraph": {"host": "", "port": 6379 , "graph" : "graph-name" }} +``` +And run work flow. + From b443e51546fd67b38968371d1d56d5bda3469c2c Mon Sep 17 00:00:00 2001 From: yaphetkg Date: Tue, 9 Feb 2021 10:44:54 -0500 Subject: [PATCH 050/332] helm chart initial commit --- helm/roger/.gitignore | 1 + helm/roger/.helmignore | 23 ++ helm/roger/Chart.lock | 9 + helm/roger/Chart.yaml | 32 +++ helm/roger/Readme.md | 6 + helm/roger/templates/NOTES.txt | 1 + helm/roger/templates/_helpers.tpl | 51 ++++ helm/roger/templates/tranql-config-map.yaml | 23 ++ helm/roger/templates/tranql_deployment.yaml | 57 +++++ helm/roger/templates/tranql_service.yaml | 16 ++ helm/roger/values.yaml | 245 ++++++++++++++++++++ 11 files changed, 464 insertions(+) create mode 100644 helm/roger/.gitignore create mode 100644 helm/roger/.helmignore create mode 100644 helm/roger/Chart.lock create mode 100644 helm/roger/Chart.yaml create mode 100644 helm/roger/Readme.md create mode 100644 helm/roger/templates/NOTES.txt create mode 100644 helm/roger/templates/_helpers.tpl create mode 100644 helm/roger/templates/tranql-config-map.yaml create mode 100644 
helm/roger/templates/tranql_deployment.yaml create mode 100644 helm/roger/templates/tranql_service.yaml create mode 100644 helm/roger/values.yaml diff --git a/helm/roger/.gitignore b/helm/roger/.gitignore new file mode 100644 index 00000000..711a39c5 --- /dev/null +++ b/helm/roger/.gitignore @@ -0,0 +1 @@ +charts/ \ No newline at end of file diff --git a/helm/roger/.helmignore b/helm/roger/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/helm/roger/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm/roger/Chart.lock b/helm/roger/Chart.lock new file mode 100644 index 00000000..a7002164 --- /dev/null +++ b/helm/roger/Chart.lock @@ -0,0 +1,9 @@ +dependencies: +- name: airflow + repository: https://airflow-helm.github.io/charts + version: 7.15.0 +- name: redis + repository: https://charts.bitnami.com/bitnami + version: 12.7.4 +digest: sha256:46d75a6fdcd58a5a57baf09a75918ec3264bfca30b7c065fb4e4f517980168df +generated: "2021-02-08T12:17:50.2207903-05:00" diff --git a/helm/roger/Chart.yaml b/helm/roger/Chart.yaml new file mode 100644 index 00000000..925ca14d --- /dev/null +++ b/helm/roger/Chart.yaml @@ -0,0 +1,32 @@ +apiVersion: v2 +name: roger +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. 
Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +appVersion: 1.0.0 + +# Dependencies that roger requires +dependencies: + - name: airflow + version: "v7.15.0" + repository: "https://airflow-helm.github.io/charts" + - name: redis + version: "12.7.4" + repository: "https://charts.bitnami.com/bitnami" diff --git a/helm/roger/Readme.md b/helm/roger/Readme.md new file mode 100644 index 00000000..3eea1bcf --- /dev/null +++ b/helm/roger/Readme.md @@ -0,0 +1,6 @@ +Roger helm chart +---- + +### Introduction + +This helm chart installs \ No newline at end of file diff --git a/helm/roger/templates/NOTES.txt b/helm/roger/templates/NOTES.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/helm/roger/templates/NOTES.txt @@ -0,0 +1 @@ + diff --git a/helm/roger/templates/_helpers.tpl b/helm/roger/templates/_helpers.tpl new file mode 100644 index 00000000..070e2960 --- /dev/null +++ b/helm/roger/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "roger.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "roger.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "roger.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "roger.labels" -}} +helm.sh/chart: {{ include "roger.chart" . }} +{{ include "roger.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "roger.selectorLabels" -}} +app.kubernetes.io/name: {{ include "roger.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} \ No newline at end of file diff --git a/helm/roger/templates/tranql-config-map.yaml b/helm/roger/templates/tranql-config-map.yaml new file mode 100644 index 00000000..fe01d30d --- /dev/null +++ b/helm/roger/templates/tranql-config-map.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "roger.fullname" . }}-tranql-configmap + labels: + {{- include "roger.labels" . | nindent 4 }} +data: + conf.yml: | + m: z + schema.yml: | + doc: | + Schema configuration for tranql + schema: + redis: + doc: | + Roger is a knowledge graph built by aggregeting several kgx formatted knowledge graphs from several sources. 
+ url: "redis:" + redis: true + redis_connection_params: + host: {{ .Release.Name }}-redis-master + port: 6379 + # SET USERNAME and PASSWORD + # via ROGER_USERNAME , ROGER_PASSWORD Env vars (i.e capitialize service name) diff --git a/helm/roger/templates/tranql_deployment.yaml b/helm/roger/templates/tranql_deployment.yaml new file mode 100644 index 00000000..8c51637e --- /dev/null +++ b/helm/roger/templates/tranql_deployment.yaml @@ -0,0 +1,57 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "roger.fullname" . }}-tranql-frontend + labels: + {{- include "roger.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.tranql.replicas }} + selector: + matchLabels: + {{- include "roger.selectorLabels" . | nindent 6 }} + service-type: web-server + template: + metadata: + labels: + {{- include "roger.selectorLabels" . | nindent 8 }} + service-type: web-server + spec: + volumes: + - name: config-yaml + configMap: + name: {{ include "roger.fullname" . }}-tranql-configmap + defaultMode: 0777 + - name: logs + emptyDir: {} + containers: + - name: {{ include "roger.fullname" . 
}}-tranql-web-container + image: {{ .Values.tranql.image }}:{{ .Values.tranql.imageTag }} + command: + - "/usr/local/bin/gunicorn" + - "--workers={{ .Values.tranql.gunicorn.workerCount }}" + - "--bind=0.0.0.0:{{ .Values.tranql.port }}" + - "--timeout={{ .Values.tranql.gunicorn.workerTimeout }}" + - "--access-logfile=$(ACCESS_LOG)" + - "--error-logfile=$(ERROR_LOG)" + - "--log-level=debug" + - "tranql.api:app" + ports: + - containerPort: {{ .Values.tranql.port }} + name: http + env: + - name: APP_PORT + value: {{ .Values.tranql.webPort | quote }} + - name: ACCESS_LOG + value: "/var/nfs/tranql-frontend_access_log" + - name: ERROR_LOG + value: "/var/nfs/tranql-frontend_error_log" + volumeMounts: + - name: logs + mountPath: /var/nfs + - name: config-yaml + subPath: conf.yml + mountPath: /tranql/tranql/conf.yml + - name: config-yaml + subPath: schema.yml + mountPath: /tranql/tranql/conf/schema.yaml + restartPolicy: Always \ No newline at end of file diff --git a/helm/roger/templates/tranql_service.yaml b/helm/roger/templates/tranql_service.yaml new file mode 100644 index 00000000..74842234 --- /dev/null +++ b/helm/roger/templates/tranql_service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "roger.fullname" . }}-tranql-service + labels: + {{- include "roger.labels" . | nindent 4 }} +spec: + type: {{ .Values.tranql.service.type }} + ports: + - port: {{ .Values.tranql.port }} + targetPort: {{ .Values.tranql.port }} + protocol: TCP + name: http + selector: + {{- include "roger.selectorLabels" . 
| nindent 4 }} + service-type: web-server \ No newline at end of file diff --git a/helm/roger/values.yaml b/helm/roger/values.yaml new file mode 100644 index 00000000..ba373ed6 --- /dev/null +++ b/helm/roger/values.yaml @@ -0,0 +1,245 @@ +redis: + image: + repository: redislabs/redisgraph + tag: 2.2.13 + redis: + command: "redis-server" + clusterDomain: "blackbalsam-cluster" + cluster: + slaveCount: 1 + usePassword: false + master: + command: "" + readinessProbe: + enabled: false + livenessProbe: + enabled: false + extraFlags: + - "--loadmodule /usr/lib/redis/modules/redisgraph.so" + slave: + command: "" + readinessProbe: + enabled: false + livenessProbe: + enabled: false + extraFlags: + - "--loadmodule /usr/lib/redis/modules/redisgraph.so" + + +airflow: + # + # NOTE: + # - This is intended to be a `custom-values.yaml` starting point for non-production deployment (like minikube) + + # External Dependencies: + # - A PUBLIC git repo for DAGs: ssh://git@repo.example.com:my-airflow-dags.git + # + + ################################### + # Airflow - Common Configs + ################################### + airflow: + ## the airflow executor type to use + ## + image: + repository: renciorg/apache-airflow-1.10.14-python-3.8-git + tag: latest + # executor: CeleryExecutor + executor: KubernetesExecutor + + ## the fernet key used to encrypt the connections in the database + ## + fernetKey: "7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=" + + ## environment variables for the web/scheduler/worker Pods (for airflow configs) + ## + config: + # Security + AIRFLOW__CORE__SECURE_MODE: "True" + AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.deny_all" + AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False" + AIRFLOW__WEBSERVER__RBAC: "False" + AIRFLOW__KUBERNETES__GIT_REPO: "ssh://git@github.com/helxplatform/roger.git" + # https://airflow.apache.org/docs/apache-airflow/1.10.12/configurations-ref.html#git-ssh-key-secret-name + AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME: "airflow-secrets" + # 
@TODO change this to master. + AIRFLOW__KUBERNETES__GIT_BRANCH: "develop" + AIRFLOW__KUBERNETES__GIT_DAGS_FOLDER_MOUNT_POINT: "/opt/airflow/dags" + AIRFLOW__KUBERNETES__GIT_SYNC_DEST: "roger" + AIRFLOW__KUBERNETES__DAGS_VOLUME_SUBPATH: "roger" + AIRFLOW__KUBERNETES__DELETE_WORKER_PODS: "FALSE" + AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY: "renciorg/roger-executor" + AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG: "0.26" + # DAGS + AIRFLOW__CORE__LOAD_EXAMPLES: "False" + + ## Disable noisy "Handling signal: ttou" Gunicorn log messages + GUNICORN_CMD_ARGS: "--log-level WARNING" + + extraPipPackages: + - Babel==2.8.0 + - biolink-model==1.2.5 + - biolinkml==1.5.8 + - redisgraph==2.1.5 + - git+https://github.com/RedisGraph/redisgraph-bulk-loader.git + - flatten-dict + - git+https://github.com/stevencox/kgx.git + ################################### + # Volumes + ################################### + + ## EXAMPLE: + ## extraVolumeMounts: + ## - name: synchronised-dags + ## mountPath: /opt/airflow/dags + ## + extraVolumeMounts: + - name: roger-data + mountPath: /dags/roger/data + + ## extra volumes for the web/scheduler/worker Pods + ## + ## EXAMPLE: + ## extraVolumes: + ## - name: synchronised-dags + ## emptyDir: {} + ## + extraVolumes: + - name: roger-data + emptyDir: {} + + ################################### + # Airflow - Scheduler Configs + ################################### + scheduler: + + ## custom airflow connections for the airflow scheduler + ## + # connections: + # - id: my_aws + # type: aws + # extra: | + # { + # "aws_access_key_id": "XXXXXXXXXXXXXXXXXXX", + # "aws_secret_access_key": "XXXXXXXXXXXXXXX", + # "region_name":"eu-central-1" + # } + + ## custom airflow variables for the airflow scheduler + ## + variables: | + { "environment": "dev" } + + ## custom airflow pools for the airflow scheduler + ## + pools: | + { + "example": { + "description": "This is an example pool with 2 slots.", + "slots": 2 + } + } + + ################################### + # 
Airflow - WebUI Configs + ################################### + web: + ## configs for the Service of the web Pods + ## + service: + type: NodePort + + ################################### + # Airflow - Worker Configs + ################################### + workers: + ## the number of workers Pods to run + ## + replicas: 1 + + ################################### + # Airflow - DAGs Configs + ################################### + dags: + ## configs for the DAG git repository & sync container + ## + git: + ## url of the git repository + ## + #url: "ssh://git@repo.example.com/my-airflow-dags.git" + #url: "ssh://git@github.com/stevencox/airflow.git" + url: "ssh://git@github.com/helxplatform/roger.git" + + ## the branch/tag/sha1 which we clone + ## + ref: redis-helm + + ## the name of a pre-created secret containing files for ~/.ssh/ + ## + ## NOTE: + ## - this is ONLY RELEVANT for SSH git repos + ## - the secret commonly includes files: id_rsa, id_rsa.pub, known_hosts + ## - known_hosts is NOT NEEDED if `git.sshKeyscan` is true + ## + secret: airflow-git-keys + + ## the name of the private key file in your `git.secret` + ## + ## NOTE: + ## - this is ONLY RELEVANT for PRIVATE SSH git repos + ## + privateKeyName: id_rsa + + ## the host name of the git repo + ## + ## NOTE: + ## - this is ONLY REQUIRED for SSH git repos + ## + ## EXAMPLE: + ## repoHost: "github.com" + ## + repoHost: "github.com" + + ## the port of the git repo + ## + ## NOTE: + ## - this is ONLY REQUIRED for SSH git repos + ## + repoPort: 22 + + ## configs for the git-sync container + ## + gitSync: + ## enable the git-sync sidecar container + ## + enabled: true + + ## the git sync interval in seconds + ## + refreshTime: 60 + installRequirments: true + + ################################### + # Database - PostgreSQL Chart + ################################### + postgresql: + enabled: true + + ################################### + # Database - Redis Chart + ################################### + redis: + enabled: 
false + + +tranql: + image: renciorg/tranql-app + imageTag: develop-test + replicas: 1 + port: 8081 + gunicorn: + workerCount: 4 + workerTimeout: 300 + service: + type: ClusterIP + From 593b4deed8361b401829c09d2022ca78dc197976 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Tue, 9 Feb 2021 16:44:27 -0500 Subject: [PATCH 051/332] Readme update --- helm/roger/Readme.md | 119 ++++++++++++++++++++++++++++++++++++++++- helm/roger/values.yaml | 62 +++------------------ 2 files changed, 125 insertions(+), 56 deletions(-) diff --git a/helm/roger/Readme.md b/helm/roger/Readme.md index 3eea1bcf..b746b1a2 100644 --- a/helm/roger/Readme.md +++ b/helm/roger/Readme.md @@ -3,4 +3,121 @@ Roger helm chart ### Introduction -This helm chart installs \ No newline at end of file +This chart can be used to run Roger (graph curation pipeline) interfaced with + [tranql](https://github.com/NCATS-Tangerine/tranql) used as a query engine. + + +![image](https://user-images.githubusercontent.com/45075777/107399084-54983300-6ace-11eb-929d-0d8113405cce.png) + +This chart has Airflow and Redis added as dependencies. + +### Parameters + +#### Tranql + +| Parameter | Description | Default | +| --------- | ---- | ---- | +| `tranql.image` | Docker image | `renciorg/tranql-app` +| `tranql.imageTag` | Docker image tag | `develop-test` +| `tranql.replicas` | Web server replicas | `1` +| `tranql.port` | Web server port | `8081` +| `tranql.gunicorn.workerCount` | Gunicorn worker thread counts | `4` +| `tranql.gunicorn.workerTimeout` | Gunicorn worker timeout | `300` +| `tranql.service.type` | Tranql service | `ClusterIP` + + +#### Airflow + +For more details on these defaults and additional customization, +please refer to [Airflow helm docs](https://github.com/helm/charts/tree/master/stable/airflow). + +> **Note**: We use a custom-built airflow image (`renciorg/apache-airflow-1.10.14-python-3.8-git`) to support pip installs from git.
+ + +| Parameter | Default | +| --------- | ---- | +| `airflow.airflow.image.repository` | `renciorg/apache-airflow-1.10.14-python-3.8-git` +| `airflow.airflow.image.tag` | `latest` +| `airflow.airflow.executor` | `KubernetesExecutor` +| `airflow.airflow.fernetKey` | `7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=` +| `airflow.airflow.config.AIRFLOW__CORE__SECURE_MODE` | `True` +| `airflow.airflow.config.AIRFLOW__API__AUTH_BACKEND` | `airflow.api.auth.backend.deny_all` +| `airflow.airflow.config.AIRFLOW__WEBSERVER__EXPOSE_CONFIG` | `False` +| `airflow.airflow.config.AIRFLOW__WEBSERVER__RBAC` | `False` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_REPO` | `ssh://git@github.com/helxplatform/roger.git` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME` | `airflow-secrets` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_BRANCH` | `develop` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_DAGS_FOLDER_MOUNT_POINT` | `/opt/airflow/dags` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SYNC_DEST` | `roger` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__DAGS_VOLUME_SUBPATH` | `roger` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__DELETE_WORKER_PODS` | `FALSE` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY` | `renciorg/roger-executor` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG` | `0.26` +| `airflow.airflow.config.AIRFLOW__CORE__LOAD_EXAMPLES` | `False` +| `airflow.airflow.config.GUNICORN_CMD_ARGS` | `--log-level WARNING` +| `airflow.airflow.extraPipPackages` | `['Babel==2.8.0', 'biolink-model==1.2.5', 'biolinkml==1.5.8', 'redisgraph==2.1.5', 'git+https://github.com/RedisGraph/redisgraph-bulk-loader.git', 'flatten-dict', 'git+https://github.com/stevencox/kgx.git']` +| `airflow.airflow.extraVolumeMounts` | `[{'name': 'roger-data', 'mountPath': '/dags/roger/data'}]` +| `airflow.airflow.extraVolumes` | `[{'name': 'roger-data', 'emptyDir': {}}]` +| `airflow.web.service.type` | `ClusterIP` 
+| `airflow.workers.replicas` | `1` +| `airflow.dags.git.url` | `ssh://git@github.com/helxplatform/roger.git` +| `airflow.dags.git.ref` | `redis-helm` +| `airflow.dags.git.secret` | `airflow-git-keys` +| `airflow.dags.git.privateKeyName` | `id_rsa` +| `airflow.dags.git.repoHost` | `github.com` +| `airflow.dags.git.repoPort` | `22` +| `airflow.dags.git.gitSync.enabled` | `True` +| `airflow.dags.git.gitSync.refreshTime` | `60` +| `airflow.dags.installRequirments` | `True` +| `airflow.postgresql.enabled` | `True` +| `airflow.redis.enabled` | `False` + + +#### Redis + +For more details on these values and additional configuration options please +refer to this [Redis helm chart](https://github.com/bitnami/charts/tree/master/bitnami/redis). + +| Parameter | Default | +| --------- | ---- | +| `redis.image.repository` | `redislabs/redisgraph` +| `redis.image.tag` | `2.2.13` +| `redis.redis.command` | `redis-server` +| `redis.clusterDomain` | `cluster-domain` +| `redis.cluster.slaveCount` | `1` +| `redis.usePassword` | `False` +| `redis.master.command` | `nil` +| `redis.master.readinessProbe.enabled` | `False` +| `redis.master.livenessProbe.enabled` | `False` +| `redis.master.extraFlags` | `['--loadmodule /usr/lib/redis/modules/redisgraph.so']` +| `redis.slave.command` | `nil` +| `redis.slave.readinessProbe.enabled` | `False` +| `redis.slave.livenessProbe.enabled` | `False` +| `redis.slave.extraFlags` | `['--loadmodule /usr/lib/redis/modules/redisgraph.so']` + + +### Installation + +To install `my-release` to kubernetes. + +```shell script +$ helm install my-release . +``` + + +### Upgrading + +To upgrade `my-release` with an example change to `tranql.imageTag` value. + +```shell script +$ helm upgrade --set tranql.imageTag=0.33 my-release . 
+``` + + +### Uninstalling + +To remove `my-release` + +```shell script +$ helm uninstall my-release +``` \ No newline at end of file diff --git a/helm/roger/values.yaml b/helm/roger/values.yaml index ba373ed6..bbf814ad 100644 --- a/helm/roger/values.yaml +++ b/helm/roger/values.yaml @@ -84,61 +84,21 @@ airflow: - git+https://github.com/RedisGraph/redisgraph-bulk-loader.git - flatten-dict - git+https://github.com/stevencox/kgx.git - ################################### - # Volumes - ################################### - ## EXAMPLE: - ## extraVolumeMounts: - ## - name: synchronised-dags - ## mountPath: /opt/airflow/dags - ## + ################################### + # Volumes + ################################### extraVolumeMounts: - name: roger-data mountPath: /dags/roger/data - ## extra volumes for the web/scheduler/worker Pods - ## - ## EXAMPLE: - ## extraVolumes: - ## - name: synchronised-dags - ## emptyDir: {} - ## + ## extra volumes for the web/scheduler/worker Pods + extraVolumes: - name: roger-data emptyDir: {} - ################################### - # Airflow - Scheduler Configs - ################################### - scheduler: - - ## custom airflow connections for the airflow scheduler - ## - # connections: - # - id: my_aws - # type: aws - # extra: | - # { - # "aws_access_key_id": "XXXXXXXXXXXXXXXXXXX", - # "aws_secret_access_key": "XXXXXXXXXXXXXXX", - # "region_name":"eu-central-1" - # } - - ## custom airflow variables for the airflow scheduler - ## - variables: | - { "environment": "dev" } - ## custom airflow pools for the airflow scheduler - ## - pools: | - { - "example": { - "description": "This is an example pool with 2 slots.", - "slots": 2 - } - } ################################### # Airflow - WebUI Configs @@ -147,7 +107,7 @@ airflow: ## configs for the Service of the web Pods ## service: - type: NodePort + type: ClusterIP ################################### # Airflow - Worker Configs @@ -165,14 +125,11 @@ airflow: ## git: ## url of the git 
repository - ## - #url: "ssh://git@repo.example.com/my-airflow-dags.git" - #url: "ssh://git@github.com/stevencox/airflow.git" url: "ssh://git@github.com/helxplatform/roger.git" ## the branch/tag/sha1 which we clone ## - ref: redis-helm + ref: develop ## the name of a pre-created secret containing files for ~/.ssh/ ## @@ -201,21 +158,16 @@ airflow: repoHost: "github.com" ## the port of the git repo - ## ## NOTE: ## - this is ONLY REQUIRED for SSH git repos ## repoPort: 22 ## configs for the git-sync container - ## gitSync: ## enable the git-sync sidecar container - ## enabled: true - ## the git sync interval in seconds - ## refreshTime: 60 installRequirments: true From b3e7d2572974d49a3f84bde557d6d93b99adeadc Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Tue, 9 Feb 2021 16:47:07 -0500 Subject: [PATCH 052/332] Readme update --- helm/roger/Readme.md | 1 + 1 file changed, 1 insertion(+) diff --git a/helm/roger/Readme.md b/helm/roger/Readme.md index b746b1a2..076dd871 100644 --- a/helm/roger/Readme.md +++ b/helm/roger/Readme.md @@ -101,6 +101,7 @@ refer to this [Redis helm chart](https://github.com/bitnami/charts/tree/master/b To install `my-release` to kubernetes. ```shell script +$ helm dependency update . $ helm install my-release . ``` From d8efd8f26e8d0accde2fa8e86eeef20ec1f9b4bd Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Thu, 11 Feb 2021 14:57:39 -0500 Subject: [PATCH 053/332] Adding prerequisite secretes and volume --- helm/roger/Readme.md | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/helm/roger/Readme.md b/helm/roger/Readme.md index 076dd871..f7ad6409 100644 --- a/helm/roger/Readme.md +++ b/helm/roger/Readme.md @@ -11,6 +11,59 @@ This chart can be used to run Roger (graph curation pipeline) interfaced with This chart has Airflow and Redis added as dependencies. +### Pre-install Volumes and Secrets + +##### PVC + + +This installation requires a PVC `roger-data-pvc` to store Roger pipeline data. 
Here is a template +to create the PVC: +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: roger-data-pvc +spec: + storageClassName: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 20Gi +``` + +--- + +#### Secrets: + +There are two secrets for airflow required for Git syncronization. + +This is used by `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME` + ```yaml + kind: Secret + apiVersion: v1 + metadata: + name: airflow-secrets + data: + gitSshKey: >- + ###### + type: Opaque + ``` + +This used by `airflow.dags.git.secret` + +```yaml +kind: Secret +apiVersion: v1 +metadata: + name: airflow-git-keys +data: + id_rsa: + id_rsa.pub: + known_hosts: +type: Opaque +``` + ### Parameters #### Tranql From 99b13e250b57e0216c29e76a6d32fdc56eac6f32 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Tue, 16 Feb 2021 10:59:21 -0500 Subject: [PATCH 054/332] Adding prerequisite secretes and volume to read me , some instructions for easier run of roger helm chart. 
--- README.md | 85 ++-- bin/airk8s | 5 + bin/roger | 35 +- helm/{roger => }/.gitignore | 0 helm/{roger => }/.helmignore | 0 helm/{roger => }/Chart.lock | 0 helm/{roger => }/Chart.yaml | 0 helm/{roger => }/Readme.md | 352 ++++++++-------- helm/roger/templates/NOTES.txt | 1 - helm/templates/NOTES.txt | 21 + helm/{roger => }/templates/_helpers.tpl | 0 .../templates/tranql-config-map.yaml | 46 +- .../templates/tranql_deployment.yaml | 112 ++--- .../{roger => }/templates/tranql_service.yaml | 30 +- helm/{roger => }/values.yaml | 394 +++++++++--------- 15 files changed, 558 insertions(+), 523 deletions(-) rename helm/{roger => }/.gitignore (100%) rename helm/{roger => }/.helmignore (100%) rename helm/{roger => }/Chart.lock (100%) rename helm/{roger => }/Chart.yaml (100%) rename helm/{roger => }/Readme.md (97%) delete mode 100644 helm/roger/templates/NOTES.txt create mode 100644 helm/templates/NOTES.txt rename helm/{roger => }/templates/_helpers.tpl (100%) rename helm/{roger => }/templates/tranql-config-map.yaml (96%) rename helm/{roger => }/templates/tranql_deployment.yaml (97%) rename helm/{roger => }/templates/tranql_service.yaml (96%) rename helm/{roger => }/values.yaml (96%) diff --git a/README.md b/README.md index b40bd6b8..aa4eb094 100644 --- a/README.md +++ b/README.md @@ -525,8 +525,11 @@ Use the Trigger icon to run the workflow immediately. ### Running Roger in Kubernetes -#### 1. Setup Airflow - Roger supports installing airflow on kubernetes via [Helm](helm.sh). +Roger supports installing on kubernetes via [Helm](helm.sh). + +### Prerequisites + +#### 1. Setup persistence volume Create a pvc(roger-data-pvc) for storing roger Data with the following definition. @@ -550,47 +553,62 @@ Then run : kubectl -n create -f pvc.yaml ``` -Navigate to `roger/bin` dir, and init airflow (adds [airflow helm repo](https://airflow-helm.github.io/charts)) -```shell script -cd bin/ -export NAMESPACE= -export RELEASE_NAME= -./airk8s init -``` +#### 2. 
Create git ssh secrets: -Start airflow +There are two secrets for airflow required for Git synchronization. -```shell script -./airk8s start -``` +This is used by `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME` + ```yaml + kind: Secret + apiVersion: v1 + metadata: + name: airflow-secrets + data: + gitSshKey: >- + + type: Opaque + ``` -To get to airflow web interface -```shell script -./airk8s web +This is used by `airflow.dags.git.secret` + +```yaml +kind: Secret +apiVersion: v1 +metadata: + name: airflow-git-keys +data: + id_rsa: + id_rsa.pub: + known_hosts: +type: Opaque ``` -#### 2. Setup Redis +### Installing -Initialize Redis bitnami [helm chart](https://github.com/bitnami/charts/tree/master/bitnami/redis#redis). +#### 1. Init helm dependencies +Navigate to `roger/bin` dir, and run `roger init`. This will initialize helm dependencies for [airflow helm repo](https://airflow-helm.github.io/charts) +and [redis helm repo](https://github.com/bitnami/charts/tree/master/bitnami/redis#redis). ```shell script -export NAMESPACE= -export RELEASE=redisgraph -export REDIS_IMAGE=redislabs/redisgraph -export REDIS_IMAGE_TAG=edge +cd bin/ +export NAMESPACE= +export RELEASE= export CLUSTER_DOMAIN=cluster.local -export REDIS_WORKER_COUNT=1 - -./roger init +./roger init ``` -Start redis cluster +#### 2. Installing + +Run the command below and follow the printed notes to access the servers. ```shell script -./roger start +./roger start ``` -#### 3. Run Roger +#### 3. Run Roger workflow + +In the Notes a port forward command should be printed. Use that to +access airflow UI and run the following steps to run Roger workflow. The Airflow interface shows the workflow: ![image](https://user-images.githubusercontent.com/45075777/104513185-403f4400-55bd-11eb-9142-cbfd7879504b.png) @@ -605,7 +623,14 @@ Enter the configuration parameters to get to Redis cluster installed in step 2: And run work flow. +#### 4.
Other Commands: +To shutdown and remove the setup from k8s: +```shell script +./roger stop +``` - - +To restart the setup: +```shell script +./roger restart +``` diff --git a/bin/airk8s b/bin/airk8s index 4c6f8fc8..659c074f 100755 --- a/bin/airk8s +++ b/bin/airk8s @@ -53,6 +53,11 @@ gitsecret () { --from-file=id_rsa.pub=$HOME/.ssh/id_rsa.pub \ --from-file=known_hosts=$HOME/.ssh/known_hosts \ --namespace $namespace + + kubectl create secret generic \ + airflow-secrets + --from-file=gitSshKey=$HOME/.ssh/id_rsa \ + --namespace $namespace } $* diff --git a/bin/roger b/bin/roger index 039e501a..fff1d380 100755 --- a/bin/roger +++ b/bin/roger @@ -2,35 +2,22 @@ #set -x set -e -namespace=${NAMESPACE:-scox} -release=${RELEASE:-redisgraph-test} +namespace=${NAMESPACE:-} +release=${RELEASE:-roger} image_repository=${REDIS_IMAGE:-redislabs/redisgraph} -image_tag=${REDIS_IMAGE_TAG:-edge} +image_tag=${REDIS_IMAGE_TAG:-2.2.13} cluster_domain=${CLUSTER_DOMAIN:-cluster.local} redis_worker_count=${REDIS_WORKER_COUNT:-1} # https://github.com/bitnami/charts/tree/master/bitnami/redis init () { - helm repo add bitnami https://charts.bitnami.com/bitnami + helm dependency update ../helm } start () { - helm install $release \ - --set image.repository=$image_repository \ - --set image.tag=$image_tag \ - --set redis.command="redis-server" \ - --set clusterDomain=$cluster_domain \ - --set cluster.slaveCount=$redis_worker_count \ - --set usePassword=false \ - --set master.command="" \ - --set slave.command="" \ - --set master.readinessProbe.enabled="false" \ - --set master.livenessProbe.enabled="false" \ - --set slave.readinessProbe.enabled="false" \ - --set slave.livenessProbe.enabled="false" \ - --set master.extraFlags[0]="--loadmodule /usr/lib/redis/modules/redisgraph.so" \ - --set slave.extraFlags[0]="--loadmodule /usr/lib/redis/modules/redisgraph.so" \ - --namespace=$namespace \ - bitnami/redis + helm upgrade --install $release \ + --set redis.clusterDomain=$cluster_domain \ + 
--namespace=$namespace \ + ../helm } stop () { helm delete $release \ @@ -41,12 +28,10 @@ restart () { start } status () { - kubectl --namespace=$namespace get pods | grep $release - export REDIS_PASSWORD=$(kubectl get secret --namespace $namespace redisgraph -o jsonpath="{.data.redis-password}" | base64 --decode) + helm --namespace=$namespace status $release } client () { - #kubectl port-forward --namespace $namespace svc/redisgraph-master 6380:6379 & - redis-cli -h 127.0.0.1 -p 6380 -a $REDIS_PASSWORD + redis-cli -h 127.0.0.1 -p 6379 -a $REDIS_PASSWORD } #---------------------------- diff --git a/helm/roger/.gitignore b/helm/.gitignore similarity index 100% rename from helm/roger/.gitignore rename to helm/.gitignore diff --git a/helm/roger/.helmignore b/helm/.helmignore similarity index 100% rename from helm/roger/.helmignore rename to helm/.helmignore diff --git a/helm/roger/Chart.lock b/helm/Chart.lock similarity index 100% rename from helm/roger/Chart.lock rename to helm/Chart.lock diff --git a/helm/roger/Chart.yaml b/helm/Chart.yaml similarity index 100% rename from helm/roger/Chart.yaml rename to helm/Chart.yaml diff --git a/helm/roger/Readme.md b/helm/Readme.md similarity index 97% rename from helm/roger/Readme.md rename to helm/Readme.md index f7ad6409..c44f1dc4 100644 --- a/helm/roger/Readme.md +++ b/helm/Readme.md @@ -1,177 +1,177 @@ -Roger helm chart ----- - -### Introduction - -This chart can be used to run Roger (graph curation pipeline) interfaced with - [tranql](https://github.com/NCATS-Tangerine/tranql) used as a query engine. - - -![image](https://user-images.githubusercontent.com/45075777/107399084-54983300-6ace-11eb-929d-0d8113405cce.png) - -This chart has Airflow and Redis added as dependencies. - -### Pre-install Volumes and Secrets - -##### PVC - - -This installation requires a PVC `roger-data-pvc` to store Roger pipeline data. 
Here is a template -to create the PVC: -```yaml -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: roger-data-pvc -spec: - storageClassName: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 20Gi -``` - ---- - -#### Secrets: - -There are two secrets for airflow required for Git syncronization. - -This is used by `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME` - ```yaml - kind: Secret - apiVersion: v1 - metadata: - name: airflow-secrets - data: - gitSshKey: >- - ###### - type: Opaque - ``` - -This used by `airflow.dags.git.secret` - -```yaml -kind: Secret -apiVersion: v1 -metadata: - name: airflow-git-keys -data: - id_rsa: - id_rsa.pub: - known_hosts: -type: Opaque -``` - -### Parameters - -#### Tranql - -| Parameter | Description | Default | -| --------- | ---- | ---- | -| `tranql.image` | Docker image | `renciorg/tranql-app` -| `tranql.imageTag` | Docker image tag | `develop-test` -| `tranql.replicas` | Web server replicas | `1` -| `tranql.port` | Web server port | `8081` -| `tranql.gunicorn.workerCount` | Gunicorn worker thread counts | `4` -| `tranql.gunicorn.workerTimeout` | Gunicorn worker timeout | `300` -| `tranql.service.type` | Tranql service | `ClusterIP` - - -#### Airflow - -For more details on these defaults and additional customization, -please refer to [Airflow helm docs](https://github.com/helm/charts/tree/master/stable/airflow). - -> **Note**: We use a custom build airflow image(`renciorg/apache-airflow-1.10.14-python-3.8-git`) to support pip installs form git. 
- - -| Parameter | Default | -| --------- | ---- | -| `airflow.airflow.image.repository` | `renciorg/apache-airflow-1.10.14-python-3.8-git` -| `airflow.airflow.image.tag` | `latest` -| `airflow.airflow.executor` | `KubernetesExecutor` -| `airflow.airflow.fernetKey` | `7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=` -| `airflow.airflow.config.AIRFLOW__CORE__SECURE_MODE` | `True` -| `airflow.airflow.config.AIRFLOW__API__AUTH_BACKEND` | `airflow.api.auth.backend.deny_all` -| `airflow.airflow.config.AIRFLOW__WEBSERVER__EXPOSE_CONFIG` | `False` -| `airflow.airflow.config.AIRFLOW__WEBSERVER__RBAC` | `False` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_REPO` | `ssh://git@github.com/helxplatform/roger.git` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME` | `airflow-secrets` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_BRANCH` | `develop` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_DAGS_FOLDER_MOUNT_POINT` | `/opt/airflow/dags` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SYNC_DEST` | `roger` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__DAGS_VOLUME_SUBPATH` | `roger` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__DELETE_WORKER_PODS` | `FALSE` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY` | `renciorg/roger-executor` -| `airflow.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG` | `0.26` -| `airflow.airflow.config.AIRFLOW__CORE__LOAD_EXAMPLES` | `False` -| `airflow.airflow.config.GUNICORN_CMD_ARGS` | `--log-level WARNING` -| `airflow.airflow.extraPipPackages` | `['Babel==2.8.0', 'biolink-model==1.2.5', 'biolinkml==1.5.8', 'redisgraph==2.1.5', 'git+https://github.com/RedisGraph/redisgraph-bulk-loader.git', 'flatten-dict', 'git+https://github.com/stevencox/kgx.git']` -| `airflow.airflow.extraVolumeMounts` | `[{'name': 'roger-data', 'mountPath': '/dags/roger/data'}]` -| `airflow.airflow.extraVolumes` | `[{'name': 'roger-data', 'emptyDir': {}}]` -| `airflow.web.service.type` | `ClusterIP` 
-| `airflow.workers.replicas` | `1` -| `airflow.dags.git.url` | `ssh://git@github.com/helxplatform/roger.git` -| `airflow.dags.git.ref` | `redis-helm` -| `airflow.dags.git.secret` | `airflow-git-keys` -| `airflow.dags.git.privateKeyName` | `id_rsa` -| `airflow.dags.git.repoHost` | `github.com` -| `airflow.dags.git.repoPort` | `22` -| `airflow.dags.git.gitSync.enabled` | `True` -| `airflow.dags.git.gitSync.refreshTime` | `60` -| `airflow.dags.installRequirments` | `True` -| `airflow.postgresql.enabled` | `True` -| `airflow.redis.enabled` | `False` - - -#### Redis - -For more details on these values and additional configuration options please -refer to this [Redis helm chart](https://github.com/bitnami/charts/tree/master/bitnami/redis). - -| Parameter | Default | -| --------- | ---- | -| `redis.image.repository` | `redislabs/redisgraph` -| `redis.image.tag` | `2.2.13` -| `redis.redis.command` | `redis-server` -| `redis.clusterDomain` | `cluster-domain` -| `redis.cluster.slaveCount` | `1` -| `redis.usePassword` | `False` -| `redis.master.command` | `nil` -| `redis.master.readinessProbe.enabled` | `False` -| `redis.master.livenessProbe.enabled` | `False` -| `redis.master.extraFlags` | `['--loadmodule /usr/lib/redis/modules/redisgraph.so']` -| `redis.slave.command` | `nil` -| `redis.slave.readinessProbe.enabled` | `False` -| `redis.slave.livenessProbe.enabled` | `False` -| `redis.slave.extraFlags` | `['--loadmodule /usr/lib/redis/modules/redisgraph.so']` - - -### Installation - -To install `my-release` to kubernetes. - -```shell script -$ helm dependency update . -$ helm install my-release . -``` - - -### Upgrading - -To upgrade `my-release` with an example change to `tranql.imageTag` value. - -```shell script -$ helm upgrade --set tranql.imageTag=0.33 my-release . 
-``` - - -### Uninstalling - -To remove `my-release` - -```shell script -$ helm uninstall my-release +Roger helm chart +---- + +### Introduction + +This chart can be used to run Roger (graph curation pipeline) interfaced with + [tranql](https://github.com/NCATS-Tangerine/tranql) used as a query engine. + + +![image](https://user-images.githubusercontent.com/45075777/107399084-54983300-6ace-11eb-929d-0d8113405cce.png) + +This chart has Airflow and Redis added as dependencies. + +### Pre-install Volumes and Secrets + +##### PVC + + +This installation requires a PVC `roger-data-pvc` to store Roger pipeline data. Here is a template +to create the PVC: +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: roger-data-pvc +spec: + storageClassName: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 20Gi +``` + +--- + +#### Secrets: + +There are two secrets for airflow required for Git synchronization. + +This is used by `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME` + ```yaml + kind: Secret + apiVersion: v1 + metadata: + name: airflow-secrets + data: + gitSshKey: >- + ###### + type: Opaque + ``` + +This is used by `airflow.dags.git.secret` + +```yaml +kind: Secret +apiVersion: v1 +metadata: + name: airflow-git-keys +data: + id_rsa: + id_rsa.pub: + known_hosts: +type: Opaque +``` + +### Parameters + +#### Tranql + +| Parameter | Description | Default | +| --------- | ---- | ---- | +| `tranql.image` | Docker image | `renciorg/tranql-app` +| `tranql.imageTag` | Docker image tag | `develop-test` +| `tranql.replicas` | Web server replicas | `1` +| `tranql.port` | Web server port | `8081` +| `tranql.gunicorn.workerCount` | Gunicorn worker thread counts | `4` +| `tranql.gunicorn.workerTimeout` | Gunicorn worker timeout | `300` +| `tranql.service.type` | Tranql service | `ClusterIP` + + +#### Airflow + +For more details on these defaults and additional customization, +please refer to [Airflow helm
docs](https://github.com/helm/charts/tree/master/stable/airflow). + +> **Note**: We use a custom-built airflow image (`renciorg/apache-airflow-1.10.14-python-3.8-git`) to support pip installs from git. + + +| Parameter | Default | +| --------- | ---- | +| `airflow.airflow.image.repository` | `renciorg/apache-airflow-1.10.14-python-3.8-git` +| `airflow.airflow.image.tag` | `latest` +| `airflow.airflow.executor` | `KubernetesExecutor` +| `airflow.airflow.fernetKey` | `7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=` +| `airflow.airflow.config.AIRFLOW__CORE__SECURE_MODE` | `True` +| `airflow.airflow.config.AIRFLOW__API__AUTH_BACKEND` | `airflow.api.auth.backend.deny_all` +| `airflow.airflow.config.AIRFLOW__WEBSERVER__EXPOSE_CONFIG` | `False` +| `airflow.airflow.config.AIRFLOW__WEBSERVER__RBAC` | `False` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_REPO` | `ssh://git@github.com/helxplatform/roger.git` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME` | `airflow-secrets` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_BRANCH` | `develop` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_DAGS_FOLDER_MOUNT_POINT` | `/opt/airflow/dags` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__GIT_SYNC_DEST` | `roger` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__DAGS_VOLUME_SUBPATH` | `roger` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__DELETE_WORKER_PODS` | `FALSE` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY` | `renciorg/roger-executor` +| `airflow.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG` | `0.26` +| `airflow.airflow.config.AIRFLOW__CORE__LOAD_EXAMPLES` | `False` +| `airflow.airflow.config.GUNICORN_CMD_ARGS` | `--log-level WARNING` +| `airflow.airflow.extraPipPackages` | `['Babel==2.8.0', 'biolink-model==1.2.5', 'biolinkml==1.5.8', 'redisgraph==2.1.5', 'git+https://github.com/RedisGraph/redisgraph-bulk-loader.git', 'flatten-dict', 'git+https://github.com/stevencox/kgx.git']` +|
`airflow.airflow.extraVolumeMounts` | `[{'name': 'roger-data', 'mountPath': '/dags/roger/data'}]` +| `airflow.airflow.extraVolumes` | `[{'name': 'roger-data', 'emptyDir': {}}]` +| `airflow.web.service.type` | `ClusterIP` +| `airflow.workers.replicas` | `1` +| `airflow.dags.git.url` | `ssh://git@github.com/helxplatform/roger.git` +| `airflow.dags.git.ref` | `develop` +| `airflow.dags.git.secret` | `airflow-git-keys` +| `airflow.dags.git.privateKeyName` | `id_rsa` +| `airflow.dags.git.repoHost` | `github.com` +| `airflow.dags.git.repoPort` | `22` +| `airflow.dags.git.gitSync.enabled` | `True` +| `airflow.dags.git.gitSync.refreshTime` | `60` +| `airflow.dags.installRequirments` | `True` +| `airflow.postgresql.enabled` | `True` +| `airflow.redis.enabled` | `False` + + +#### Redis + +For more details on these values and additional configuration options please +refer to this [Redis helm chart](https://github.com/bitnami/charts/tree/master/bitnami/redis). + +| Parameter | Default | +| --------- | ---- | +| `redis.image.repository` | `redislabs/redisgraph` +| `redis.image.tag` | `2.2.13` +| `redis.redis.command` | `redis-server` +| `redis.clusterDomain` | `cluster-domain` +| `redis.cluster.slaveCount` | `1` +| `redis.usePassword` | `False` +| `redis.master.command` | `nil` +| `redis.master.readinessProbe.enabled` | `False` +| `redis.master.livenessProbe.enabled` | `False` +| `redis.master.extraFlags` | `['--loadmodule /usr/lib/redis/modules/redisgraph.so']` +| `redis.slave.command` | `nil` +| `redis.slave.readinessProbe.enabled` | `False` +| `redis.slave.livenessProbe.enabled` | `False` +| `redis.slave.extraFlags` | `['--loadmodule /usr/lib/redis/modules/redisgraph.so']` + + +### Installation + +To install `my-release` to kubernetes. + +```shell script +$ helm dependency update . +$ helm install my-release . +``` + + +### Upgrading + +To upgrade `my-release` with an example change to `tranql.imageTag` value.
+ +```shell script +$ helm upgrade --set tranql.imageTag=0.33 my-release . +``` + + +### Uninstalling + +To remove `my-release` + +```shell script +$ helm uninstall my-release ``` \ No newline at end of file diff --git a/helm/roger/templates/NOTES.txt b/helm/roger/templates/NOTES.txt deleted file mode 100644 index 8b137891..00000000 --- a/helm/roger/templates/NOTES.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/helm/templates/NOTES.txt b/helm/templates/NOTES.txt new file mode 100644 index 00000000..89422c73 --- /dev/null +++ b/helm/templates/NOTES.txt @@ -0,0 +1,21 @@ +Installed {{ .Release.Namespace }} + +To access Tranql web use the following command: + +> export TRANQL_PORT={{ .Values.tranql.port }} && kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "roger.fullname" . }}-tranql-service $TRANQL_PORT + +Browse to http://localhost:{{ .Values.tranql.port }} + + +To access Airflow web UI use the following command: + +> export AIRFLOW_UI_PORT={{ .Values.airflow.web.service.externalPort }} && kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "roger.fullname" . }}-web $AIRFLOW_UI_PORT + +Browse to http://localhost:{{ .Values.airflow.web.service.externalPort }} + +To access Redis data store use the following command + +> export REDIS_PORT={{ .Values.redis.redisPort }} && kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "roger.fullname" . 
}}-redis-master $REDIS_PORT + +On another terminal + > redis-cli -h 127.0.0.1 -p $REDIS_PORT {{ if .Values.redis.usePassword }} -a {{ .Values.redis.password }}{{- end }} \ No newline at end of file diff --git a/helm/roger/templates/_helpers.tpl b/helm/templates/_helpers.tpl similarity index 100% rename from helm/roger/templates/_helpers.tpl rename to helm/templates/_helpers.tpl diff --git a/helm/roger/templates/tranql-config-map.yaml b/helm/templates/tranql-config-map.yaml similarity index 96% rename from helm/roger/templates/tranql-config-map.yaml rename to helm/templates/tranql-config-map.yaml index fe01d30d..bce94a5e 100644 --- a/helm/roger/templates/tranql-config-map.yaml +++ b/helm/templates/tranql-config-map.yaml @@ -1,23 +1,23 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "roger.fullname" . }}-tranql-configmap - labels: - {{- include "roger.labels" . | nindent 4 }} -data: - conf.yml: | - m: z - schema.yml: | - doc: | - Schema configuration for tranql - schema: - redis: - doc: | - Roger is a knowledge graph built by aggregeting several kgx formatted knowledge graphs from several sources. - url: "redis:" - redis: true - redis_connection_params: - host: {{ .Release.Name }}-redis-master - port: 6379 - # SET USERNAME and PASSWORD - # via ROGER_USERNAME , ROGER_PASSWORD Env vars (i.e capitialize service name) +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "roger.fullname" . }}-tranql-configmap + labels: + {{- include "roger.labels" . | nindent 4 }} +data: + conf.yml: | + m: z + schema.yml: | + doc: | + Schema configuration for tranql + schema: + redis: + doc: | + Roger is a knowledge graph built by aggregeting several kgx formatted knowledge graphs from several sources. 
+ url: "redis:" + redis: true + redis_connection_params: + host: {{ .Release.Name }}-redis-master + port: 6379 + # SET USERNAME and PASSWORD + # via ROGER_USERNAME , ROGER_PASSWORD Env vars (i.e capitialize service name) diff --git a/helm/roger/templates/tranql_deployment.yaml b/helm/templates/tranql_deployment.yaml similarity index 97% rename from helm/roger/templates/tranql_deployment.yaml rename to helm/templates/tranql_deployment.yaml index 8c51637e..3902cb74 100644 --- a/helm/roger/templates/tranql_deployment.yaml +++ b/helm/templates/tranql_deployment.yaml @@ -1,57 +1,57 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "roger.fullname" . }}-tranql-frontend - labels: - {{- include "roger.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.tranql.replicas }} - selector: - matchLabels: - {{- include "roger.selectorLabels" . | nindent 6 }} - service-type: web-server - template: - metadata: - labels: - {{- include "roger.selectorLabels" . | nindent 8 }} - service-type: web-server - spec: - volumes: - - name: config-yaml - configMap: - name: {{ include "roger.fullname" . }}-tranql-configmap - defaultMode: 0777 - - name: logs - emptyDir: {} - containers: - - name: {{ include "roger.fullname" . 
}}-tranql-web-container - image: {{ .Values.tranql.image }}:{{ .Values.tranql.imageTag }} - command: - - "/usr/local/bin/gunicorn" - - "--workers={{ .Values.tranql.gunicorn.workerCount }}" - - "--bind=0.0.0.0:{{ .Values.tranql.port }}" - - "--timeout={{ .Values.tranql.gunicorn.workerTimeout }}" - - "--access-logfile=$(ACCESS_LOG)" - - "--error-logfile=$(ERROR_LOG)" - - "--log-level=debug" - - "tranql.api:app" - ports: - - containerPort: {{ .Values.tranql.port }} - name: http - env: - - name: APP_PORT - value: {{ .Values.tranql.webPort | quote }} - - name: ACCESS_LOG - value: "/var/nfs/tranql-frontend_access_log" - - name: ERROR_LOG - value: "/var/nfs/tranql-frontend_error_log" - volumeMounts: - - name: logs - mountPath: /var/nfs - - name: config-yaml - subPath: conf.yml - mountPath: /tranql/tranql/conf.yml - - name: config-yaml - subPath: schema.yml - mountPath: /tranql/tranql/conf/schema.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "roger.fullname" . }}-tranql-frontend + labels: + {{- include "roger.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.tranql.replicas }} + selector: + matchLabels: + {{- include "roger.selectorLabels" . | nindent 6 }} + service-type: web-server + template: + metadata: + labels: + {{- include "roger.selectorLabels" . | nindent 8 }} + service-type: web-server + spec: + volumes: + - name: config-yaml + configMap: + name: {{ include "roger.fullname" . }}-tranql-configmap + defaultMode: 0777 + - name: logs + emptyDir: {} + containers: + - name: {{ include "roger.fullname" . 
}}-tranql-web-container + image: {{ .Values.tranql.image }}:{{ .Values.tranql.imageTag }} + command: + - "/usr/local/bin/gunicorn" + - "--workers={{ .Values.tranql.gunicorn.workerCount }}" + - "--bind=0.0.0.0:{{ .Values.tranql.port }}" + - "--timeout={{ .Values.tranql.gunicorn.workerTimeout }}" + - "--access-logfile=$(ACCESS_LOG)" + - "--error-logfile=$(ERROR_LOG)" + - "--log-level=debug" + - "tranql.api:app" + ports: + - containerPort: {{ .Values.tranql.port }} + name: http + env: + - name: APP_PORT + value: {{ .Values.tranql.webPort | quote }} + - name: ACCESS_LOG + value: "/var/nfs/tranql-frontend_access_log" + - name: ERROR_LOG + value: "/var/nfs/tranql-frontend_error_log" + volumeMounts: + - name: logs + mountPath: /var/nfs + - name: config-yaml + subPath: conf.yml + mountPath: /tranql/tranql/conf.yml + - name: config-yaml + subPath: schema.yml + mountPath: /tranql/tranql/conf/schema.yaml restartPolicy: Always \ No newline at end of file diff --git a/helm/roger/templates/tranql_service.yaml b/helm/templates/tranql_service.yaml similarity index 96% rename from helm/roger/templates/tranql_service.yaml rename to helm/templates/tranql_service.yaml index 74842234..be148909 100644 --- a/helm/roger/templates/tranql_service.yaml +++ b/helm/templates/tranql_service.yaml @@ -1,16 +1,16 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "roger.fullname" . }}-tranql-service - labels: - {{- include "roger.labels" . | nindent 4 }} -spec: - type: {{ .Values.tranql.service.type }} - ports: - - port: {{ .Values.tranql.port }} - targetPort: {{ .Values.tranql.port }} - protocol: TCP - name: http - selector: - {{- include "roger.selectorLabels" . | nindent 4 }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "roger.fullname" . }}-tranql-service + labels: + {{- include "roger.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.tranql.service.type }} + ports: + - port: {{ .Values.tranql.port }} + targetPort: {{ .Values.tranql.port }} + protocol: TCP + name: http + selector: + {{- include "roger.selectorLabels" . | nindent 4 }} service-type: web-server \ No newline at end of file diff --git a/helm/roger/values.yaml b/helm/values.yaml similarity index 96% rename from helm/roger/values.yaml rename to helm/values.yaml index bbf814ad..9ed2eb6c 100644 --- a/helm/roger/values.yaml +++ b/helm/values.yaml @@ -1,197 +1,197 @@ -redis: - image: - repository: redislabs/redisgraph - tag: 2.2.13 - redis: - command: "redis-server" - clusterDomain: "blackbalsam-cluster" - cluster: - slaveCount: 1 - usePassword: false - master: - command: "" - readinessProbe: - enabled: false - livenessProbe: - enabled: false - extraFlags: - - "--loadmodule /usr/lib/redis/modules/redisgraph.so" - slave: - command: "" - readinessProbe: - enabled: false - livenessProbe: - enabled: false - extraFlags: - - "--loadmodule /usr/lib/redis/modules/redisgraph.so" - - -airflow: - # - # NOTE: - # - This is intended to be a `custom-values.yaml` starting point for non-production deployment (like minikube) - - # External Dependencies: - # - A PUBLIC git repo for DAGs: ssh://git@repo.example.com:my-airflow-dags.git - # - - ################################### - # Airflow - Common Configs - ################################### - airflow: - ## the airflow executor type to use - ## - image: - repository: renciorg/apache-airflow-1.10.14-python-3.8-git - tag: latest - # executor: CeleryExecutor - executor: KubernetesExecutor - - ## the fernet key used to encrypt the connections in the database - ## - fernetKey: "7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=" - - ## environment variables for the web/scheduler/worker Pods (for airflow configs) - ## - config: - # Security - AIRFLOW__CORE__SECURE_MODE: "True" - AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.deny_all" - AIRFLOW__WEBSERVER__EXPOSE_CONFIG: 
"False" - AIRFLOW__WEBSERVER__RBAC: "False" - AIRFLOW__KUBERNETES__GIT_REPO: "ssh://git@github.com/helxplatform/roger.git" - # https://airflow.apache.org/docs/apache-airflow/1.10.12/configurations-ref.html#git-ssh-key-secret-name - AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME: "airflow-secrets" - # @TODO change this to master. - AIRFLOW__KUBERNETES__GIT_BRANCH: "develop" - AIRFLOW__KUBERNETES__GIT_DAGS_FOLDER_MOUNT_POINT: "/opt/airflow/dags" - AIRFLOW__KUBERNETES__GIT_SYNC_DEST: "roger" - AIRFLOW__KUBERNETES__DAGS_VOLUME_SUBPATH: "roger" - AIRFLOW__KUBERNETES__DELETE_WORKER_PODS: "FALSE" - AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY: "renciorg/roger-executor" - AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG: "0.26" - # DAGS - AIRFLOW__CORE__LOAD_EXAMPLES: "False" - - ## Disable noisy "Handling signal: ttou" Gunicorn log messages - GUNICORN_CMD_ARGS: "--log-level WARNING" - - extraPipPackages: - - Babel==2.8.0 - - biolink-model==1.2.5 - - biolinkml==1.5.8 - - redisgraph==2.1.5 - - git+https://github.com/RedisGraph/redisgraph-bulk-loader.git - - flatten-dict - - git+https://github.com/stevencox/kgx.git - - ################################### - # Volumes - ################################### - extraVolumeMounts: - - name: roger-data - mountPath: /dags/roger/data - - ## extra volumes for the web/scheduler/worker Pods - - extraVolumes: - - name: roger-data - emptyDir: {} - - - - ################################### - # Airflow - WebUI Configs - ################################### - web: - ## configs for the Service of the web Pods - ## - service: - type: ClusterIP - - ################################### - # Airflow - Worker Configs - ################################### - workers: - ## the number of workers Pods to run - ## - replicas: 1 - - ################################### - # Airflow - DAGs Configs - ################################### - dags: - ## configs for the DAG git repository & sync container - ## - git: - ## url of the git repository - url: 
"ssh://git@github.com/helxplatform/roger.git" - - ## the branch/tag/sha1 which we clone - ## - ref: develop - - ## the name of a pre-created secret containing files for ~/.ssh/ - ## - ## NOTE: - ## - this is ONLY RELEVANT for SSH git repos - ## - the secret commonly includes files: id_rsa, id_rsa.pub, known_hosts - ## - known_hosts is NOT NEEDED if `git.sshKeyscan` is true - ## - secret: airflow-git-keys - - ## the name of the private key file in your `git.secret` - ## - ## NOTE: - ## - this is ONLY RELEVANT for PRIVATE SSH git repos - ## - privateKeyName: id_rsa - - ## the host name of the git repo - ## - ## NOTE: - ## - this is ONLY REQUIRED for SSH git repos - ## - ## EXAMPLE: - ## repoHost: "github.com" - ## - repoHost: "github.com" - - ## the port of the git repo - ## NOTE: - ## - this is ONLY REQUIRED for SSH git repos - ## - repoPort: 22 - - ## configs for the git-sync container - gitSync: - ## enable the git-sync sidecar container - enabled: true - ## the git sync interval in seconds - refreshTime: 60 - installRequirments: true - - ################################### - # Database - PostgreSQL Chart - ################################### - postgresql: - enabled: true - - ################################### - # Database - Redis Chart - ################################### - redis: - enabled: false - - -tranql: - image: renciorg/tranql-app - imageTag: develop-test - replicas: 1 - port: 8081 - gunicorn: - workerCount: 4 - workerTimeout: 300 - service: - type: ClusterIP - +redis: + image: + repository: redislabs/redisgraph + tag: 2.2.13 + redis: + command: "redis-server" + clusterDomain: "blackbalsam-cluster" + cluster: + slaveCount: 1 + usePassword: false + master: + command: "" + readinessProbe: + enabled: false + livenessProbe: + enabled: false + extraFlags: + - "--loadmodule /usr/lib/redis/modules/redisgraph.so" + slave: + command: "" + readinessProbe: + enabled: false + livenessProbe: + enabled: false + extraFlags: + - "--loadmodule 
/usr/lib/redis/modules/redisgraph.so" + + +airflow: + # + # NOTE: + # - This is intended to be a `custom-values.yaml` starting point for non-production deployment (like minikube) + + # External Dependencies: + # - A PUBLIC git repo for DAGs: ssh://git@repo.example.com:my-airflow-dags.git + # + + ################################### + # Airflow - Common Configs + ################################### + airflow: + ## the airflow executor type to use + ## + image: + repository: renciorg/apache-airflow-1.10.14-python-3.8-git + tag: latest + # executor: CeleryExecutor + executor: KubernetesExecutor + + ## the fernet key used to encrypt the connections in the database + ## + fernetKey: "7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=" + + ## environment variables for the web/scheduler/worker Pods (for airflow configs) + ## + config: + # Security + AIRFLOW__CORE__SECURE_MODE: "True" + AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.deny_all" + AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False" + AIRFLOW__WEBSERVER__RBAC: "False" + AIRFLOW__KUBERNETES__GIT_REPO: "ssh://git@github.com/helxplatform/roger.git" + # https://airflow.apache.org/docs/apache-airflow/1.10.12/configurations-ref.html#git-ssh-key-secret-name + AIRFLOW__KUBERNETES__GIT_SSH_KEY_SECRET_NAME: "airflow-secrets" + # @TODO change this to master. 
+ AIRFLOW__KUBERNETES__GIT_BRANCH: "develop" + AIRFLOW__KUBERNETES__GIT_DAGS_FOLDER_MOUNT_POINT: "/opt/airflow/dags" + AIRFLOW__KUBERNETES__GIT_SYNC_DEST: "roger" + AIRFLOW__KUBERNETES__DAGS_VOLUME_SUBPATH: "roger" + AIRFLOW__KUBERNETES__DELETE_WORKER_PODS: "FALSE" + AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY: "renciorg/roger-executor" + AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG: "0.26" + # DAGS + AIRFLOW__CORE__LOAD_EXAMPLES: "False" + + ## Disable noisy "Handling signal: ttou" Gunicorn log messages + GUNICORN_CMD_ARGS: "--log-level WARNING" + + extraPipPackages: + - Babel==2.8.0 + - biolink-model==1.2.5 + - biolinkml==1.5.8 + - redisgraph==2.1.5 + - git+https://github.com/RedisGraph/redisgraph-bulk-loader.git + - flatten-dict + - git+https://github.com/stevencox/kgx.git + + ################################### + # Volumes + ################################### + extraVolumeMounts: + - name: roger-data + mountPath: /dags/roger/data + + ## extra volumes for the web/scheduler/worker Pods + + extraVolumes: + - name: roger-data + emptyDir: {} + + + + ################################### + # Airflow - WebUI Configs + ################################### + web: + ## configs for the Service of the web Pods + ## + service: + type: ClusterIP + + ################################### + # Airflow - Worker Configs + ################################### + workers: + ## the number of workers Pods to run + ## + replicas: 1 + + ################################### + # Airflow - DAGs Configs + ################################### + dags: + ## configs for the DAG git repository & sync container + ## + git: + ## url of the git repository + url: "ssh://git@github.com/helxplatform/roger.git" + + ## the branch/tag/sha1 which we clone + ## + ref: develop + + ## the name of a pre-created secret containing files for ~/.ssh/ + ## + ## NOTE: + ## - this is ONLY RELEVANT for SSH git repos + ## - the secret commonly includes files: id_rsa, id_rsa.pub, known_hosts + ## - known_hosts is NOT NEEDED 
if `git.sshKeyscan` is true + ## + secret: airflow-git-keys + + ## the name of the private key file in your `git.secret` + ## + ## NOTE: + ## - this is ONLY RELEVANT for PRIVATE SSH git repos + ## + privateKeyName: id_rsa + + ## the host name of the git repo + ## + ## NOTE: + ## - this is ONLY REQUIRED for SSH git repos + ## + ## EXAMPLE: + ## repoHost: "github.com" + ## + repoHost: "github.com" + + ## the port of the git repo + ## NOTE: + ## - this is ONLY REQUIRED for SSH git repos + ## + repoPort: 22 + + ## configs for the git-sync container + gitSync: + ## enable the git-sync sidecar container + enabled: true + ## the git sync interval in seconds + refreshTime: 60 + installRequirments: true + + ################################### + # Database - PostgreSQL Chart + ################################### + postgresql: + enabled: true + + ################################### + # Database - Redis Chart + ################################### + redis: + enabled: false + + +tranql: + image: renciorg/tranql-app + imageTag: develop-test + replicas: 1 + port: 8081 + gunicorn: + workerCount: 4 + workerTimeout: 300 + service: + type: ClusterIP + From 939edbcb72b4785858cc831e1103d2a3920ab663 Mon Sep 17 00:00:00 2001 From: muralikarthikk Date: Wed, 17 Feb 2021 09:10:36 -0500 Subject: [PATCH 055/332] dug integration into roger - airflow --- annotate.py | 106 + .../__pycache__/dug_utils.cpython-38.pyc | Bin 0 -> 1760 bytes .../dug_data/data/bdc_dbgap_data_dicts.tar.gz | Bin 0 -> 242051 bytes dug_helpers/dug_data/data/dd.xml | 2 + .../dug_data/data/harmonized_variable_DD.csv | 16 + ...00001.v1.ardpheno.data_dict_2008_10_31.xml | 1462 ++ .../dug_data/data/redis/appendonly.aof | Bin 0 -> 1953241 bytes dug_helpers/dug_data/data/test_tags_v1.0.json | 14 + .../dug_data/data/test_variables_v1.0.csv | 62 + .../topmed_data/topmed_tags_v1.0.json | 847 + .../topmed_data/topmed_variables_v1.0.csv | 15912 ++++++++++++++++ dug_helpers/dug_logger.py | 30 + dug_helpers/dug_utils.py | 92 + 
helm/values.yaml | 9 +- 14 files changed, 18548 insertions(+), 4 deletions(-) create mode 100644 annotate.py create mode 100644 dug_helpers/__pycache__/dug_utils.cpython-38.pyc create mode 100644 dug_helpers/dug_data/data/bdc_dbgap_data_dicts.tar.gz create mode 100644 dug_helpers/dug_data/data/dd.xml create mode 100644 dug_helpers/dug_data/data/harmonized_variable_DD.csv create mode 100644 dug_helpers/dug_data/data/phs000001.v1.pht000001.v1.ardpheno.data_dict_2008_10_31.xml create mode 100644 dug_helpers/dug_data/data/redis/appendonly.aof create mode 100644 dug_helpers/dug_data/data/test_tags_v1.0.json create mode 100644 dug_helpers/dug_data/data/test_variables_v1.0.csv create mode 100644 dug_helpers/dug_data/topmed_data/topmed_tags_v1.0.json create mode 100644 dug_helpers/dug_data/topmed_data/topmed_variables_v1.0.csv create mode 100644 dug_helpers/dug_logger.py create mode 100644 dug_helpers/dug_utils.py diff --git a/annotate.py b/annotate.py new file mode 100644 index 00000000..e3722358 --- /dev/null +++ b/annotate.py @@ -0,0 +1,106 @@ +import os +import json +from pathlib import Path +from airflow.operators.bash_operator import BashOperator +from airflow.models import DAG +from airflow.operators.python_operator import PythonOperator +from airflow.contrib.sensors.python_sensor import PythonSensor +from airflow.utils.dates import days_ago +from dug_helpers.dug_utils import DugUtil +from dug_helpers.dug_logger import get_logger + +from roger.Config import get_default_config as get_config + +default_args = { + 'owner': 'RENCI', + 'start_date': days_ago(1) +} + +import logging + +""" Build the workflow's tasks and DAG. """ +with DAG( + dag_id='annotate_dug', + default_args=default_args, + schedule_interval=None +) as dag: + + at_k8s = True + + def get_executor_config(annotations=None, data_path="/opt/dug-helpers/data"): + """ Get an executor configuration. + :param annotations: Annotations to attach to the executor. 
+ :returns: Returns a KubernetesExecutor if K8s is configured and None otherwise. + """ + k8s_executor_config = { + "KubernetesExecutor": { + "volumes": [ + { + "name": "dug-helpers-data", + "persistentVolumeClaim": { + "claimName": "roger-data-pvc" + } + } + ], + "volume_mounts": [ + { + "mountPath": data_path, + "name": "dug-helpers-data", + } + ] + } + } + return k8s_executor_config if at_k8s else None + + def task_wrapper(a_callable, config, xcom, **kwargs): + ti = kwargs['ti'] + if xcom: + value = ti.xcom_pull(key=None, task_ids=ti.previous_ti) + config.update(value) + else: + config = {} + return a_callable(config) + + def create_python_task(task_id, a_callable, xcom=False): + data_path = "/opt/dug/data" + return PythonOperator( + task_id=task_id, + python_callable=task_wrapper, + op_kwargs={ + "a_callable": a_callable, + "config": {}, + "xcom": xcom + }, + executor_config=get_executor_config(annotations={"task_name": task_id}, data_path=data_path), + dag=dag, + provide_context=True + ) + + def _is_topmed_file_available(**kwargs): + home = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(home, get_config()['dug_data_root']) + data_path = Path(file_path) + data_files = data_path.glob('topmed_*.csv') + files = [str(file) for file in data_files] + kwargs['ti'].xcom_push(key='config', value={"topmed_files": files}) + return "Files Pushed" + + """Build workflow tasks.""" + intro = BashOperator(task_id='Intro', + bash_command='echo running tranql translator && exit 0', + dag=dag) + + is_topmed_file_available = PythonSensor( + task_id="is_topmed_file_available", + python_callable=_is_topmed_file_available, + dag=dag, + poke_interval=5, + timeout=20, + provide_context=True + ) + + dug_load_topmed_variables = create_python_task("load_and_annotate", DugUtil.load_and_annotate, xcom=True) + make_kg_tagged = create_python_task("make_kg_tagged", DugUtil.make_kg_tagged, xcom=False) + + intro >> is_topmed_file_available >> dug_load_topmed_variables >> 
make_kg_tagged + diff --git a/dug_helpers/__pycache__/dug_utils.cpython-38.pyc b/dug_helpers/__pycache__/dug_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce53b4bffa01a217670b7f26c064f47021267503 GIT binary patch literal 1760 zcmaJBO>ZML)Sj6nlWf}E?iLjZghm_|<*=Dc#7Y%~t+op+h_-54Az4U+Gj#gChI5a2*x`QW=K%=-4_NIAk=kd7)mh^TvBsrG>J0Z$C;T1s z(S8S)o5u%-UvbvA1)J$8;GE9&>3yfSXTpA(M>Dp&jk?9%o7pxv`0o z*<|OGW`;i&GR+Fc6FL}Ze&I|i`6esXN>1`OsItfhT$>d&Rno$pJJaWtvr;iGmCk7< zrVFc2j22zZWhxdf54lY$K4*Y3@HQ>9;5*Z(G6GiB9UU`~&Jz<)dzHlh$HeJpYB-54 z*Lh6Vm+?Q`?f1VvINBpM$yM)+w2ofvz3ea3vcg>=#2;@XMAnzgN~c7fR@q#|S!vR` zeS;?VSNkE-`vQgciq@_6%CUG@U;bWyO zSzppC&5mDs%9IT*F9c+!^dXcTkE}1PB#fWZBC};3a&jt$KO=n)s{Zw-@d?~#;=It1 zl5?usM-c9Fpqa?3D5vK< z5;o%02w+*&ieNz(C=Icu7MM^o5&s^HN}7Ry%1i_S;|nvVnR*pLI|<26H`-4z_**TQ ze#k{)F=;*ogrPainoBhIES|x0e#CQs=3jd=bnP?m2H)Usym>IM*ZrOODr7@p>r;%7Icttqnp4hYhxIKwo_kNDC?@Q z`N&h5IJC8~8)&L2>q*;{9a%($-VKr3Vfs=70i z`;enBNnC9wN!d!0T(Kf^csohHD`>XjG?Ij=)R7I$aFIfjj1pdN_VU^mW3k7yLw9nRgJqK7r7Tr_uHoMp$|r#Gelj= z^675JaRDw(n$%Th{%i^|MR?tj3zf!-L#v8$Uj9BV`^Mq|h`QYS6X&=#<9SyJbMU@R z3>-_0l|gj8b!7TdH99Zs=jS@~^5lw0N$>PX*3>A0X4rq;Em&$Tug}klg5sIot2P(0 z`-WdV78%aN=>7Gg{+FQzW73YDp7evdK6xIb%qe@qbkYs_!>2B1g!UV z`oVE~pN!BpZP4bmWZ`8urDI*$^h(y{9-jYC|O-y%tmq#2u zt#DSQ1|?oSF;=S$hD-aO^&LRSftc}*yu|5CO&k1e5jUK-4zXcS*6DlRugD&+<(U`8 zmCZP9L17XpK<&$Pq=Hm;pN?>MiWFb!A2=mRdP8&QqMOA}-99DOyaw z>?4qKu0c<20{_`Fd>12Qq=rsUaa?%s_Idc;jZXvf1RB;HHA8n=g*z3AZJ(_-jq`|_ zM`FLB$^qOJ)p9;W_;5XL`)8XWZJ->^!H8F;y9V_QT@-$dfC)2j#Lg_dOnc zLf19KqPeE%_SahLP2#|od3<%TjugmZuK=edw@^u9>L%5Iw`LsW^lk?G8qzSHD2i~6 z=pEdphggOVmOArr;O_OIaAjrj)bq+idYWK&R^e9syE?FA5h-n-MK+{Xf>=p`1|-mV z+O!T_C*vs&E6q9 zHiqH8_?Iw9hqAo5MKOsVXf~+W`1in`E&<7EHc>3*H_G`Ego}Mn1EN^Pkgfk`eGoH` ztcuk_yo;nydJmzVR-cNxFK?FMqM_j9c@%wl<q7)8o}q&n^Zs3XZG+#-rd07rX#}V5)sBkc&!koHNc%Q6xg+kXs1u5JSY`bV)V(Is=ga);!FzP?{w@a7=g!8sA}owX?{!8|FZPu2RHa(ZM#ih;mUC66zI; zdmX4;E}u3;L0d*}_lh$cAb*SxjOA{XI## 
z*KinE&y&jN#_@@1G_iD`L_yxxt8?sE9Y8~}UrT{)IzC`@_+8EmAo`UmByPGF3l}~{ z7m{1VTQV%RrEUi5F^*>ox22Avyg`bOEYfiw-ODVG+!V!femC{65RNH^0oz>@*6{13 z-vaOd9LfNAUghJ+5Xz+D9b-TlzvLK@!YVk57f1Re#|dV6WmMy^rtE;%DMo=lerg$udZvP33O#j^= z&OZpAoV8cO!oRK{A*o+MAa1!&W&?$=E}TNF0XMmv$H67y$uNh6J`lMew0qs}L6XQ- zMV-nw$?0<5A=>}jn6g;P2q_)d39*V{3ha&IxJHFnf9xH&z;HRnDu;04wcfvW)Gq$W9MQ<|Cp&xCeThQyLKnJYuZJ{<4!t<&>_)vKP zllW7X#=l)dh@v(gTpBN3JxQYCVPxc-wLO=_v7f1 zrO{xLif8~mqmc#OLby?IQ}n+L>IuTHE+XtFzDX!bpUFrH|DZTo9i^QojL1h}tn>;Mg3hL4!as3jzx*?X7MzT&i#!6#f<) zqeM1u{3RJ->+VPFc~tVTgH;&84|ZoGhKHXqs{rqcW=T&El|dwDBE_e7mLxe6SC)6mBZ8$~*L`r=j@S}{B- zM~iv44xlG#_%gXVu<9eTWQXSn!LwPP*98Vqb+)*T$TS4vj_x$t+BA$i7N+E`QtRlQ zCplB2D+p|@TI*Hg0CJ(Z26Y_+akc1EneJIcGqnV)O)*0a+1=dB!^uMzc9ns`IIo)i zQvGL`{XxN~G1MrFidTg=%X51I1SE1%i9wh775TOt)D~W+Q_Z&Tl8>)u#w(kK39v64 zeTG?76&^Va4#I!GI<7*(@XqZikGZzLj)ZP`pWXy)I9PeaF3ek9zeU@p7-K$15PHBrx(nt*{v%H&hqr|H^3%$F+)MURZl-jiBH* zuD$oY9Hu;{oNh~fT3(N$`45HwF{9YeWpLgPo6(7<5Tsr%dkpg7z{ycfWWI;U5I|fe zS`BlwsR7~Gjb*15JC~+7%SU(@Lo@+%Z;5-kNsdHb9muHZ%jY31l(^7bu zWR2dVwPs-4^joy>`d&~)#15iMLSUN%Ik`#ip2H?!QR*D;!0gDzC9@WhGhGjPBb)a`@On?aS5#xvNr z`?NkRghcntCsEpP_g4}XP)q2M|WzOxo8B3d4e_)*pOq#@S%F1e0Dc7+`f;7+4mJT7i6nN zA9#r5qO{^E&A~2@zRDOWRIEonXIiSqtC_sXpRpqArTNu))Wsg9?3=cu_ow9Gd#Fiu z)Ts=?Kx^wv;^4|)DA~R@Z$8JK;p|Kv?U@}KcH1A3V^PpWT_mHxc9BxvXzJ=T(fS^b ztz4u}Csxdq;uRj0YaX4acXm=c6SGrnNu+yU_nq;`Su%dlXto89q~y@MQ}*u}`L>|H z6D?dKyl1G{f~ih4XUU}%Ys&`rr+_7m)P@nWfy)$_pj(cf9!;#PcPVV6q{rpQ;qPs7 z(pUKH*+luA)kuk}Bj3B|bWbBc4wt=Xrt)MymsBa%MD}b?KMAR=HqyNxCm z9%-z&xt@5S@QE(n1-q=}2;xa|H7F1w_ZZTfLu+DQxDm%*iG?P+e$_9%p<(noMg!S5 zxY>)wV?T7WKgUg$QCI)g!Fihot*ro0{}m=_GD`$S_LT@pw!)J>T0vs?b-8#Q3j5vF zNW}cQkgxMujx*(kJ9F>T>Rg6@hlL00Y+?vFO7&5p2x=iSs;3NdP|x$n3p8yT`zuJB zGg=<++HQSnB&>?`X$d}79$V1SiEgbJ0AH6}+OV8#aAgWS8Jx+FBmJa^sq>TiC^QY^ zH~sqP#nBv1^nF(EtBpG<1`n#s5S`-h#XEl)PVJuhHs;wuNt<^bO}HypF?3|~^Sr3K zyo!dsCRqbMRN2OMdW9xmHS%DW1xOJsVe3ON*;Ix6C_z zHKQBR2R(v;{cso9C?#d5s?m2plQuMauJFuh%dSX(xl#kNFCWxV$hKN3UeEp#bD{PX 
zTLqZz#5VB!Ji)*h4N@~IThMG@$ytIF)l${`so?}&=ph|1=)Vs;Hlwp~&ylfku5+u# z9P2!{zf#5WnS)Oe(|iu9uQX7=S)UMa@#d*uV5i7q^BWx+2OUiL^biR04(eOrUPohw zO6Nb{%iGAL2<)&fXrUrDSUSBP zjzy-bUH7pZq;`QU-n&3(OAH@p#E#S6A-vM29xIMplgCSUDB=$J3rns0=@dz~H+`dt zwf(!2_2Aez5dJs+w_t51g0ZuRBpIj!V-_RhjNqAD-2}yK0N^8j@;)d_F!h%LyLH(> z{>#OC6oMJx(S+<+jDk1!DBd>!q_2O#(hYt-29*ly%#t$z|6A|s(DP(q%ys?D4?9`* zuZqnZ*>3UuWkEr}YGj<$>CfbYN9h@_q##+}-vRC}qD&4Jv&tSyAaw_}Rvz!#pQsp? zGG4pJ{}oFEKTGfi7hp}$g?3wj78yv!I0JzA*>=FO)_`7LhIZ+OuGsT?)^a`3v#bHJ zMQ1jNoulFTR0S0GrZIvg4D$XTy|fJr{QpaOq1+Jg(f9NsS^`wX%w9=M07oqF@Gn^@ zX{%q#aumEnqp$oQwPZtwYH9t0Fwmi64vc-Gzk--x5Pi4344l=EqZ7UdDhfL_ViL2+ zj-j)h<7LQIMc%Jxp?uVLS;*MCmWJ+woOIn3V1p-7!enB5LH%{dq8mU6YJ1Vp1ym3W z`tv^>OuE5l;sYF%jU~*~*Ucj1OTfrv2&l&`o?ZstP%@gT1A&|&aO4S9Xa`o$Z?l5x z{62QJZf;Ex@8$%{yfFsy$@7O#Fq`US?~q+RJBy-Co`9rc*p)%x@f(ce9=)Dl3Y@=r zsunaI7KA2FbPM=^`E7yH{R*ZKGsC;{KKs{aSjI0LzmLl39+B=K+%7}hYLi$HKro=g zT3{(9=xO}A^lTP|zb$BxDSqv__D-Vgw5U754TgtrtPrH%NH*jHS-0cDoV7)XgiMlw z&vZM%R&fCR0hQ7rI&>t=K{3=yES;c4&v?tMi%(c!cL(|&)Y z?Ie?3B2e;6L<^K`mvDSs53XnRGUW<#kogSKTG5o^U4y`Ek+9X7hKfV4fKZL<#9r!T zAe{SOYbCf-gw~gEa`|*%eXkF@topcRZ5wL;N3YD%dF(*FIO~z5gI*&+=>F&fNM-(t zrjze%zQCg3r-#cR32cei`E&xdiQ9X>rjnak+AWU(wOb|N)ju|b-d!X_(uuG+m^$8{ zqAV>f-MNBrZsn1I&w?ZQv<>S5bX+E3JCtr^)DY#CDw~QqeHF^KXAnkQ69?yo5#|y6d_%Fg`J`AEZQr(Qs!ldRRvt44!4E zWc&ZI;=(ETlBhf1k1n83IaPhaTlR+XN^|3jrL)f5Ne{4GyMkVS$Ci*zAT?3Lz@Y+s_L=whR;I`7*Fobjq*OUh*x#&N97fR!3);2 zkPCxAhkut~!$7Xuf^d0yeFv1DCot=gd2jaEEMT~`pH22TRp=<2w~kK9k6qqR44zH% zUOJ}u0kmCht-tgc7#{fsM(5DZKYDtRX>3SR2}<}BXqG|kCaj~!J3?iU;LjX2Cci91 z2fU`Dzm=B2Wkx+qk%jS!)o zkHW$j*$+xq{uUe7xt`r#_Ss|)eK{!8Sx(e z(TzN;0hfJ!S4cH~Yz?NP;|zpmpcRokg|dJWdX0-5{zDsD$iU$QSIu2q(%fE-YM=ES zmQiD2Y+w8b7$l~{2m9hv2*_etN-ax&ZW_--PI%9FQ5|+p@lz+kD`=*?49jDXuD!Ye z>@_B)rhyeS#6-*|kW@>Z({x0{Wfp>gEU?hIFMln!90%dlZMB^KyN%*QBq>AQOGku5 zAwJVxmy>5S@s5l$J0y)Z5G^8ov=`0Y;TGt=a_YZwA&k)O@ArLi`&o#_6dcl-KbXJv@(2+-G_aJ2@o4o7+$UtZL=hhAi#D z$%pq9xoRJQ?ZHkJ;GEuOV7rIq8a7jfK4AYt5utNAI*yz%U|IYx^(gYk7EmtC@RhOH 
z<^*|qI4%bF6UXxR&kv)2J}Y+W9S~L|T>7>Li}?UzCqXV1J#BIj6P-S}16R?>E^PMf zWbV4=K)@E}T9j~>Dv|dGW&#%Rk}zk2=yhZX#15jTS@mYd2>j>c8t%aQ1SQAN1K>&o zCtKS*se|1+Vvc>LRzfNEdl06Z!?5=Cr3yhe-Hx$*6@)XR2J>Z*`hErRA6P+{y>bFm z`6ZBi%ah7Las?x|?MaOUB^9a>fr_PvIPaJyYqGbm30?-zYpLsvRtS5bQ;2x5$m(s7 z@k~l_6xB)xn_Xt{r{Pf;uMkg>*?zX5#uGq^3-?w<`mujw5bkyOB~{WH>Gt>Hb0!e0 zGGs87R!Ap^TTa!1%%3YHC%S?I1=;@k1U+YFtGuu(1H+4+xp*`OD0pr)Tj%e)J_)>G zB`PXzZL&f?#_}GQp zqhp1|Y^ws#R?xf^gsB>EpR)na+b5uX82#cWxrFH6FX!8=P9eN0I%L*5F+d7XF;Khm zL}m?>s5bo{qWYGSu>>p`h~|GO`yo2Kp&cNF$&W-a1@GWKrAA=B1cFq=eCWv{Ko;_k z|5U^$%(}7#j;XZ8zV|JFV+RO1{<8{{c^;`=C+?0(CWm%Z`HtmD&1V zF`C#<);~69r5UFIv?7sTbEI~CScL=V+QSAasfzHILKBa8>Bv~^9LXdHSTh-jg&7d~ zN>|xux)%s+F;!MfyBltXRk$i)q7@XvOGZ8a_;>UQQT0SbFNnmgyL2K~JaC1lf8ehk6!pLiDf*G#}a3`S>=Y9BeDG68S?g>Y#k zVO8u?RN= zV6>QP_!KT{ffzE-*GT&Ej~)91gLwi4g*oJ3pOmfw?fYjKu^v1b0x&v*Ap-10zr&jZ ztHogrl1va@X^HC;g8w{V)}Vx=7=-}2980Z90-k3;yrUglsT>BMgESTi0pUZNKb-*z z188i`Hon(6fX|<^JmT3fdUov?GhiE7Va>fn1`<7kz~g15I%zD@0b_+m&@hZ<`TZFI z`uZ=xcIegqL`MtL_EoMH7dU)2K91vt^V%|S9Lh!p=?vJ-a?fReAKSz762}2BG7dh) zg4`ARZK)?g;0lftH#Sc?%%(i5+B|~xYx}UTaegfV&ckZ~+5UQ1+YJ`?fMMU_+keos z2G}Yxxmf);3WqEoE9G+7q?4Kh>X;ZQq2ng&|Kp{>)-$OfFzEdDp;s*NmcIzj)Pin| zh0tcC9gQF|iH;}+&6B>;iKk6U0f!7;ceCorKyeCwHT&&FWcrGdy@(0-@k%^L7=63Q ze?%m!H~)i^tcx*PVnwpd8Ow@6Hy0;vEF9RP!v&0a@0Tdu`OeG$Ax;% zfLYX{7&0>W@iTfYbRw_vC+wgG#)NE9$PT!)c@=Bkf)kuLeIGy74N85xIsk`OKe_|!;1*$(V+-Hj9#PaoB~esry>v>4k@2=r??*>$YSf%qSM{F}+o zB5R;yf&EY1S@Z~qz<5OjR$?p=sVZ?jfz`tYPy)WH#NINv^c|$$#qHJkYhvIZE(4~5 zNTU!`v)2M1SIDZz!5Q7vlzI+4w;47ufmD(odvohqfIx~5C%5m0Ag8xC{$9dG;LkL4x>iN)VWr zupWdA{tr=02W?QW&XVM?`ZAa)v5p}Mm-Ze+v;y-L81gAdJ=@~l4+SpHU=hri|3t%~ zWN!asC<%{5kCX0>GY9NO5MHFf_T8rt!*AievW7Pf@XT`wsPZcAs{xsCNCMk`ZVoH{ zz7LB@j8%wCTkZeN{}t{Oea4748H6%$a)pg!k7@`IWnRIby@*Bd2J!;~$k*2Ha0V(r zK;#N*J>ygaP__k;mtV^IE-Zu}?Kl4Vg&q`K1Nv0P+5QB*S&@42ke-OI#VG)c~?n6&*=_R zg&NE-iE>K}rfXNgL2Z!;$NLPP>eRJ!j(f_Vrx-r8nY9Gwt{aGU-HLa&27{>m~#z*gq-{xH}qVtIW$Chl6rIRK`$mI}ecFhO=|~>C^-NBAv?l&r67s>MBK{M^ZYJ*G4X! 
z&@=hpj2Gtk{|3W3ECy;MR-$NV1N#0WlQs*>xbrl?j(h?FR)S3^BF_|yiW*Y2vIrt`5l*luMX!bPAZ)^C-qj&U$qNo`DZ*0 zP|1G<`g0Hrpal6du(PS;A4A@J;Mf8w3U>l@>A<5HL054-?z;X7%gAKo_2lP!%^kv3 z1)wgz%%o34{NsomluSd+k+2AcW-qFK_`{&a85J0@vI<7uBH*}xIJ?{eOy%|UkD}(k zE(~J2o5dntn4_bA8Pwz-fG!7c{{xC_L6(&?4{piBX-T+;X#(T(H%cy_+_42@ zbJAq^9pb_316$>)@D4-us|+wAl#eim$DnoOCwA%s2veq@CzhE^ben1pusVEytIgwv zHuMtj?h?KW-QZZ5o|}oIhA8(}#g>g6%+l=Jz$#7r%jy3@WGia_AMb?uO-$y*smiNr z_!aKvz|nWuQ;}T$XdLVurz5LjX>Pm#S23JGJ@lfO*8BI&r(pL7fQzH&_-ar{-YN#_ zhL-g}^_toD5CafCq^NhQ&_Nc<`G3#>lBih(qTHXSFo3d5B*-F#a-ntYdx(nX(!eUV zMrjo4Gb|jyD$n8THtXP~72}`VJWqnMnR!55<`iNkmkE$+$qf_%gOFy+k5{78cg?@= z4Fn=5eAd>7%=T584o`1b*8-1XSI;ad+T;&`gqjKp|I__U0WS$`<$2%_j0aec^Tr!+ zcZDeJc--POmL7##d*O~qKdL6}F8p}m(DSrid!Svie4}q;Ig|OX4 z;GAZ|@2q8_w9=cOFxlRQQyL2JE2;%|p_0Pd*6{zy9oB;@P%a$*4?6ldVilRxK1c(Z zy;%!5*B*Tl^BX8}s?MyBPCsa+j|&iB+w-3>b(g7@Y_J$_LyuzdC13(I4)j5MHI|T( zyYlQ$O2K~rb&ANU_w-S3ksVv||HIh^=qk*nxK0;>R6GHGgk>VM%#rp)a_n6|eCt3n z|G5^x2NHd&KqqrAimNW=?peiZunXQW1s}YEcZSc*!~qC#wWA@?*+4A@L{?A`=J`uf zNQ#SFBgnSJ4B0g^>^nCfd2x{{;AU`3l98jx*hsTo9YA9#dZ?D31Go#61BC?K1!8W8 z!QH5EzN&Hb%^c8RkUaM^0`&zIoghD{WnvANP5@^@)Cwnei6Tvar;gFg%f(s`v%`$1 z$4Te)AyrcR_!NkiV??HTH$DVP%&M<&-W>q!G7>km?173jCufA@PMuGRKbehCMnPWu z!|0Zj{|E^9Q@9PpI)+&mS*qYvE%Ln-FAC7NOeaNOJK5+7m-{)DJWnMZWjB$hyuKA>%5D+{^OwIhF^`thg>sqF))EM{ z_W%w!OZmq;AUfLIzE8);q-}7-E$}6%UsdLPl$Lk&Uq^0P1k8E$Xc?TsB>oACKZYtC zlof!v`GKEBxDzbP1@2=pl2n43#-w6TCPV@u%N`8x2zf2sPBT5ceHQs{$>0e|wE_Y` zD(u-mQby6k(nnR>3~`QfeOe+A3Gv{%bq5nPB*?3I|1BsajCz+`a47u z-{ZjsM=}p@JiABdF;FPvXYQ;$fHvzslU$7B%PHVfPD95TniP^JT((irUL)tNM*@ac zMIzVKIV1E+a-{^BF3KCGi*j@v*%Qm!IC90(C$IdgykN)~{;od_&5>Bi=Ll+omBnha z{Wby?55Y;U!L0%igB_qgTc7X|<0yoV&YoV72q8oavUNY1nZC<#Bkb@uz>^yZ_Az~> zApsqZ$_0A3Rozx;^*N%IGHP)BZ{oLxe;NEWl^#w2w*Qx3fCV)X`GznGrh-;-c&fI;+KV%b^Gf%R_nOPK)fhi+wiiRJjPAW*oNP5K&@RD$70 zy>Pmho+w*p8uTxLRexhH1_DFltcR27btzscq7=!^SI5H(VlpoJ8`20~h54jxiYKjQ zx)qx)D@N9BIzg|o8bUot(~0gN7I&|Q0h~qCf85LQcb_Bd$PUtY16imH zZjTE6Ja6rrX`Mq0_NK2jscqt!^Q3~aNU0MBoZicT(x8TkcrgFG7Iw(#4HPQrfV#;C 
zuzQJ+eZ;9}iqv)$Q)5_7@e>7bcS!wP ztm_LELK$IVb*~`J{@`MWn)>R3e9iiJq<3jReYT;?y=#h~&#mlS_4|B%pkHMCq@{hs z)N-FkEF?h1+nnG~VWW!BzMB%EXcz>yCdk z#=ps580(r&jd72+Z&`EhQd+fhb+1sKb$?=yS~zNLd)FSNRiy~M7INHlbfkJbTU*lj z(A*SCm8K3h^EBrV_r&=8yst)>p0t~q9J9+)U;dqcg{=6}KXG?%r{iXXTK;Ii75S^n@18ELjjMU)Vq$nQcT3D>c{w*C z8wB%Y-O^X+&g8Pl9%u4mWO#vx@jY^S+dYDpBf%@deY^e6HX2Ws5B+y}D*g&hsJ@x2 zXLq)L+go4B3B+q^8Du=9XWRyGv2Yp);6yllbUGJ_Cdro$$DUp)L$c(6n~$IKuU+Oz?A?=t}P?su%zdk%N&7?72L?p%%D7Mii_6u}M@ zWZ#4Ng-jt6z569I8Z)X>Bk68C1q+VVl!s>W?sPQ}-yBs)E)TJ& z&zZB+yf1smrQmUNY`X5-0>ib{7$fwd=Mf%>$U84HxNFyU1LnMd z4C6HaENQ=G1o9Uz%;F(Ckzt3_K>1WA*b~JL-G5OXxZ-z#M#Z=31410b@UtDzJx4R{ z9BG~+5&ccO+6w&o8$Im1BTxD#jm23M)q3e z5IxVH^_G{gE`2!7Uag#}vdwX@f1J0xG>!GXkWe8^ zOy|c)zF18@rh540VseJp#B%Sgsatkr&q7jec2-W(?V>QceyI80SfSystA-Phsg3bU zJl_%p7L~p^tV9ZIwRwGSkNsxj?3uALovI8*Pk-5O)t`IG=2|lagDXb%FJE?i+c>5c zm0D|&((~zy*sOz!dM~}#XQbD`Fl0JjE4b>Wn%Ab=B*~~Hq3+P30?Cb(<9U4>!#xy^ z`i2Du+i z`yoRu9&lb}IRiBEQCUqeZ${epm`Og%+X}FXe}%hE#h;@UuKnmsr+}D%Njia>M==ZN zJ&6OcvdoxkTPBeAvuVUOgnv_7>PtzCU*4lNqcyF&A`DqL11z2J7<{|+qlfzGf;R{J zO0fVfkK%RjvNm2{U3rs6cCWtrq&>fNcX`s{vnrY314g$p9&Q^*dYITP9TB18rbroBrL!vu=_wIKl%SU(Qy0(4V@yprTb-8h= zeZwk+i)x$v^KO`jvZ?=){+0ZoyW>dj!kg>YZ?*7SU43`FZsuN>YSW?Rw)ICRByV~+ zW)l4q)6@cSdp#n-|>byW9f4I9sqN>8$?=OLs zTOpHpgafYq19#5RRKy=~@2ST^xb)WUymb{g)9r7jUIy>gU}lvAWc*n+=p<>|YEhc~ z1$C2yK-8&cosj$GOe( zmk7^o!2#p)v^;&aPmE21Jk`c7lN(*?-5JZhij#($z6>_^Qe4LaJ+!3aoE54c?^L&? 
zJZlc`!GI`t^)Eds&HMId zA2Y}qz8UFg^>O8eE?GgMI`^yM_3jl+#u-ScEXR*cJyv;bNIyULg}kZuz{0nDWNOFY zG;i&*`JBQg$-xqP+hIdb_vOasnRi`6D(}CKC8|toyFb<*sdY`~+D*f+{(5h_$xGhk zaw`5L)9-hmK=pDhCnu&*8)v+5)F9B^azSXn|Dh!$2O~(PQP5WAW_TBnu7IWoPaS-H zM0Fm#Q(&f@L2)oKfg0%{L&(&~)s~7E(S3;##MAt@?iJ&Luw+)3NhKCT9Hcz|RZU0S zXiLa#IywoSov~e0QMx;~+eGl%R@+}m3OAPLQg?M52%2oYOAO8;)yk+ZiK0g-?@iZr z37GFMbkXLoPka%-D6iB+aos<6qNLbFdyiZH=ZqUypF6#+Zc z#luMM%Ig8LPw}{<17{2+{ATmUBPLt(`wc*)W(31SZQ)R<%ax}ZR*vHr9<@A6YrZ1( zO!LHWht}6v*OZ=|@%?iXA7*|%>pM3*&d>bWD|(~Mlm2~~ z5LcdMo% z8G)BPd)m-8f1pmQw<&GFP99^vxwAH(foB;MOZZ0#KSytmq57Q8RP=1>)$yB^Y)2Om z9%W6yr~d&96tj$DDBS4565j(2YoJaHGk-s(pgxf^WM5j^JEN$zA^uU`Xi7op%7TYq zedV>Jh*^b;%lZLpYby3iF43&}FTGaQ?EB!6_#t>{wL%IbrmHUPpAH>1zglV-c_-mwOP53rRLY)W2jmBaLt3cnu1cE`r#%A z)t`CFg+diB_yaZSpT1e&%y(3d5x7-tCSg?ncJHGcp1_n>_oPmqQ4KSg7>rH+(MeWv zqFsozBS~Id6}HeUAzr1x?l!T}SmF9%TaydR#H6s8kTH6{Wk%A?7WJ0tb7jw@vikHi z)Le$H_8GT|b_PDXc&FXo_T2jVXnk7NmdCC6mO1w_`<6a8#qIdHG{_UL{L?8E=;gne z|K@RbDkDN6$Bn1y+{yRy%cZ5wuRi=3Pq@`TKjS)ZfAe{@@r{Qrw3}T%e1Mi>Hv5*K zy#xv3xKeLHDQ0rdc9-(RDKTBKFu_EZJjJ(%`}NZ9v&EfW8uvMJ;)}7*>0#}~>6`K4 z)CbEi7*z^u>4(zED;!;!PB%o#F=#eDz9L}r3WHvS;ZDaT!iH39kR-LsXOTvF$*H3g z3sX4JcWJce!|1)GzNA+Jn0+X;_bHf)Z$k${+#p+A^1Y@CU~iNULG~oI)e9 zUy6=!-Sk6E|F4Un$!+KKR6s9{$o8{;fX&X!E<7NaWRi zlfgHa^~+^)R)`2GG{1L!8gy;Xq@kO+ps~3H(Yc=BRIk#Ue8a`(*M)4dcv#+jvh#ky zd|671+F(O|4ud*d(OXbYAmwIwrQ3Oz>g|@kM<{eFc>4ptQ9(97`s=aho#iWsyVwT1 z$QB#pd&^u;>+P!3*yOh3^CyXCy~5qD)KmMcX~T8cbZ>d@iwjdL=E(Cbm>(Y0@hsS-Ti?H(p#6$yH-|(ly zw>S>(vH=|u_0{a&HErN6op7oJlm+EeRZg2+VHiKmmu1s*C9hgd7~hI$DJj({aJ|FAEUK30q>sgEA{)jde*?n z^NAs}Mn_t9I}`p~sOZhqH(*1s?-~X|1hTDQp>q`E$$wR%5wyI-C($bD=5}DxisdNW z1U01(M|Fjo0uO7+l^n!z?*J8x^+0#$+LmUK9BZQaa@=l?-W58&d*oF8E>?+z}{BdiDBkN;zB~?Z*6Y z$!(qtm*XR(EbwmMswpI!^?uvOF7S!z>|6$~c-O>7f*NLd!j(Z-o-|`;QwO?Jd!m!eh^sOz!TD6C8b+;qLc!)v2gMvW_kq+9zI< z*HlEy7>gTk^4K8wVD59(*bUcv37Imk%Z2UfPfQQ4mrJw$d818OFXN}_>RxlU?~JN3 zKM9A|Q9R)ur8n!=4i9Bd7r189KDm%PLMp18Ckm;@Cbr6J&dLb}6p%bNZCVQR2<$C5 
zE;g*`*|o8vKiN2W%-BrAE7Jp=4^aqd+kra1b0)ma3%@h?R&7+|soM_1q+6YpO_J3u zkf@Q3KdZN_CT~_EWoh(i9_);71W|et@D-@VR&cnIOPj*`(11q3u}PmHQH6AL+;FMS z7VF<*F+ssgB*`qVdqQ_;Q{c*BczOdoJ6oo6ph9k~ zeNRYAgo~Htv*&e&2d{M=Q7SWxzh1rP)DNX~-f4D6pY6%dei`l98F3wMsXza5@f)NQB5pWY##lyGfar1)$SrOzWPJ1e(fo`Q|E*2NbNF(&2g z-OUmM#j@E`&du(L-$HZHfB%86YJKm*qmgBWw={*|sg|23&Xrwlm9LN>_e5qfMh+`(Od>O#Q4T<^U4P}WhnWwXiFjOFubXZjPS&R+T!yJk$2_R-`J$} za)b4*nQm(u*}pBI`gMNt5BuZ%Pp=sfOhrPM$Eq(3AKyHG{AN<&8G#>1yM8N2GlHk% zzSMVYqlxuAPqFJc*O6gn*b*>KxbOG+qG`TCTF)_+@s=0c#-DC~<9OYSvc<+T!iL^- z>~38B(-E6)SGFc&uZP3g)#smx|E^Ian)kBrQR9y}yD9zK_M0O1IdpTkuBKfi2eW6Q%0puNYV0M^8Z zVy6=m@pB~?_U6U?9?=uoONdI(eRR||yQ%41OZAQ6&!kz~PX*4W^FXPRLovIxbTb2= zI*9}^=_L$#AJ*;?{yl+k#p-P)$hUQ&s#a$%vaO#4oJG%^q90uQ;eLUUL;PUg0eq^l zgGJ8uz#U>;7t_6%1fQjOY{pPuXktRF15O~2$ILb-35Yxny+cKhoGDyVX@N^&<7nk8 zxGa6gnTEVuWZKK*@yP7^1W|D?|mLquy-fR+s!$!UtQEX zhwg1D7#)=FA%|5Ih(29y?WrMk-0ZP-?|~Izcd~nA^VzEn?Hep#ZuFOZKjp+HxSX_v znBCuQf&i7u6;zem?lMLaG68M#z<2Fs<%K0gW;7^*YF7JoOGr~5Bm6}>IMcbtdh2_;p((y-TbDh1xT5CJTfzcnnSkI1 zgm%~2U1ktByau@{($R9q>%rS!Y$GxXKtgO?wf&3rxNi1KH(@xp0lG{89}L%&ufT1o zj6FwJP=U_AC}2n>u3^514|jsDu4rHxXFN_&CGIY&P>_4$aHf6-8;<&{VW^M~!MDRN zSAh4w##a!#IKUx`)qi*ZvYb?#0#Bc|9&LHybao>pMR)8)s>{ucw0lSU){0xC(N2a8$UbsI9J#d1%H1i_bdC^U**zriZTA|m6iwl;UlHxLuzCFK> zbneStZ0Vf{ZQoD1Y)kGqHdWh4y|-(;T)m;Lq-t!YGJjo7t08xlYl6jXjC|YvQ6Djm zD9A%zE&1}EY5FFv8|Stw*qwOSo3199#Atv1fIHgwkkhj1!fm0OqcZWE%DE4CKi~9| zU$uMhraBw0YYk267o4B&3@m$3P@kYI8ft_3Pn_59`lWo;VXV|Eg9^3XSRyMjpc8kN zu$HF#1&W4MH=n8p0cN&rfrpl0ED-Y`80MOANBu4h8T#1)riwa%h2(=Jz%pqA;ZNQ( z7xUY}`Zw%3$v}iJl}Uxf)Sf24PWTBTuZ<#62g%PT0E-GuOn^O~z{EIdv=bD+Jf745 z=6@tCM}~a-QG(1ZeH~vaU6|fJt%=KYo$raUzi4l>y5ZBt#ti|Zr4IJHqzZ^#e(P#D z2767fX(m1!7I^c6;UK!4&h?`_xwBV#*Fl8~uJ=2*Eb26TJ~0$t2uO?5rpiF|@B>Tu ze}ME*>$%6@fm9)BVcjr>f{Dcwh4D7Hdv#b^cwGnDO@)rr4e)j+beyt4DkWUHaC(Ad zy9>lIth24)Uu=O}FHq!+o&m?M_5YP8OGN32yE*#6;w^kP@wj?XN#&XE-It4Ti&IF8 zf$lR#pzkCVU%l+{19G0?mJW1*Lb^pk2Ywxe(K>sR*Gc?2Iyqe}JhFkX_`>Iz{M6Bw 
z%kAWwuKTnQ^Yz>|Jk;{#293~HMpAdmUri?63T@4rm+$a9I(4Aqoa)V;Brv{5YE{c! zl~uTXeIW5=&o4T;4Z&grym8I#XpjmQ`xbv54O+N{5?98h2uXO0PwbetcvFt6mVxr# zn?<63O(NS?AEX&?XIg3}$La7zB<4Y)Lm8(wfb-1GyTk0fqsj3Pq@d`!QnI3l31x$c z1u*B}yoA5csE}X6+YNz}2jF@kuDwX|&E>nx9!i{Zw{yUayOM9yyUo57PQ|;;H1o`- zoKnqxGq^KfVb15f#na;r+(YsDpJGSN)@-wF(o{dYjYkL{5Ll39hxRl}o@0srWh<9G z*n;kYM`&;I_8la*T@?Mw90l4?oW6?N(8Ho4?x%ue&!&m^RS*6@uHHH-iuVm0M^R8D zR7z>31PP_3mQX@k8fiqjg{2n>kuK>HknYX}>F$oDyJN|Xc^5z5-+Rvc`)AMDJ%>3n z%*=D&SKQaLwy1>=^;lD$P5nRjv)SH*`>-ZX^kNhf_Y8zHs^c3m+M#}=+}V3O`7IFlr&lZKnM?OZ6Lp)jIH zJv~`8#`Nw~+A|`a~oz~8$(1ir~!cH(Oj9()l3WdAS zaYJiU)5w^BNs=k&{H^?!3d4+kQDa}m76DXuGG5{K^q}{ymc}AefrDEs{eH_RQaZH$ zhSp^nLym@dqL$Kb*U{Fn1;7TgG~PxX%s`$zId!hV)a{4=5Q+=BwiNYx0{D$_Qa*G- zv}oA#UM%h0uAistXO4)Y_?4(y2hmgE74eJ6;J^foH|Kly(39AMmV;{`ACXgRae)Fx zVENZ)=tDQx8KFaJ_e;L6nDNTPhFc)pPregd@McZDvCO6bi?uK3eMH@| z8-Wg;eMB1vGL)~A$=u#F^5ZnzC9dw{uDZ57*3CPMoiaA>l(EuUCC>-`x@#ErR?Gh5!@*gg^R9h|f8w2yCq5V)iYmRup#zwx6OiK`~YK3 zs}%&hIiPbx>zDita2~&%G=1nFy15Q;-Hy{p{##OzEg2*jkH6zKdk2K;a2L>Nojid| z2m;+wNLtNJz-9xOOAx&KjalDrBpKe|#VI=TXzBUayQ(5Gh^Njflrwa9>KkDC}#xrs3 zg?<#$v7w^v4uW5Ri8U)zlsi{?Sj)o|GJo@NGE8CsyH>~!Nb)7=QFq4 zr_ku@JK6$JaxYEZ0@}Vbw-KTJRM#!Xm(hKqW zl)E>L)Z}*$q(vka6Ft3h*D(5?5I{oMX1}aZI*lMEwW6eP;p`f<4zt)7&2soWeBepqbt2~CAh25?vv_s02tDas$qzP)GovDK zUt6Jr4|$-_T7wMN+6G=DN2vFw;SuUw&EsVd@8?9G#A)txn-G!J%w zpPpYto6Z8Fn&Qj)ms;;qo*p4ruLSTyy?H5`)%T*BFZXwT)Q)PVU8-C;=JP1LK1O^W zex%l*`&+z%A+`DA^9xaHwnN~3Y`IDbLYppn5OtD?@U8poSn#u}*co>2=;#>-V74|{4 zfLV1E46rb`C7ZUH5Re5e6y{MLp~E7HPPwt$yrGpfU4{b{|F!HGnFG6v>n#1vHWF04 z9K74Y;AiOoFw?lj?zZUqtDwk<3Q)A~X`UA31E=P-L1S8uf>&VQpr_Oo zmsFx;8fyFWUUAxAch(x7hxq!#?^jBOjfEObuBY9oemBr)x0;pz_U>}6G?!_6`QSm` zu5i+9r6r5Mh^6G5zw6|SeHFj4Dc+D5`=ih;mnWrkzjQg$hS$|;L=c`}Rqch<7FHf% zsGIADCz~olV+dCT`hnQullQST`R-0R_1BX>5~K%{`jzYtBnPNP4Zlo7OfL)vCTZuY z4Q*M?ozT(`zMTLX7_F_B_=&hXf>V>mTY3EoaCrRYeO)4I`UH~$mxQVv`bT+%m_EX| z#_yT|dKT+@z{Nkl3(FBh&tMi(RRpJr%=$i%vA$K_V7Fa~ULQ2hrjr;KKspv-|C z$m=>TgdI;_(G0}w!+XM*BvzKS(o?Se3!Jrnr<(N8X-=ikkWx4fCgnu?SNaziWw<*t 
z^n1l}eqm1zW86Z1mRAcih)R=d3vZ($gl&sX`BGi6D*m$Czz=6pz2rK>@rf68c>Ce@ z@q`CAA=D$i#ap#}Kz`!Uuiei6>G^a!=!o;`#>OL6rs?(USWrhDX}aBkwO2;z5X$lF zFe@HadGekkzG-l!Q!G}ZWWlA#?W~b=scCUSr??i^QPFv_xwCOxS^emuA)da3yna&Z zNY;hh)?T=n75u3=v{uirE9To;medh>#Nq+-?scx!_4}^z9l{{@w~mW$FqLQID3!SW z!xeJ@n*Nqaw8EDWwKMCvsZA%J&E`9Q2znn32d*uC5%`E-SU@a|n+`Tmpq!>9${l;J z-IL{Vx$faHFVwJY>E)#Gc_5gkv1XJze?>2JtzvffdI&sCspm4N?XOADbzuZ-eHRRMTdPsrcx7`W1j{e8>B1qj~M3aeECQ@3uddB_vV zA)uQu1qIH=^t8o{xESZf9mi{fe*@^fz}?5|{x#KDVkD1iv9!oAb&}EoQi&`LLPsD$ zn93uUxEJUzW@4$HcKrsesfcAZ1Uzr-Jx?!%Ac>59 zj_Cq{M>JSzS?B;d--p&}==*}_Ey8N+07e^o2jpUcHfrv5W6%VnXA7oka{{;xK^H)B z4sdqWf~EYsh72njQ2lo{64=jh^7US-&~#T2uyYX zX>tyvgiI~G7wa<@gz7onb(_Zx;HTM7^)%ho==p!C2xzQS2dboX*uTMOW30!HFrlzh zof40u==9UWsNtWEki^g*Oa6QMAYm0)jHYrBTw+#*G=VLifA2KyG@AZsahZElfdzF% zd}R2C>n&?-RkNe`o&mmMrkL@hpj&Ik>4O_B??sQo3Z9EDBN7W?Ig^CCk}ysw=9O(! z19*g%Fruj)ZJ%jN`p1q@=oRG>HFbXk&f0fPPtTPyovs$%S(3cp zdVi!MI<1nhUILehB`reqJmf>%OmKi}m?@~=;z%4?@X+DmIW zwyXohR#%&Pl)qHGisPpkHeFFOX18sa!p{ruUK3_?%8}H6n%k3JEy3o3l;QUVqgz39 zu4$-c=S{!Cuzg(4O4@x>QP(8s_SUSG@cfWxS*nPMkVn`->Huy_V=`)aZvc0=lH9BO zSiFXsvjXfEb#M@3Z3pJ7q!sSaaaIC-UCnyolbkl}A9t$_-%Je}zyw4VA zo12yKN9*j*N$|!qd8_OGBD9~8gsu~!GO`+?C%Pyww0>HsIbRMJ{9VMqd*DeosJMsV zgv&O6!ULIVL>8Q&j*Zu$1Rl=WCv^KCq7*OP{mPbvc6z70Qn*cZ_8r{A=z`XpcNJ)o zfALsHkMwF!Q;y3T_Awb@oJ$J)^#l-M580TRUu3y~N#5m~^9|2Dxt}#tzFLb&a<>F2 zxvYq(T${F3z}<~p6Xs8#hy0cVwdUOvO6{3Uxmj=IKkCYaO{yAMN*>kmQz$ta9N?nN zM^Z*v_!<=)X-5jz{gV*_n;LI}f=*Z2fbY6#j!v|)?$qA|`)H|p_uo;652=;Mskko2 zpGos7h$N_7B2j~0QaktKo?fk?c67&>TtlPxK1?<~l>~E6=-6@VWXRwS#OjzNX0mNi z)9Ji@qjnfTe8B236}Y1<-M_0)(;@F>msi4X`P^fKQbF-`OSHf}H@%vCoUL1zuZ%42%Kr*q- zWwUjL#}fR@nG}rMX3Kuc9W>4FABKq24`bQ$jMdcaO)}UUjB4YLFP_y}Y@{1+WVI`C zIl?ob`hMU-kV`ICfTLgK94+aZN#Q2F2o=BL`*_VZnR$~4uWArcE?mRoNovqb`_x!; zk^3(6P&9a6OYdL>Zn;y%FM34i9yn)|LZw=8?FJrGwFJw;?4&c+;F;2+KB&WTD%1J^ zfg`!ZEbEzqiM10M!u;>dN}=A@%AsPH$lE5QafNw_= zU}BzJ+RbJ;$h^xb@Tmw>OJ^645-JlXD|4XF=lvcat$Gf^WPX@00 
zx7{;CRdc9in$_x8S|!@ggpG49F7XKLc1VSq&rZJvpC{`-mX83^We~f~ECiuSz)?1D6D7-O`Tf+#YO8KE`-jm0tjlTE#w;YY<>rvIBD8 zma1Aech6d$ok8L_ADsr@Nmz$`CwYbDyx)SM=#`}bdXSIIgS-vkao)PfFUeTyui`*| zmKIcx{^n=`IK`OXr*FH26UgHY$OZ>c_z%pzSqJ{9oxFpQMa^_|r-Ppj2`-d#7=2rG_8?ve(da?x z!J6?sr<8S2mL!IIpLs_y=e%KzH*59lR9MiwJhMdam4|iLWwWJO=xZ^It@Bqe`jo_H z69KZUw#S}^JU?(3IRo-hwDl51zElXJb)HDa1dB_^x(b8ATOMw#`uodc%A@GX4xvnM;79U_&tbx1)>FiNd|~WE&*~cXgS=lSpMI;9j)O^P}`*tqKh^#>oZ0#$NI#8Y-|mu=rl!2N^0AL zgdd~#UTs78;^Ymq_Xv~%(+XTG76dAZieNie zW8JJ~1x%GA$J@}$Od%SN`O{v=UT?X*>u3ZQK6S(ovqhc-FZ=b*@|6ZfY78&Ot(8bA7PU5r1dq-UYp+z1ul5UxZlLKhQzzvgNxxBC40D39ZQE3p zJUnRvP_pEfom=Jt0>ZJ6I9a$h{M@L+hbPG*r$yoh&whfv9XzRQLYgQD)UT{ck3>8g zR&3xz91}5znvpq953M|Pe-E?3wfW9vKdXKYPxEqyM|tAMw=BWE;)qDI5un*QPVFTC z<1<#nvc%`h1M12Ezg~=r0eyQxn=dP%Uj&e7yGHKQFlf;N*iRlk21Q@LIBJ*xjJulO1|p8R6I z5#eE{-{e`WfCYBQ`RKMjCC&roOq&Kmu}q{d?%MdgI=(axn_*xdVvB0w`69Lc>DC;KNk%5Y%#q0b z^S)qWCYvuxvjIkx|3PVt<JeWcvCgb zrO7ibYFpD#**@0lG%hX%vdr_)cVE4vPX93y*7;fyRK{jfcR-RzUX2I8@0u6raB>gr z+01gOQ;r0F=~p9Y)5LsUGHDuOKC@}7*j1||y|8NJCkYob*|~`YRJ@%|R-SXKSNc+# z4t#qfm5<_2Mp)Eb6jEX7{)44mR|FS89U2*N_pyCJE$~$F0siS}%M-$N)37lJo%BEI zc1h|JCKjGtB_5dkhe%a0Y=ZbLY2&}xfh^_M1DG!i7)XVCdjZ=bAnFT?BEU%yyWc{P zEmn@+a+HY*KpFJ?kIKXpJq&wW;fb(|##uu;rWAxYS86OD%I4wMW4wYx*;wq`CfIoi$4$$2I9i9%d_1cv0u;HcaiK9=8DRCp4ee1Q{f3^hQ`ye7$d- zG7Z1Mul;hgbj7H}!$tJa8RMQ@*_Qtx-c25ISosU@$e_ts+b<{d*=hIlsSevK?YVZT`-H&V2H_pL0VuVYd`E#a^rmUTJZQZbc zk9l>{nW1ak<8WHT+^rq(`)2jeJ7tphvyYM~ZT2s0#B@y%hsRf1AK@ixWaH<4msjfU-d?ND*)O9dv}}~<-eq{J z_H`=>3k#n9`65Komz-}+3>(!ha0Fk|%h{E~~ z+5E{7-mf54Y~8ybFaP<}l7Ti;Eh#>g(yHX^$UdBD0X@_;w0Oa7<9MEG{`qC!?n^r- zNRIeYj&qp{L@}OgkH`frb2%rj;!CiWsKo2#aju)L4E+ru3U_zhmVf!dJS%IUE^7zX zhBEU|ss;c0J-t);T?f3Q0kZ4*n5&o`?SjZxn}=!MSPKMoe?2`ug)fGrO%x5ogFhBP zVaLUD7ent_AF74mCm)K}HA;G$5BxgV5Hkk7{%A%E22lgVP-zI8XPrDnQ zx}2E02f7ihdh!VWoYQg5)YVNTGKI(~{jy@04JR9Bfe7xL=dOQ%(!;XcXrAO?;*skw zpaL-kMZk$~+;+jjmtOs|3u2{2pY^O{d?>RJWe|7o%zLE=Z?$;hhY_~8cQ9D2**FKc@F=%vOO^=-2E{lQptNnfkH7~7P;+&YrlKd` 
z%IWirrAKflpYRP6xbXAqTP*b)4oFR`etz*GwfAcV`w_mu{s1t%*vQLY!mwv-B1As{B}F_@oGsE2#$k0JN)&h}D)Q$zUF#jOd|oB=bMNz(lSY(Z ziaJ*BT`f9gIb|Y>p)94$WMQV%qsPQ*qy6WX?_$l46T$vi&` z52QxhPsapZZOggFDhr4d_dU!u3lLnj{ShDFDEtN{a@hQ*NjYguJDaXu~`R-vopyv#$lrY+* zG`kyd!%w9w(_UMk>Hd2bJVB*`=W6vdd?6srEynxT;zmYkePu~y1WS5gBWd1U;q7->k1AdA3}<8bnDJ3T9adR6MY zN;bSELPrd3WW(EF+U$-+eDR&+oL;iw5^F7F?=+^eNO8H!${D0HY_vnDUfX;bb<&CS zn}oxx2F~XWZy1{J?l;qB3}2PON7inY$1V9wpNI7zWr`-D%h@0abn*p%T|KEvFVlP zAPe}HdPUxEJc1MPp1C{Io=3l|x-hPsv=zN%O=kp z;OQSdedyiuNXCst@9?>+MmGMinP;F?8y7+S$wH8jz0|x}%H(2TE_hnHHW$~EfJQn} z{gHKk)QZ~Ov8;JpyYwTBG5$SY(9S+!?_110Sg_=$8%JbadZZ{_`ol`&X!Cu>-)X}l zLYN2tY3*=XTvEVNow76-xqZX)%qCP(No5(H`Y<1A8$pj4rEPq{kM@Z9RMuDJ;o>|t z_?$z}j>}U-GG#J$DZIY-L3rB(l83TCVXO z>8*A-TaXAd#OMo%95oxuaqYxgHRKVO7E{a1z&{PkAgz*%Wdk`F!s`0jJJ+D1YR zI5u@i*YTRtIUii)^T`@fx7NMCe4h-begmh;S(~v%A#+h&T9Opp+PqA>Fp@xW9t&Bv zl2k$Re(FF~qN(Ocw(n(2PFduS187$oB}kkZ!+o6fsANah8C4YP_j;n)z7sdBhx(^; zyKe}5c54pF4rj5pwGycIK2y*<^8QHlwa<1N$IKJomXxNzxx-C~lJ#}yud&z8G)Gcq z)ggM)&zMKFJT{j7(}Q3UGDZ6%msGxz*NbzG356ZX<;Q~A7Wuph2)xZlhjR<1I70(? 
z^gpZhn^R#9{KQL57E7Fv$i#lsN7B;Qt062_9Vt|44vB$BR@7prebiMKhX60IBBgcsl41Ti6Sg&2y5ubZ+0F zT^jL1#m}TCKKE1AJGPeU?4wWuF8+&RXuPVAfkqD7`ex^xypm4F{kBMPcF9-NOP|{F z{|s5K6voIMk~CA?tbEzVMXJ#oubE}D^07Age!~J85aS+ZU5&8duk5lszuuY8Gt}fG*P_L;DMTPu32%t-;TZ zb1SNzHq{6=9OsCcsXJa$Uakj!|Av&#B~y`a`c!UI^fMxF;yLO*Ep50`P4~LLQ11+{ z!}8egs$!lO5)R!{yU3TTCRP@pLEboI)LQ!#{)L(OIuRAo_f?m+NUWzR2b7+?%UjFZ z1Y|`zE%NnQ5B5&fc6%0QcP4j?B3hoCPM7N$Q+{g7N*iR+x1eZaeSPSX@R4a&M5t;1 zcJKlRUbjdq#6W5LSjw70;=5a1){Uml5ovfhehO)L`m~95-N=jBGCn5inY!q* zGc8te`q=P2?CvNIiiTF4fbjsoa&ZCP*SeSdhe6oV!c3Go(S5MS<^*cDSSi2aI93je zsful9B17jLbQf>ck-5a`&Ok>(ua4g=)N;vWhk8`Fn(mpEO$zv^!4tl^aDdxPjcsV4 za{B##RR7&BwY(jAw+@b_*XFV6ay6C>S9tA7^h>4ZXIUZrg&d&v)oPQ=z#+pZe5BoA z+US>xDgxw5Iz8NE9~2Mjl-&@%cT!Rk(F~)}OLtOoGGxfi5YhS~AnhgeU_n5Rc3@DY z{gh13Bs;r)d!x6aCsmNR%ek#cz=}y;uR8L9pK!kQkKT~J-r>rgX4z(^GB zq=TUWoju3h4A>7KzrC43b?H(+zhwGn`;g=hAVoQ$bSM@~Jd!^F$Ly0+F~;{l*XNj7 z_AS^yELMgH=_!J5Mq9Nnk-;rOL@6-05`f^`J@t_|>-9?S#`*QeeN9T*@rk%f9UKB$ ziRl;d(NTLZIXRJEo+9^ucNG2Xp5l>5%XjF3RFJIOatr<3vHU07krB}Jx(La@KSlWa z-^UX*8+~L??=@C7D?KgT{I$t`$)qwvd(#{rA42}@m)YPJRQqXoGz5FV$KytF%!AJ=4EQ7%*L@33e@@Wf*x{ADxVMj|yL zHK#U(g;3qW&VQYNcLB~cXmZ7czhP~~5f6FW5rQT!<4OkdUTw)a*7=AsmikmiMeMDAqHZ$7{DMaPF<^%tb{$+;~+ z>knXQj-%x!LVu!$|CCv2r~N=zd6xF(7ezC}ByF9;iE{*2t#B#U)xOHU91T`^X!T_F zDbb^KVqY?iG=t>Kz-dR;>UA{uaOGT-U9H|KX8h{mtG;d1%f$=fCEBAODnl9Ld+uhI zIz5x6`^~;}R;8Sdf>G-Sywjk=A8`-8P^e|t|slpX6!;a1v5$&UvR z^zQt@W1d;gQF-S0Ek5aC13+U zEcLVi=fJg#XflA;bPh*qE<+${6QAgX<(^2$b7#56kcGbvp=b zw6J1O9e9Uky9T~Tkk5Tp=x*JMAmeMMVu59G?q? 
zuwd*+p>*|=K=SVvB*Ee{cgMS{Pan+Bhr=JL7(U=DfS|sCJ&>J9cr6q(Hs9<&8nWdt z*&Xz_RTX{1;^-~a2$DP^rob7(wszbRjq927Tan~I61c3K728xe7~mHuIjC&uBT>fd zUJ}H~40+#}4GG~A%r7bJ?h!e(2(MgMp(;k!ix9{ud|g zRkSqG_+K45j|g|8g$+-NUsU+jW&FLAfQu=@e@@(!VqR)k%Lfstkj!u_ARb@Qufqeq zmk4o6fM+u?D~}w#=d#Kw{pHx!UKC`T?pC6^M_X- z1bQoO}y_;S)B~U@WbzcepeZHn?9$wIXbz42ZwE)59E=N zUuxXN*I!cO_#x~r)IY^9UPwS{l18vAGAN@T^Ii5*%iGu*f^{#O;mD7jG2NjTeM5FU zi~??4_s7~Y4LK4Y$9(o<`&?1Cge zJ23#?IsEahDf3AJW!1~&ySR#Pnjg=e6*7qWNC@Hc$gcu1fqxCCNtAq^t%P9@cRTPl zKD7C^F2&G!-hP+FA2$Y{o8hoY3B;U`aTXKzH_PGruR1REIOon9Sf;}KX@%(b^Iupo z>Eu)>Uz!7EWz6AhfBXM>x!T`#4Zf7{cl#q_vQ9T(AK<$q zz>sgeECt_Jg5GJsy-xEYxf*5=TZ;H$PSN#-+-Al|kcDuWc1~$sP-?vV(XgpUF-d6Z z$BSyhQ1;-8Y&=t!;nJb-)1*{cc$^Wp5mI_^{J!Qxn&u(`))`2WQPsr~Q(h8^xj%{J zvnR4!?QPiV@;oFbQ#-ognZ%Wn6~HUSc#@7+=`Bu*1inB;fF=CutKmQ z<|rQBRl@_c__cEfBQ@|op-N{ zr23z|_nkP2$ngp}$~^qeV$h14yv%U271eSjU*PfN0gJUQY$ zyB|32I957E#m}6d`?`^uKRk+Ii@qZj=G>AKBX5XTh=AK;YOn>)cL zASU$m-`{|9Vg~>fLO}Ro?RARpSU){P7!1z`uzw;2!y(fIGr9vvTR96=p1-3zkJc_aM6Q1f1Yy(X$^mhOWa-wJMp82 zugPpO&wFIv$v8Ri)7IM7CJ#7lG3R+Lh&b3zxVP~YJKAJ)B}vk8W{l)=^s)XACRF~j zp^k>Hy<~2vY@bfatq>s}LPuJeJ85&ud~NsRo^k94>MzVMH2HO#yEvdUOl#CXnfRn{ zz2UXh_`<&+uG>@kip5;oK;3BKjgVFz16eG7F5}+dRv+H;46DR>%eYUwGOt4f`>9`l z_T+rzS|T$aKmG`B{Lza$7V)NlR3+gUt7=GWNqBiSmrV?5C}ltX68}h57mtVmUwu%i z8F@gW;nLttM60>h@=CWYB%Uo%zHQ9-hx3mWR~x+*eLPDe7p3gCc$Qzc?Q!$JIuFF= zOLuN%w8~uQN>pq)GbCS)pZKJ!kUo2>IS5kc7Yt@$OY3bGka|cqCHz|V(}!;1!mh7) z7(yc2s@mzdOujgNOn&?;G_q;7m*D)YjkP9Va`dwn^}@JNn%gIkU6Ex{qA0Yk{pomM z_kx`+eZKdhR;czf|*zfW(LXq5Rf@mA#N5sK^j>$rDt_NW6Em)U>TxR+t>b z)q_b(8^X>OW;0Z$OSrmD&ty3j>Hv1v8<`50f!sZ6y>SBB%}t;)`k@jR2?y5Cm0-fp zkhhlr8J2bU$5cQ)>HN`C5YS#xRCBc?l_vrqMt+q7d!KQG&s$O^f#8K3%sVnQ26zIH z@iHFSwJk`2A(o-S)MxD9EUZrQ(8Ev4zk%^<+HGV!!CG-Q0DUqyb)7!(_nMYN7_d46 zH3cDw64>s!?_jsxgx}k_+fjo!#3XsIijZB?nMQ@XPQwK zJ}CP|k)i0&fX4d>ma^y=IMA$g{E-}Iko*48vj_Gx5z3NFy+zoYgQK9sa1KI6&gOH% 
za1PsRZ7!MVAy$zmny;C$2w~{RKSs~xEfU@r+wqec*yFe3dM`AduOout;1PSONT)3fXd7Ay=bkaDtMW>dLCMQb>0Go+?JT}GyWoB(WO^TOy`BccQp3>v~qOHVn$V<8_H{Wy-2>|0kOfX||yT`EsK~1HjBBO#ZW6HC&$K*zfRs#R8no^|%uF z77XOspt{49?<0mI1+_FuLJR=hLM;vUvW=^YbiB2PXug2s^eMDeC%Qlg z{t7rd2@lgjMZmA62W>I=GL<)Xgtk|AZBKr2u9rruo5@@zq)gv-t6qPd%Nksmytk#i zBVD~meYq}|_6s|x2Qb|#U*77KOgu>pMV->2p{0j&lTwIj0@vJYp_Gwd{yt3A#s3=B z+0Wr-*0P~^C!*ZamyhoG1kVFWCmh2N?x|SaTq@H1BvffaMN}YQQBbfeI1~g-Ea#S~ zJZ=Yc5G`+6gMi{{yq{|j*3j4~^wNXstEIr4yG^Ay2beJAm|cj1N6R^6F#fNp9FtQU6>qtw0&OQ4PngxVc$a^>;Ah{)z_*eK`T>R1=erAZ)^0pBlX7_Ird3pixXw3RLcK}^zTK7Gm{7FF#5Tg$J`|cX}HdCgZ zltc_<73NreY*0p)j@e>SW<_7w1!%YY#zUu9%xwa*m^ShV(?$ZrANR!Wqe}DKZ?Ca> z7r2M%96t>91FuVf@*f+Rp5sII@ltgaCf47s0qv)V*~07NfBy|g*xZQ$BQ0RF2WrZ` zAtDirsqwY%R}k81<|?y#eiS8>t;^nv`c0|Bci9e;E%gvSLmlZ6tT{mY?u!keRSi+o z<)<_UEwFDyEt5AyQqtA(g9E+5SD$mU1c6+1d`2?Y6#feUlwA!_WDWeZ{zCe=9nymg zRAH*Vz?6$lh$xTgN!m>{gb~HLa;XI%pnJeG;4Pc_xI;MhZ_L_D(N&DL3$D0n2~z(p z22eIIn4Y(=*4{wqtBR3!m{I&L$ZC`(6EI&7k_uF$eX^#4DtKi;TKE`99FStAz3G)b z;=Menqzxi^0ase~So}%%_MX)aP1biSP&jHN+uH{ORyHJtfS4)5#eZy@;u26_IwV}( zh>QfbF`y-FYhoqdoOb_r%L6&@ z3TSS=8E&r?^&&X86zr1h+IYYGDlGHlC)kI z^P_JmTGxDV+YV17LMj(_l4PkKIJ|S+7`a|0{LOgT>Wq`uoC5ZL6uZ*qy*;RVCcxp> zeSUQ?s=-?`DP1zG&M)-W^?)#?S*QX^-4O;&3x}B>-N(T?mIncyXSA|K--s5+(~1Do zc$A03D7xOPZ{WjKqrbenPR^p|%c98eRL<)D+~P>3c>fLVXV->b&;HzOhPHY#nZK?4 zI{8*#KF6lww|)EQ3IZpoCLw35@L4G;YxaW?)68R_9I!ZQix>%_Y>R-Q51+F&o1`Zb zE_Xfz9c;j~UOhj7q{xwR_Ps8Hfdb7j9h4DXlIga6cYci{&FX-F=|;;o#!OakB-2=?dVT8G&3&^FWEn*qGy0(u)b%-8iYiy(&DXiq!mDVj zbEn@qU83?+`o`+^LVmnLyITS3KOV*8)V{HB6;3^~PQP(8iJcqB+3lo`n^6fyG8LWekASR*sJ-;L3VHGY4c~pJwAKHxcw@jqrhu#>LN1G-o?)kFsB>I z^yzEv5Cr;u)aQ|{<}JSrnkc`yPP$dW>3FR;+3i4~mt;Z^o5Q@GT`Eo+5=N^9!St39e z_7}XMlsj1>S>a_@;3JJC0+QnX>~QIlzC+urEN$jZTpuMBiUpi4kpVE0>a7;qB#ks+ zDic+h0{DiT4@3ARtagB=GlNG~Zr4j41X!+dV-#2lDnVyU)tcY$qDp%#UC@OUqnnWV z2R3_1ej2s&G$4tV9(a^*Z<<5+Nz<*{Y)HlGG&Wbb%=NUUvQ_zqma?1QtfBcln|YHW zUjm65siTn*FurK6oCSd+rTOlPoqWgP^Rv-#6#m~p%De@AR}{x zR9KLaZo+0J8+KAU 
z{#!cRAZz04oMUSp~q78_;9Rq7cqDfo$TLGSKe2By?PO6V03~o zhQ4a-?jD)g+4yj@S>xdWZufHNNRP)y{Js3z&MQkON;zLBWtUyc>h*FhpsMG*Du6@i z_xA2U+kip^G;mKW(6GVY2f&ijSO+_GJs2%^_G3E`MO8^3MipCfbU ziVT$RBc