class WatchDog(object):
    """Alarm system that polls dripline endpoints and Docker containers.

    The configuration is a YAML file read by ``load_configuration``. The keys
    this class reads are ``dripline_mesh``, ``check_interval_s``,
    ``slack_hook``, ``blacklist_containers`` and ``check_endpoints``.
    Alerts are posted to the Slack webhook given by ``slack_hook``; when no
    hook is configured a notice is printed and no alert is sent.
    """

    # Set to True by exit_gracefully(); polled by run() to leave its loops.
    kill_now = False

    # Upper bound (seconds) for one Slack request so that a stalled webhook
    # cannot block the check loop or the signal handler indefinitely.
    slack_timeout_s = 10

    def __init__(self, config_path):
        """Load the configuration, connect to Docker and dripline, and
        install SIGINT/SIGTERM handlers.

        Args:
            config_path: path of the YAML configuration file.
        """
        self.config_path = config_path
        self.load_configuration()
        self.setup_docker_client()
        self.setup_dripline_connection()
        signal.signal(signal.SIGINT, self.exit_gracefully)
        signal.signal(signal.SIGTERM, self.exit_gracefully)
        self.send_slack_message("Started alarm system!")

    def load_configuration(self):
        """Read ``self.config_path`` into ``self.config`` and print it.

        Raises:
            ValueError: if the file does not contain a YAML mapping.
        """
        # Use the path handed to the constructor. Relying on a module-level
        # ``args`` only works when this file is executed as a script and
        # raises NameError when the class is imported and used elsewhere.
        with open(Path(self.config_path), "r") as open_file:
            self.config = yaml.safe_load(open_file)

        if not isinstance(self.config, dict):
            raise ValueError(
                f"Configuration file {self.config_path} must contain a YAML mapping."
            )

        # A missing hook is allowed: messages are then skipped with a notice.
        self.config.setdefault("slack_hook", None)

        # NOTE(review): this prints the Slack webhook URL to the log.
        print("Configuration is:", flush=True)
        print(self.config, flush=True)

    def setup_docker_client(self):
        """Create the Docker client from the process environment."""
        self.client = docker.from_env()

    def setup_dripline_connection(self):
        """Open the dripline interface using the ``dripline_mesh`` settings."""
        self.connection = Interface(dripline_mesh=self.config["dripline_mesh"])

    def exit_gracefully(self, signum, frame):
        """Signal handler: request loop termination and announce it.

        Args:
            signum: number of the received signal.
            frame: current stack frame (unused).
        """
        self.kill_now = True
        print("Got a signal %d" % signum, flush=True)
        self.send_slack_message("Stopping, received signal: %d" % signum)

    def send_slack_message(self, message):
        """Post ``message`` to the configured Slack webhook.

        Delivery is best effort: network failures and non-200 replies are
        printed but never raised, so a Slack outage cannot stop the watchdog.

        Args:
            message: text to send; it is converted with ``str.format``.
        """
        hook = self.config["slack_hook"]
        if hook is None:
            print("Slack hook not configured. No message will be send!")
            return

        post = {"text": "{0}".format(message)}
        try:
            response = requests.post(
                hook,
                headers={'Content-Type': 'application/json'},
                data=json.dumps(post),
                timeout=self.slack_timeout_s,
            )
        except requests.RequestException as err:
            print(f'Request to slack failed: {err}', flush=True)
            return

        if response.status_code != 200:
            print(f'Request to slack returned an error {response.status_code}, the response is:\n{response.text}')

    def get_endpoint(self, endpoint, calibrated=False):
        """Query ``endpoint`` through the dripline connection.

        Args:
            endpoint: name of the endpoint to get.
            calibrated: select the ``"value_cal"`` entry of the reply instead
                of ``"value_raw"``.

        Returns:
            The selected entry of the reply.
        """
        val = self.connection.get(endpoint)
        return val["value_cal" if calibrated else "value_raw"]

    def compare(self, value, reference, method):
        """Evaluate ``value <method> reference``.

        Args:
            value: value read from an endpoint.
            reference: reference from the configuration; converted to float
                when ``value`` is a float.
            method: one of ``"not_equal"``, ``"equal"``, ``"lower"``,
                ``"greater"``.

        Returns:
            bool: result of the comparison.

        Raises:
            ValueError: if ``method`` is not one of the supported names.
        """
        # The reference may be a string in the configuration (e.g. "00");
        # align it with float readings so ordering comparisons are numeric.
        if isinstance(value, float):
            reference = float(reference)

        if method == "not_equal":
            return value != reference
        if method == "equal":
            return value == reference
        if method == "lower":
            return value < reference
        if method == "greater":
            return value > reference
        raise ValueError(f"Comparison method {method} is not defined. You can use one of ['not_equal', 'equal', 'lower', 'greater'].")

    def _check_endpoints(self):
        """Compare every configured endpoint against its reference and alert on a match."""
        # A missing key and an empty ``check_endpoints:`` (None) both mean
        # "nothing to check".
        for entry in self.config.get("check_endpoints") or []:
            if self.kill_now:
                break
            try:
                value = self.get_endpoint(entry["endpoint"])
                print(entry["endpoint"], value, flush=True)
                if self.compare(value, entry["reference"], entry["method"]):
                    # Expose only the documented fields to the template
                    # instead of every local (which included ``self`` and
                    # therefore the webhook URL).
                    self.send_slack_message(entry["message"].format(
                        value=value,
                        endpoint=entry["endpoint"],
                        entry=entry,
                    ))
            except Exception as e:
                self.send_slack_message("Could not get endpoint %s. Got error %s." % (entry["endpoint"], str(e)))

    def _check_containers(self):
        """Alert on every non-blacklisted container that is not running or has a non-zero exit code."""
        if self.kill_now:
            return

        # str.startswith accepts a tuple of prefixes; an empty tuple never matches.
        ignored = tuple(self.config.get("blacklist_containers") or ())

        try:
            containers = self.client.containers.list(all=True)
        except Exception as e:
            # Report instead of letting a Docker daemon hiccup end the watchdog.
            self.send_slack_message("Could not list containers. Got error %s." % str(e))
            return

        for container in containers:
            if self.kill_now:
                break
            if container.name.startswith(ignored):
                continue
            if container.status != "running":
                self.send_slack_message(f"Container {container.name} is not running!")
            exit_code = container.attrs["State"]["ExitCode"]
            if int(exit_code) != 0:
                self.send_slack_message(f"Container {container.name} has exit code {exit_code}!")

    def run(self):
        """Run endpoint and container checks every ``check_interval_s`` seconds until a stop is requested."""
        while not self.kill_now:
            self._check_endpoints()
            self._check_containers()

            print("Checks done", flush=True)
            # Sleep in one-second slices so a stop request is honoured quickly.
            for _ in range(int(self.config["check_interval_s"])):
                if self.kill_now:
                    break
                time.sleep(1)
        self.send_slack_message("Stopping alarm system")


if __name__ == "__main__":
    print("Welcome to Watchdog", flush=True)

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, required=True, help="Path of the yaml config file.")
    args = parser.parse_args()

    dog = WatchDog(args.config)
    dog.run()
+slack_hook: "https://hooks.slack.com/services/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + +blacklist_containers: + # containers whose names start with an entry listed here are excluded from the running-state and exit-code checks + - mainzdripline3-dls10ZTranslator + - mainzdripline3-pneumaticValve_dl3 + - mainzdripline3-Pressure_gauge_70 + - mainzdripline3-habs_tc + - mainzdripline3-Checklist + - mainzdripline3-slowdash + - mainzdripline3-dripline-bash + - mainzdripline3-SignalTest +check_endpoints: + # read this as: if 'endpoint' 'method' 'reference' send 'message' + # e.g. if 'habs_error_status' 'not_equal' '00' send 'HABS power supply issue! Error status: {value}' + # methods can be one of ["not_equal", "equal", "lower", "greater"] + #- endpoint: habs_error_status + # method: not_equal + # reference: "00" + # message: "HABS power supply issue! Error status: {value}" + #- endpoint: pg8_pressure_mbar + # method: greater + # reference: 2e-5 + # message: "PG8 above 2e-5 mbar (too high)" + #- endpoint: pg60_pressure_mbar + # method: greater + # reference: 1e-4 + # message: "PG60 above 1e-4 mbar (too high)" + #- endpoint: read_C_Temperature_CoolingLoopSensor1_MATS + #method: lower + #reference: 0 + #message: "Cooling loop water is below freeze point (sensor 1)" + #- endpoint: read_C_Temperature_CoolingLoopSensor2_MATS + # method: lower + #reference: 0 + #message: "Cooling loop water is below freeze point (sensor 2)"