From e57947de7d68856586a48d54307c7c087acf3148 Mon Sep 17 00:00:00 2001 From: niejiangang Date: Thu, 17 Mar 2022 20:32:12 +0800 Subject: [PATCH 1/3] feat: support io-latency --- requirements.txt | 4 +- resources/defects/disk.py | 2 +- resources/scenes/service_loss.py | 7 +- simpledaemon/iolatency.py | 0 simpledaemon/main.py | 219 +++++++++++++++++++++++++++++++ simpledaemon/makefile | 9 ++ 6 files changed, 235 insertions(+), 6 deletions(-) create mode 100644 simpledaemon/iolatency.py create mode 100644 simpledaemon/main.py create mode 100644 simpledaemon/makefile diff --git a/requirements.txt b/requirements.txt index a0cfa94..66697ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,6 @@ click~=8.0.3 jsonpath~=0.82 spacecapsule~=0.1.0 kubernetes~=22.6.0 -paramiko~=2.9.2 \ No newline at end of file +paramiko~=2.9.2 +daemon~=1.2 +psutil~=5.9.0 \ No newline at end of file diff --git a/resources/defects/disk.py b/resources/defects/disk.py index 6cb222e..cde8f39 100644 --- a/resources/defects/disk.py +++ b/resources/defects/disk.py @@ -33,7 +33,7 @@ @click.option('--timeout', 'timeout') @click.option('--labels', 'labels') @click.option('--names', 'names') -def diskfill(scope, experiment_name, path, size, reserve, percent, timeout, labels, names): +def disk_fill(scope, experiment_name, path, size, reserve, percent, timeout, labels, names): args = locals() args['action'] = 'fill' args['target'] = 'disk' diff --git a/resources/scenes/service_loss.py b/resources/scenes/service_loss.py index 420fa00..ecf5fde 100644 --- a/resources/scenes/service_loss.py +++ b/resources/scenes/service_loss.py @@ -69,18 +69,17 @@ def node_network_loss(node_name, interface, percent, remote_port, local_port, ti @click.command() @click.option('--namespace', 'namespace', default='practice') @click.option('--network-plugin', 'network_plugin', default='calico') -@click.option('--time', 'time', default=3000) @click.option('--percent', 'percent', default=80) @click.option('--timeout', 'timeout', default=10000) @click.option('--kube-config', 'kube_config', default="~/.kube/config") @click.option("--check_history", is_flag=True, default=True, is_eager=True, callback=check_status, help="Check experiment history", expose_value=False) -def case4(namespace, network_plugin, time, percent, timeout, kube_config): - pod_network_loss(namespace, network_plugin, time, percent, timeout, kube_config) +def case4(namespace, network_plugin, percent, timeout, kube_config): + pod_network_loss(namespace, network_plugin, percent, timeout, kube_config) -def pod_network_loss(namespace, network_plugin, time, percent, timeout, kube_config): +def pod_network_loss(namespace, network_plugin, percent, timeout, kube_config): # Choose a pod from target namespace api_instance = prepare_api(kube_config) pod_list = api_instance.list_namespaced_pod(namespace) diff --git a/simpledaemon/iolatency.py b/simpledaemon/iolatency.py new file mode 100644 index 0000000..e69de29 diff --git a/simpledaemon/main.py b/simpledaemon/main.py new file mode 100644 index 0000000..32a1d36 --- /dev/null +++ b/simpledaemon/main.py @@ -0,0 +1,219 @@ +# -*- coding: utf-8 -*- + +import fcntl +import json +import os +import select +import signal +import subprocess +import sys +import time + +import psutil + +args = "" + + +class Daemon(object): + def __init__(self, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'): + self.ver = "0.1.23" # version + self.waitToHardKill = 3 + self.processName = os.path.basename(sys.argv[0]) + self.stdin = stdin + self.stdout = stdout + self.stderr = stderr + + def _makeDaemon(self): + try: + pid = os.fork() + if pid > 0: + # Exit first parent. + sys.exit(0) + except OSError as e: + m = f"Fork #1 failed: {e}" + print(m) + sys.exit(1) + + # Decouple from the parent environment. + os.chdir("/") + os.setsid() + os.umask(0) + + # Do second fork. + try: + pid = os.fork() + if pid > 0: + # Exit from second parent. + sys.exit(0) + except OSError as e: + m = f"Fork #2 failed: {e}" + print(m) + sys.exit(1) + + m = "The daemon process is going to background." + print(m) + + # Redirect standard file descriptors. + sys.stdout.flush() + sys.stderr.flush() + si = open(self.stdin, 'r') + so = open(self.stdout, 'w+') + se = open(self.stderr, 'w+') + os.dup2(si.fileno(), sys.stdin.fileno()) + os.dup2(so.fileno(), sys.stdout.fileno()) + os.dup2(se.fileno(), sys.stderr.fileno()) + + def _getProces(self): + procs = [] + + for p in psutil.process_iter(): + if self.processName in [part.split('/')[-1] for part in p.cmdline()]: + # Skip the current process + if p.pid != os.getpid(): + procs.append(p) + + return procs + + def start(self): + # Handle signals + signal.signal(signal.SIGINT, self.stop) + signal.signal(signal.SIGTERM, self.stop) + signal.signal(signal.SIGHUP, self.stop) + + # Check if the daemon is already running. + procs = self._getProces() + + if procs: + pids = ",".join([str(p.pid) for p in procs]) + status = { + "status": "started", + "pid": pids + } + print(json.dumps(status)) + sys.exit(1) + + # Daemonize the main process + self._makeDaemon() + # Start a infinitive loop that periodically runs run() method + self.run() + + def status(self): + procs = self._getProces() + + if procs: + pids = ",".join([str(p.pid) for p in procs]) + status = { + 'status': "started", + 'pid': pids + } + else: + status = { + 'status': "stopped", + } + print(json.dumps(status)) + + def stop(self): + procs = self._getProces() + + def on_terminate(process): + status = { + 'status': "stopped", + 'pid': process.pid + } + print(json.dumps(status)) + + if procs: + for p in procs: + p.terminate() + + gone, alive = psutil.wait_procs(procs, timeout=self.waitToHardKill, callback=on_terminate) + + for p in alive: + p.kill() + else: + status = { + 'status': "not_find", + } + print(json.dumps(status)) + + # this method you have to override + def run(self): + pass + + +class Toda(Daemon): + def __init__(self, cmd, stdout='/dev/null', stderr='/dev/null'): + Daemon.__init__(self, stdout=stdout, stderr=stderr) + self.cmd = cmd + self.args = "" + + def start(self, pid, path, args): + self.cmd = self.cmd.format(pid, pid, path) + self.args = args + Daemon.start(self) + # self.cmd += "pid" + + def run(self): + global process + print(self.cmd) + process = subprocess.Popen(self.cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL) + fcntl.fcntl(process.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK) + self.send(self.args) + status = self.recv() + print(status) + signal.signal(signal.SIGTERM, signal_handler) + process.wait() + + def send(self, data, tail='\n'): + b = data + tail + process.stdin.write(bytes(b, encoding='utf-8')) + process.stdin.flush() + + def recv(self, t=.1, stderr=0): + r = '' + pr = process.stdout + if stderr: + pr = process.stdout + while True: + if not select.select([pr], [], [], 0)[0]: + time.sleep(t) + continue + r = pr.read() + return r.rstrip() + return r.rstrip() + + +def signal_handler(signal, frame): + process.kill() + sys.exit(0) + + +if __name__ == "__main__": + process = None + daemon = Toda( + "/opt/nsexec/release/nsexec -l -p /proc/{}/ns/pid -m /proc/{}/ns/mnt --library-path /opt/nsexec/release/libnsenter.so -- /opt/toda/release/toda --path {} --verbose info", + "/stdout", "/stderr") + # daemon = Toda("/home/nejan/space-capsule/simpledaemon/{}", "args", "/stdout", "/stderr") + usageMessage = f"Usage: {sys.argv[0]} (start |stop|status)" + + choice = sys.argv[1] + + if choice == "start": + if len(sys.argv) == 5: + choice = sys.argv[1] + pid = sys.argv[2] + path = sys.argv[3] + args = sys.argv[4] + daemon.start(pid, path, args) + else: + print(f"Usage: {sys.argv[0]} start $pid $path $args") + elif choice == "stop": + daemon.stop() + elif choice == "status": + daemon.status() + else: + print("Unknown command.") + print(usageMessage) + sys.exit(1) diff --git a/simpledaemon/makefile b/simpledaemon/makefile new file mode 100644 index 0000000..48cd3b4 --- /dev/null +++ b/simpledaemon/makefile @@ -0,0 +1,9 @@ +define exec-command +$(1) + +endef + +# CI build +.PHONY: build +build: + pyinstaller main.py --name simpledaemon \ No newline at end of file From 90bbe3057b72afa4e24ea61b90a3e374bf416c63 Mon Sep 17 00:00:00 2001 From: niejiangang Date: Mon, 21 Mar 2022 18:01:41 +0800 Subject: [PATCH 2/3] feat: support IOLatency --- .../defects/iolatency/__init__.py | 0 .../defects/iolatency/iolatency.py | 30 ++++++++++--------- .../defects/iolatency}/makefile | 2 +- 3 files changed, 17 insertions(+), 15 deletions(-) rename simpledaemon/iolatency.py => resources/defects/iolatency/__init__.py (100%) rename simpledaemon/main.py => resources/defects/iolatency/iolatency.py (85%) rename {simpledaemon => resources/defects/iolatency}/makefile (60%) diff --git a/simpledaemon/iolatency.py b/resources/defects/iolatency/__init__.py similarity index 100% rename from simpledaemon/iolatency.py rename to resources/defects/iolatency/__init__.py diff --git a/simpledaemon/main.py b/resources/defects/iolatency/iolatency.py similarity index 85% rename from simpledaemon/main.py rename to resources/defects/iolatency/iolatency.py index 32a1d36..53f604e 100644 --- a/simpledaemon/main.py +++ b/resources/defects/iolatency/iolatency.py @@ -155,16 +155,19 @@ def start(self, pid, path, args): def run(self): global process - print(self.cmd) - process = subprocess.Popen(self.cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL) - fcntl.fcntl(process.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK) - self.send(self.args) - status = self.recv() - print(status) - signal.signal(signal.SIGTERM, signal_handler) - process.wait() + with open('/status', 'w+') as nsexec: + process = subprocess.Popen(self.cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=nsexec) + flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL) + fcntl.fcntl(process.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK) + self.send(self.args) + status = { + "cmd": self.args, + "status": self.recv().decode(encoding="utf-8") + } + print(json.dumps(status)) + signal.signal(signal.SIGTERM, signal_handler) + process.wait() def send(self, data, tail='\n'): b = data + tail @@ -186,16 +189,15 @@ def recv(self, t=.1, stderr=0): def signal_handler(signal, frame): - process.kill() + process.terminate() sys.exit(0) if __name__ == "__main__": process = None daemon = Toda( - "/opt/nsexec/release/nsexec -l -p /proc/{}/ns/pid -m /proc/{}/ns/mnt --library-path /opt/nsexec/release/libnsenter.so -- /opt/toda/release/toda --path {} --verbose info", - "/stdout", "/stderr") - # daemon = Toda("/home/nejan/space-capsule/simpledaemon/{}", "args", "/stdout", "/stderr") + "/opt/nsexec/nsexec -l -p /proc/{}/ns/pid -m /proc/{}/ns/mnt --library-path /opt/nsexec/libnsenter.so -- /opt/toda/toda --path {} --verbose info", + "/stdout", "/dev/null") usageMessage = f"Usage: {sys.argv[0]} (start |stop|status)" choice = sys.argv[1] diff --git a/simpledaemon/makefile b/resources/defects/iolatency/makefile similarity index 60% rename from simpledaemon/makefile rename to resources/defects/iolatency/makefile index 48cd3b4..1c72f11 100644 --- a/simpledaemon/makefile +++ b/resources/defects/iolatency/makefile @@ -6,4 +6,4 @@ endef # CI build .PHONY: build build: - pyinstaller main.py --name simpledaemon \ No newline at end of file + pyinstaller iolatency.py --name iolatency \ No newline at end of file From b6c79f9f077f96674e642677dc785e8d5e0ea250 Mon Sep 17 00:00:00 2001 From: niejiangang Date: Tue, 22 Mar 2022 17:09:36 +0800 Subject: [PATCH 3/3] feat: support IOLatency case --- image/daemon/Dockerfile | 23 +++++++++++++ resources/defects/disk.py | 62 ++++++++++++++++++++++++++++++++++++ resources/scenes/resource.py | 2 ++ 3 files changed, 87 insertions(+) create mode 100644 image/daemon/Dockerfile diff --git a/image/daemon/Dockerfile b/image/daemon/Dockerfile new file mode 100644 index 0000000..6dc8fbd --- /dev/null +++ b/image/daemon/Dockerfile @@ -0,0 +1,23 @@ +FROM debian:buster-slim + +ENV CHAOSBLADE_HOME=/opt/chaosblades +COPY chaosblade-1.5.0 /opt/chaosblade +ENV PATH=/opt/chaosblade:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + +RUN curl -L https://github.com/chaos-mesh/toda/releases/download/v0.2.2/toda-linux-amd64.tar.gz | tar xz -C /usr/local/bin + +RUN case "$TARGET_PLATFORM" in \ + 'amd64') \ + export NSEXEC_ARCH='x86_64'; \ + ;; \ + 'arm64') \ + export NSEXEC_ARCH='aarch64'; \ + ;; \ + *) echo >&2 "error: unsupported architecture '$TARGET_PLATFORM'"; exit 1 ;; \ + esac; \ + curl -L https://github.com/chaos-mesh/nsexec/releases/download/v0.1.6/nsexec-$NSEXEC_ARCH-unknown-linux-gnu.tar.gz | tar xz -C /usr/local/bin; \ + mv /usr/local/bin/libnsenter.so /usr/local/lib/libnsenter.so; + +COPY iolatency /opt/iolatency + +CMD ["sh", "-c", "tail -f /dev/null"] \ No newline at end of file diff --git a/resources/defects/disk.py b/resources/defects/disk.py index cde8f39..0ea0a95 100644 --- a/resources/defects/disk.py +++ b/resources/defects/disk.py @@ -1,3 +1,5 @@ +import json + import click # # 为 test-ns 下 label为 app=testdemo 的所有pod设置访问网络延时 @@ -19,7 +21,12 @@ # # space-capsule undo {experiment-name} # space-capsule undo network-delay +import jsonpath + +from resources.scenes.service_slow import select_pod_from_ready, field_selector from spacecapsule.executor import bash_executor +from spacecapsule.history import store_experiment +from spacecapsule.k8s import executor_command_inside_namespaced_pod, prepare_api from spacecapsule.template import chaosblade_resource, chaosblade_resource_script @@ -74,5 +81,60 @@ def disk_fill(scope, experiment_name, path, size, reserve, percent, timeout, lab args) +def disk_latency(pod, container, volume, methods, time, kube_config): + api_instance = prepare_api(kube_config) + pod_list = api_instance.list_namespaced_pod("practice") + if pod is None: + pod_ref = select_pod_from_ready(pod_list.items, None, None) + else: + pod_ref = select_pod_from_ready(pod_list.items, field_selector, {"key": "$.metadata.name", "value": pod}) + host_ip = pod_ref.status.host_ip + containers_list = pod.ref.status.containerStatuses + if container is not None: + for container_ref in containers_list: + if container_ref.name == container: + container_id = container_ref.containerID + break + else: + container_id = containers_list[0].containerID + + if container_id is None: + print("Can not found container named " + container + " in target pod") + exit(0) + commands = "docker inspect " + container_id + " -f {{.State.Pid}}" + daemon_list = api_instance.list_namespaced_pod('chaosblade') + for daemon in daemon_list.items: + if daemon.status.host_ip == host_ip: + stdout, stderr = executor_command_inside_namespaced_pod(api_instance, 'chaosblade', daemon.metadata.name, + commands) + # TODO Execute command in daemonSet Pod + io_latency_commands = "/opt/iolatency/iolatency start " + executor_command_inside_namespaced_pod(api_instance, 'chaosblade', daemon.metadata.name, + io_latency_commands) + + check_status_command = "/opt/iolatency/iolatency status" + stdout, stderr = executor_command_inside_namespaced_pod(api_instance, 'chaosblade', daemon.metadata.name, + check_status_command) + status = jsonpath.jsonpath(json.loads(stdout), "$.status") + if status != "started": + # TODO failed reason + print("Inject Failed caused by ...!") + + # TODO Store experiment into history + rollback_arg = { + "pod": daemon.metadata.name, + "namespace": 'chaosblade', + "command": "/opt/iolatency/iolatency stop" + } + experiment_arg = { + + } + store_experiment(experiment_arg, rollback_arg, status, None) + print("IoLatency inject succeed!") + exit(0) + + print("Can not found target pod to inject!") + + def rollback_args(args): return {} diff --git a/resources/scenes/resource.py b/resources/scenes/resource.py index 8381e7c..7767042 100644 --- a/resources/scenes/resource.py +++ b/resources/scenes/resource.py @@ -28,6 +28,7 @@ def case9(namespace, deploy, requests, limits): .format(requests, limits)) print("resource limits injected done!") + @click.command() @click.option('--namespace', 'namespace', default='practice') @click.option('--cpu_requests', 'cpu_requests', default='100m') @@ -43,6 +44,7 @@ def case11(namespace, cpu_limits, mem_limits, cpu_requests, mem_requests): print("namespace_quota injected done!") + @click.command() @click.option('--timeout', 'timeout') @click.option('--names', 'names')