Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions image/daemon/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM debian:buster-slim

ENV CHAOSBLADE_HOME=/opt/chaosblades
COPY chaosblade-1.5.0 /opt/chaosblade
ENV PATH=/opt/chaosblade:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

RUN curl -L https://github.com/chaos-mesh/toda/releases/download/v0.2.2/toda-linux-amd64.tar.gz | tar xz -C /usr/local/bin

RUN case "$TARGET_PLATFORM" in \
'amd64') \
export NSEXEC_ARCH='x86_64'; \
;; \
'arm64') \
export NSEXEC_ARCH='aarch64'; \
;; \
*) echo >&2 "error: unsupported architecture '$TARGET_PLATFORM'"; exit 1 ;; \
esac; \
curl -L https://github.com/chaos-mesh/nsexec/releases/download/v0.1.6/nsexec-$NSEXEC_ARCH-unknown-linux-gnu.tar.gz | tar xz -C /usr/local/bin; \
mv /usr/local/bin/libnsenter.so /usr/local/lib/libnsenter.so;

COPY iolatency /opt/iolatency

CMD ["sh", "-c", "tail -f /dev/null"]
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ click~=8.0.3
jsonpath~=0.82
spacecapsule~=0.1.0
kubernetes~=22.6.0
paramiko~=2.9.2
paramiko~=2.9.2
daemon~=1.2
psutil~=5.9.0
64 changes: 63 additions & 1 deletion resources/defects/disk.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

import click

# # 为 test-ns 下 label为 app=testdemo 的所有pod设置访问网络延时
Expand All @@ -19,7 +21,12 @@

# # space-capsule undo {experiment-name}
# space-capsule undo network-delay
import jsonpath

from resources.scenes.service_slow import select_pod_from_ready, field_selector
from spacecapsule.executor import bash_executor
from spacecapsule.history import store_experiment
from spacecapsule.k8s import executor_command_inside_namespaced_pod, prepare_api
from spacecapsule.template import chaosblade_resource, chaosblade_resource_script


Expand All @@ -33,7 +40,7 @@
@click.option('--timeout', 'timeout')
@click.option('--labels', 'labels')
@click.option('--names', 'names')
def diskfill(scope, experiment_name, path, size, reserve, percent, timeout, labels, names):
def disk_fill(scope, experiment_name, path, size, reserve, percent, timeout, labels, names):
args = locals()
args['action'] = 'fill'
args['target'] = 'disk'
Expand Down Expand Up @@ -74,5 +81,60 @@ def diskfill(scope, experiment_name, path, size, reserve, percent, timeout, labe
args)


def disk_latency(pod, container, volume, methods, time, kube_config):
api_instance = prepare_api(kube_config)
pod_list = api_instance.list_namespaced_pod("practice")
if pod is None:
pod_ref = select_pod_from_ready(pod_list.items, None, None)
else:
pod_ref = select_pod_from_ready(pod_list.items, field_selector, {"key": "$.metadata.name", "value": pod})
host_ip = pod_ref.status.host_ip
containers_list = pod.ref.status.containerStatuses
if container is not None:
for container_ref in containers_list:
if container_ref.name == container:
container_id = container_ref.containerID
break
else:
container_id = containers_list[0].containerID

if container_id is None:
print("Can not found container named " + container + " in target pod")
exit(0)
commands = "docker inspect " + container_id + " -f {{.State.Pid}}"
daemon_list = api_instance.list_namespaced_pod('chaosblade')
for daemon in daemon_list.items:
if daemon.status.host_ip == host_ip:
stdout, stderr = executor_command_inside_namespaced_pod(api_instance, 'chaosblade', daemon.metadata.name,
commands)
# TODO Execute command in daemonSet Pod
io_latency_commands = "/opt/iolatency/iolatency start "
executor_command_inside_namespaced_pod(api_instance, 'chaosblade', daemon.metadata.name,
io_latency_commands)

check_status_command = "/opt/iolatency/iolatency status"
stdout, stderr = executor_command_inside_namespaced_pod(api_instance, 'chaosblade', daemon.metadata.name,
check_status_command)
status = jsonpath.jsonpath(json.loads(stdout), "$.status")
if status != "started":
# TODO failed reason
print("Inject Failed caused by ...!")

# TODO Store experiment into history
rollback_arg = {
"pod": daemon.metadata.name,
"namespace": 'chaosblade',
"command": "/opt/iolatency/iolatency stop"
}
experiment_arg = {

}
store_experiment(experiment_arg, rollback_arg, status, None)
print("IoLatency inject succeed!")
exit(0)

print("Can not found target pod to inject!")


def rollback_args(args):
return {}
Empty file.
221 changes: 221 additions & 0 deletions resources/defects/iolatency/iolatency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
# -*- coding: utf-8 -*-

import fcntl
import json
import os
import select
import signal
import subprocess
import sys
import time

import psutil

args = ""


class Daemon(object):
def __init__(self, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
self.ver = "0.1.23" # version
self.waitToHardKill = 3
self.processName = os.path.basename(sys.argv[0])
self.stdin = stdin
self.stdout = stdout
self.stderr = stderr

def _makeDaemon(self):
try:
pid = os.fork()
if pid > 0:
# Exit first parent.
sys.exit(0)
except OSError as e:
m = f"Fork #1 failed: {e}"
print(m)
sys.exit(1)

# Decouple from the parent environment.
os.chdir("/")
os.setsid()
os.umask(0)

# Do second fork.
try:
pid = os.fork()
if pid > 0:
# Exit from second parent.
sys.exit(0)
except OSError as e:
m = f"Fork #2 failed: {e}"
print(m)
sys.exit(1)

m = "The daemon process is going to background."
print(m)

# Redirect standard file descriptors.
sys.stdout.flush()
sys.stderr.flush()
si = open(self.stdin, 'r')
so = open(self.stdout, 'w+')
se = open(self.stderr, 'w+')
os.dup2(si.fileno(), sys.stdin.fileno())
os.dup2(so.fileno(), sys.stdout.fileno())
os.dup2(se.fileno(), sys.stderr.fileno())

def _getProces(self):
procs = []

for p in psutil.process_iter():
if self.processName in [part.split('/')[-1] for part in p.cmdline()]:
# Skip the current process
if p.pid != os.getpid():
procs.append(p)

return procs

def start(self):
# Handle signals
signal.signal(signal.SIGINT, self.stop)
signal.signal(signal.SIGTERM, self.stop)
signal.signal(signal.SIGHUP, self.stop)

# Check if the daemon is already running.
procs = self._getProces()

if procs:
pids = ",".join([str(p.pid) for p in procs])
status = {
"status": "started",
"pid": pids
}
print(json.dumps(status))
sys.exit(1)

# Daemonize the main process
self._makeDaemon()
# Start a infinitive loop that periodically runs run() method
self.run()

def status(self):
procs = self._getProces()

if procs:
pids = ",".join([str(p.pid) for p in procs])
status = {
'status': "started",
'pid': pids
}
else:
status = {
'status': "stopped",
}
print(json.dumps(status))

def stop(self):
procs = self._getProces()

def on_terminate(process):
status = {
'status': "stopped",
'pid': process.pid
}
print(json.dumps(status))

if procs:
for p in procs:
p.terminate()

gone, alive = psutil.wait_procs(procs, timeout=self.waitToHardKill, callback=on_terminate)

for p in alive:
p.kill()
else:
status = {
'status': "not_find",
}
print(json.dumps(status))

# this method you have to override
def run(self):
pass


class Toda(Daemon):
def __init__(self, cmd, stdout='/dev/null', stderr='/dev/null'):
Daemon.__init__(self, stdout=stdout, stderr=stderr)
self.cmd = cmd
self.args = ""

def start(self, pid, path, args):
self.cmd = self.cmd.format(pid, pid, path)
self.args = args
Daemon.start(self)
# self.cmd += "pid"

def run(self):
global process
with open('/status', 'w+') as nsexec:
process = subprocess.Popen(self.cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=nsexec)
flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL)
fcntl.fcntl(process.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK)
self.send(self.args)
status = {
"cmd": self.args,
"status": self.recv().decode(encoding="utf-8")
}
print(json.dumps(status))
signal.signal(signal.SIGTERM, signal_handler)
process.wait()

def send(self, data, tail='\n'):
b = data + tail
process.stdin.write(bytes(b, encoding='utf-8'))
process.stdin.flush()

def recv(self, t=.1, stderr=0):
r = ''
pr = process.stdout
if stderr:
pr = process.stdout
while True:
if not select.select([pr], [], [], 0)[0]:
time.sleep(t)
continue
r = pr.read()
return r.rstrip()
return r.rstrip()


def signal_handler(signal, frame):
process.terminate()
sys.exit(0)


if __name__ == "__main__":
process = None
daemon = Toda(
"/opt/nsexec/nsexec -l -p /proc/{}/ns/pid -m /proc/{}/ns/mnt --library-path /opt/nsexec/libnsenter.so -- /opt/toda/toda --path {} --verbose info",
"/stdout", "/dev/null")
usageMessage = f"Usage: {sys.argv[0]} (start |stop|status)"

choice = sys.argv[1]

if choice == "start":
if len(sys.argv) == 5:
choice = sys.argv[1]
pid = sys.argv[2]
path = sys.argv[3]
args = sys.argv[4]
daemon.start(pid, path, args)
else:
print(f"Usage: {sys.argv[0]} start $pid $path $args")
elif choice == "stop":
daemon.stop()
elif choice == "status":
daemon.status()
else:
print("Unknown command.")
print(usageMessage)
sys.exit(1)
9 changes: 9 additions & 0 deletions resources/defects/iolatency/makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
define exec-command
$(1)

endef

# CI build
.PHONY: build
build:
pyinstaller iolatency.py --name iolatency
2 changes: 2 additions & 0 deletions resources/scenes/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def case9(namespace, deploy, requests, limits):
.format(requests, limits))
print("resource limits injected done!")


@click.command()
@click.option('--namespace', 'namespace', default='practice')
@click.option('--cpu_requests', 'cpu_requests', default='100m')
Expand All @@ -43,6 +44,7 @@ def case11(namespace, cpu_limits, mem_limits, cpu_requests, mem_requests):

print("namespace_quota injected done!")


@click.command()
@click.option('--timeout', 'timeout')
@click.option('--names', 'names')
Expand Down
7 changes: 3 additions & 4 deletions resources/scenes/service_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,17 @@ def node_network_loss(node_name, interface, percent, remote_port, local_port, ti
@click.command()
@click.option('--namespace', 'namespace', default='practice')
@click.option('--network-plugin', 'network_plugin', default='calico')
@click.option('--time', 'time', default=3000)
@click.option('--percent', 'percent', default=80)
@click.option('--timeout', 'timeout', default=10000)
@click.option('--kube-config', 'kube_config', default="~/.kube/config")
@click.option("--check_history", is_flag=True, default=True,
is_eager=True, callback=check_status,
help="Check experiment history", expose_value=False)
def case4(namespace, network_plugin, time, percent, timeout, kube_config):
pod_network_loss(namespace, network_plugin, time, percent, timeout, kube_config)
def case4(namespace, network_plugin, percent, timeout, kube_config):
pod_network_loss(namespace, network_plugin, percent, timeout, kube_config)


def pod_network_loss(namespace, network_plugin, time, percent, timeout, kube_config):
def pod_network_loss(namespace, network_plugin, percent, timeout, kube_config):
# Choose a pod from target namespace
api_instance = prepare_api(kube_config)
pod_list = api_instance.list_namespaced_pod(namespace)
Expand Down