Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 28 additions & 32 deletions crmsh/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import readline
import shutil
import typing
import shlex

import yaml
import socket
Expand Down Expand Up @@ -1967,20 +1968,14 @@ def join_cluster(seed_host, remote_user):
"""
Cluster configuration for joining node.
"""
retrieve_all_config_files(seed_host)
retrieve_data(seed_host)

is_qdevice_configured = corosync.is_qdevice_configured()
if is_qdevice_configured and not ServiceManager().service_is_available("corosync-qdevice.service"):
utils.fatal("corosync-qdevice.service is not available")

shell = sh.cluster_shell()

if is_qdevice_configured:
if not _context.use_ssh_agent or not _keys_from_ssh_agent():
# trigger init_qnetd_remote on init node
cmd = f"crm cluster init qnetd_remote {utils.this_node()} -y"
shell.get_stdout_or_raise_error(cmd, seed_host)

shutil.copy(corosync.conf(), _context.get_corosync_conf_orig())

# check if use IPv6
Expand Down Expand Up @@ -2046,10 +2041,17 @@ def join_cluster(seed_host, remote_user):
with logger_utils.status_long("Reloading cluster configuration"):
shell.get_stdout_or_raise_error("corosync-cfgtool -R")

service_manager = ServiceManager()
if is_qdevice_configured:
start_qdevice_on_join_node(seed_host)
if not _context.use_ssh_agent or not _keys_from_ssh_agent():
# trigger init_qnetd_remote on init node
cmd = f"crm cluster init qnetd_remote {utils.this_node()} -y"
shell.get_stdout_or_raise_error(cmd, seed_host)
retrieve_data(seed_host, [qdevice.QDevice.qdevice_path], "qdevice")
logger.info("Starting and enable corosync-qdevice.service")
service_manager.start_service("corosync-qdevice.service", enable=True)
else:
ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).disable_service("corosync-qdevice.service")
service_manager.disable_service("corosync-qdevice.service")


def adjust_priority_in_rsc_defaults(is_2node_wo_qdevice):
Expand Down Expand Up @@ -2085,18 +2087,6 @@ def adjust_priority_fencing_delay(is_2node_wo_qdevice):
utils.set_property("priority-fencing-delay", 0)


def start_qdevice_on_join_node(seed_host):
    """
    Run the qdevice certificate process (when TLS is on) and start the
    qdevice service on the joining node.

    seed_host: node passed to QDevice as cluster_node for the certificate
    process.
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # the service start is placed outside the TLS branch so that a non-TLS
    # qdevice is still started -- confirm against the real file.
    with logger_utils.status_long("Starting corosync-qdevice.service"):
        if corosync.is_qdevice_tls_on():
            # Certificates are only needed when TLS is enabled for the qdevice.
            qnetd_addr = corosync.get_value("quorum.device.net.host")
            qdevice_inst = qdevice.QDevice(qnetd_addr, cluster_node=seed_host)
            qdevice_inst.certificate_process_on_join()
        local_shell = sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())
        ServiceManager(local_shell).start_service("corosync-qdevice.service", enable=True)


def get_cluster_node_ip(node: str) -> str:
"""
ringx_addr might be hostname or IP
Expand Down Expand Up @@ -2801,14 +2791,18 @@ def adjust_properties(with_sbd: bool = False):
adjust_priority_fencing_delay(is_2node_wo_qdevice)


def retrieve_all_config_files(cluster_node):
"""
Retrieve config files from cluster_node if exists
"""
with logger_utils.status_long("Retrieve all config files"):
cmd = 'cpio -o << EOF\n{}\nEOF\n'.format(
'\n'.join((f for f in get_files_to_sync() if f != CSYNC2_KEY and f != CSYNC2_CFG))
)
def retrieve_data(from_node, data_list=None, data_type=None):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. It would be clearer to name it `retrieve_files` instead of `retrieve_data`, as the word "data" is too general.
  2. For the same reason, `data_list` should be renamed to `file_list`.
  3. `data_type` is only used to build the status message and has no functional effect. Passing it makes the code harder to understand. It would be better to pass `msg` directly, or to use a clearer parameter name that reflects its usage.

if not data_list:
data_list = [f for f in get_files_to_sync() if f != CSYNC2_KEY and f != CSYNC2_CFG]
find_args = ' '.join(shlex.quote(f) for f in data_list)
cmd = f'find {find_args} -print | cpio -o'

if data_type:
msg = f"Retrieving {data_type} configuration files from {from_node}"
else:
msg = f"Retrieving all configuration files from {from_node}"

with logger_utils.status_long(msg):
pipe_outlet, pipe_inlet = os.pipe()
try:
child = subprocess.Popen(['cpio', '-iud'], stdin=pipe_outlet, stderr=subprocess.DEVNULL)
Expand All @@ -2818,15 +2812,17 @@ def retrieve_all_config_files(cluster_node):
finally:
os.close(pipe_outlet)
try:
result = sh.cluster_shell().subprocess_run_without_input(cluster_node, None, cmd, stdout=pipe_inlet, stderr=subprocess.DEVNULL)
result = sh.cluster_shell().subprocess_run_without_input(
from_node, None, cmd, stdout=pipe_inlet, stderr=subprocess.DEVNULL
)
finally:
os.close(pipe_inlet)
rc = child.wait()
# Some errors may happen here, since all files in get_files_to_sync() may not exist.
if result is None or result.returncode == 255:
utils.fatal("Failed to create ssh connect to {}".format(cluster_node))
utils.fatal(f"Failed to create ssh connect to {from_node}")
if rc != 0:
utils.fatal("Failed to retrieve config files from {}".format(cluster_node))
utils.fatal(f"Failed to retrieve config files from {from_node}")


def sync_path(path, peer_node=None):
Expand Down
77 changes: 1 addition & 76 deletions crmsh/qdevice.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ class QDevice(object):
"""Class to manage qdevice configuration and services

Call `certificate_process_on_init` to generate all of CA, server, and client certs.
Call `certificate_process_on_join` to generate a single client cert for the local node.
"""

qnetd_service = "corosync-qnetd.service"
Expand All @@ -113,7 +112,7 @@ class QDevice(object):
qdevice_db_path = "/etc/corosync/qdevice/net/nssdb"

def __init__(self, qnetd_addr, port=5403, algo="ffsplit", tie_breaker="lowest",
tls="on", ssh_user=None, cluster_node=None, cmds=None, mode=None, cluster_name=None, is_stage=False):
tls="on", ssh_user=None, cmds=None, mode=None, cluster_name=None, is_stage=False):
"""
Init function
"""
Expand All @@ -123,7 +122,6 @@ def __init__(self, qnetd_addr, port=5403, algo="ffsplit", tie_breaker="lowest",
self.tie_breaker = tie_breaker
self.tls = tls
self.ssh_user = ssh_user
self.cluster_node = cluster_node
self.cmds = cmds
self.mode = mode
self.cluster_name = cluster_name
Expand All @@ -145,13 +143,6 @@ def qnetd_cacert_on_local(self):
"""
return "{}/{}/{}".format(self.qdevice_path, self.qnetd_addr, self.qnetd_cacert_filename)

@property
def qnetd_cacert_on_cluster(self):
    """
    Return path of qnetd-cacert.crt on cluster node

    The path is built as <qdevice_path>/<cluster_node>/<qnetd_cacert_filename>.
    """
    return f"{self.qdevice_path}/{self.cluster_node}/{self.qnetd_cacert_filename}"

@property
def qdevice_crq_on_qnetd(self):
"""
Expand Down Expand Up @@ -187,13 +178,6 @@ def qdevice_p12_on_local(self):
"""
return "{}/nssdb/{}".format(self.qdevice_path, self.qdevice_p12_filename)

@property
def qdevice_p12_on_cluster(self):
    """
    Return path of qdevice-net-node.p12 on cluster node

    The path is built as <qdevice_path>/<cluster_node>/<qdevice_p12_filename>.
    """
    return f"{self.qdevice_path}/{self.cluster_node}/{self.qdevice_p12_filename}"

@staticmethod
def check_qnetd_addr(qnetd_addr):
qnetd_ip = None
Expand Down Expand Up @@ -455,65 +439,6 @@ def certificate_process_on_init(self):
]):
step(lambda s, cmd=None: self.log_only_to_file(f'Step {i+1}: {s}', cmd))

def fetch_qnetd_crt_from_cluster(self):
    """
    Certificate process for join
    Step 1
    Fetch QNetd CA certificate(qnetd-cacert.crt) from init node

    Does nothing when the file at qnetd_cacert_on_cluster already exists.
    """
    if os.path.exists(self.qnetd_cacert_on_cluster):
        # Already fetched earlier; skip the transfer.
        return

    desc = f"Step 1: Fetch {self.qnetd_cacert_filename} from {self.cluster_node}"
    QDevice.log_only_to_file(desc)
    # NOTE(review): presumably parallax_slurp stores the fetched file under
    # <qdevice_path>/<cluster_node>/, which is what the existence check
    # above looks at -- confirm against crmsh.parallax.
    crmsh.parallax.parallax_slurp([self.cluster_node], self.qdevice_path, self.qnetd_cacert_on_local)

def init_db_on_local(self):
    """
    Certificate process for join
    Step 2
    Initialize database by running
    /usr/sbin/corosync-qdevice-net-certutil -i -c qnetd-cacert.crt

    Any existing directory at qdevice_db_path is removed first.
    """
    if os.path.exists(self.qdevice_db_path):
        # Remove the existing database directory before initializing.
        utils.rmdir_r(self.qdevice_db_path)

    cmd = f"corosync-qdevice-net-certutil -i -c {self.qnetd_cacert_on_cluster}"
    QDevice.log_only_to_file("Step 2: Initialize database on local", cmd)
    sh.cluster_shell().get_stdout_or_raise_error(cmd)

def fetch_p12_from_cluster(self):
    """
    Certificate process for join
    Step 3
    Fetch p12 key file from init node

    Does nothing when the file at qdevice_p12_on_cluster already exists.
    """
    if os.path.exists(self.qdevice_p12_on_cluster):
        # Already fetched earlier; skip the transfer.
        return

    desc = f"Step 3: Fetch {self.qdevice_p12_filename} from {self.cluster_node}"
    QDevice.log_only_to_file(desc)
    # NOTE(review): presumably parallax_slurp stores the fetched file under
    # <qdevice_path>/<cluster_node>/ -- confirm against crmsh.parallax.
    crmsh.parallax.parallax_slurp([self.cluster_node], self.qdevice_path, self.qdevice_p12_on_local)

def import_p12_on_local(self):
    """
    Certificate process for join
    Step 4
    Import cluster certificate and key

    Runs corosync-qdevice-net-certutil -m -c on the file at
    qdevice_p12_on_cluster.
    """
    cmd = f"corosync-qdevice-net-certutil -m -c {self.qdevice_p12_on_cluster}"
    QDevice.log_only_to_file("Step 4: Import cluster certificate and key", cmd)
    sh.cluster_shell().get_stdout_or_raise_error(cmd)

def certificate_process_on_join(self):
    """
    The qdevice certificate process on join node

    Runs the four join steps in order: fetch the qnetd CA certificate,
    initialize the local database, fetch the p12 file, import the p12 file.
    """
    # Order matters: steps 2 and 4 consume the files fetched by steps 1 and 3.
    self.fetch_qnetd_crt_from_cluster()
    self.init_db_on_local()
    self.fetch_p12_from_cluster()
    self.import_p12_on_local()

def write_qdevice_config(self) -> None:
"""
Write qdevice attributes to config file
Expand Down
11 changes: 11 additions & 0 deletions test/features/qdevice_setup_remove.feature
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,17 @@ Feature: corosync qdevice/qnetd setup/remove process
And Service "corosync-qdevice" is "started" on "hanode1"
And Show status from qnetd

@clean
Scenario: Re-join cluster (bsc#1254243)
# Checks that hanode1, after being removed from the cluster by hanode2,
# can join again through hanode2 and has corosync-qdevice started once more.
When Run "crm cluster init --qnetd-hostname=qnetd-node -y" on "hanode1"
When Run "crm cluster join -c hanode1 -y" on "hanode2"
Then Service "corosync-qdevice" is "started" on "hanode1"
And Service "corosync-qdevice" is "started" on "hanode2"
When Run "crm cluster remove hanode1 -y" on "hanode2"
Then Service "corosync-qdevice" is "stopped" on "hanode1"
When Run "crm cluster join -c hanode2 -y" on "hanode1"
Then Service "corosync-qdevice" is "started" on "hanode1"

@skip_non_root
@clean
Scenario: Passwordless for root, not for sudoer (bsc#1209193)
Expand Down
21 changes: 0 additions & 21 deletions test/unittests/test_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,27 +1461,6 @@ def test_remove_qdevice_reload(self, mock_qdevice_configured, mock_confirm, mock
mock_remove_db.assert_called_once_with()
mock_cluster_shell_inst.get_stdout_or_raise_error.assert_called_once_with("corosync-cfgtool -R")

@mock.patch('crmsh.service_manager.ServiceManager.start_service')
@mock.patch('crmsh.qdevice.QDevice')
@mock.patch('crmsh.corosync.get_value')
@mock.patch('crmsh.corosync.is_qdevice_tls_on')
@mock.patch('crmsh.log.LoggerUtils.status_long')
def test_start_qdevice_on_join_node(self, mock_status_long, mock_qdevice_tls, mock_get_value, mock_qdevice, mock_start_service):
    """
    With TLS on, start_qdevice_on_join_node runs the join certificate
    process and then starts and enables corosync-qdevice.service.
    """
    # Arrange: TLS enabled, qnetd host resolved from the corosync config.
    mock_qdevice_tls.return_value = True
    mock_get_value.return_value = "10.10.10.123"
    mock_qdevice_inst = mock.Mock()
    mock_qdevice.return_value = mock_qdevice_inst
    mock_qdevice_inst.certificate_process_on_join = mock.Mock()

    # Act
    bootstrap.start_qdevice_on_join_node("node2")

    # Assert: every collaborator was used exactly once with the expected args.
    mock_status_long.assert_called_once_with("Starting corosync-qdevice.service")
    mock_qdevice_tls.assert_called_once_with()
    mock_get_value.assert_called_once_with("quorum.device.net.host")
    mock_qdevice.assert_called_once_with("10.10.10.123", cluster_node="node2")
    mock_qdevice_inst.certificate_process_on_join.assert_called_once_with()
    mock_start_service.assert_called_once_with("corosync-qdevice.service", enable=True)

@mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr')
@mock.patch('crmsh.log.LoggerUtils.log_only_to_file')
def test_invoke(self, mock_log, mock_run):
Expand Down
Loading