From 66790cbf0f08394cb4f89174f00e30345b88b63f Mon Sep 17 00:00:00 2001 From: Itxaka Date: Tue, 18 Apr 2017 13:19:36 +0200 Subject: [PATCH] updater: Stop pacemaker or set maintenance mode when updating cluster nodes (bsc#983617) Make the update barclamp aware of HA nodes and deal with them properly. When HA packages need to be updated, stop the HA services before the update and start them again after the packages have been updated. When normal packages are updated, set the node in maintenance mode as mentioned in the HA guide. Also do not stop or set maintenance mode if the node is a remote_node. Updates on normal nodes should not be affected by these changes. --- chef/cookbooks/updater/metadata.rb | 1 + chef/cookbooks/updater/recipes/default.rb | 68 +++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/chef/cookbooks/updater/metadata.rb b/chef/cookbooks/updater/metadata.rb index 0f981ae0d7..73dd939f70 100644 --- a/chef/cookbooks/updater/metadata.rb +++ b/chef/cookbooks/updater/metadata.rb @@ -22,5 +22,6 @@ version "0.0.1" depends "utils" +depends "crowbar-pacemaker" recipe "updater", "System Package Updater" diff --git a/chef/cookbooks/updater/recipes/default.rb b/chef/cookbooks/updater/recipes/default.rb index f0960c6543..452f5e33ae 100644 --- a/chef/cookbooks/updater/recipes/default.rb +++ b/chef/cookbooks/updater/recipes/default.rb @@ -17,6 +17,9 @@ # limitations under the License. 
# +::Chef::Recipe.include CrowbarPacemaker::MaintenanceModeHelpers +::Chef::Resource.include CrowbarPacemaker::MaintenanceModeHelpers + if !node[:updater].key?(:one_shot_run) || !node[:updater][:one_shot_run] node[:updater][:one_shot_run] = true @@ -45,6 +48,65 @@ ignore_failure true end + ruby_block "check for updates" do + block do + case node[:updater][:zypper][:method] + when "patch" + command = "list-patches" + when "update" + command = "list-updates" + when "dist-upgrade" + command = "list-updates" + end + + node.run_state["needs_update"] = `zypper -q #{command}|wc -l`.chomp.to_i > 0 + + command += '|egrep -q "corosync|pacemaker"' + system("zypper #{command}") + # egrep exits 0 when an HA package is listed, 1 otherwise; 0 is truthy in Ruby, so compare explicitly + node.run_state["found_ha_packages"] = $?.exitstatus == 0 + end + end + + ["corosync", "pacemaker"].each do |s| + service s do + action :stop + only_if { node.run_state["found_ha_packages"] } + not_if { node[:pacemaker] && node[:pacemaker][:is_remote] } + end + end + + # set cluster to maintenance if + # HA packages are NOT going to be updated + # And Node is part of a cluster + # And there are packages to update + execute "crm --wait node maintenance" do + action :nothing + notifies :create, "ruby_block[set maintenance mode via this chef run]", :immediately + end + + ruby_block "set maintenance mode via this chef run" do + action :nothing + block do + set_maintenance_mode_via_this_chef_run + end + end + + ruby_block "set cluster maintenance" do + block do + Chef::Log.info("Triggering maintenance mode for this node") + true + end + only_if do + is_cluster = node.role? "pacemaker-cluster-member" + !node.run_state["found_ha_packages"] && is_cluster && node.run_state["needs_update"] + end + not_if do + maintenance_mode_set_via_this_chef_run? && maintenance_mode? + end + notifies :run, "execute[crm --wait node maintenance]", :immediately + end + # Butt-ugly, enhance Chef::Provider::Package::Zypper later on... 
ruby_block "running \"#{zypper_command}\"" do block do @@ -94,8 +156,14 @@ end # case end # while end # block + only_if { node.run_state["needs_update"] } end # ruby_block + service "pacemaker" do + action :start + only_if { node.run_state["found_ha_packages"] && !(node[:pacemaker] && node[:pacemaker][:is_remote]) } + end + end # platform_family suse block # handle case where there is a reboot needed from a previous run