From ca30353908109a793ff47d87e5adfc98401c4275 Mon Sep 17 00:00:00 2001 From: "narrieta@microsoft" Date: Wed, 31 Dec 2025 12:12:53 -0800 Subject: [PATCH 1/5] Improve telemetry for firewall setup --- azurelinuxagent/agent.py | 1 + azurelinuxagent/common/event.py | 1 + azurelinuxagent/ga/env.py | 68 ++++++++++++--- azurelinuxagent/ga/firewall_manager.py | 66 ++++++++++----- azurelinuxagent/ga/update.py | 1 + tests/ga/test_env.py | 112 ++++++++++++++++++++++++- tests/ga/test_firewall_manager.py | 106 ++++++++++++++++++++++- tests/lib/mock_firewall_command.py | 36 ++++++-- 8 files changed, 352 insertions(+), 39 deletions(-) diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index c5e9ecc5c..e4b7d9d8e 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -317,6 +317,7 @@ def setup_firewall(endpoint): try: firewall_manager = FirewallManager.create(endpoint) + firewall_manager.verbose = True if isinstance(firewall_manager, IpTables): try: # diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index e78352b63..64c3a42ef 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -85,6 +85,7 @@ class WALAEventOperation: Disable = "Disable" Downgrade = "Downgrade" Download = "Download" + DuplicateFirewallRules = "DuplicateFirewallRules" Enable = "Enable" ExtensionHandlerManifest = "ExtensionHandlerManifest" ExtensionPolicy = "ExtensionPolicy" # Event for any extension policy-related operations (e.g., extension not in allowlist). diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 752ff5a4b..b7f914ef5 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -32,6 +32,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.ga.interfaces import ThreadHandlerInterface from azurelinuxagent.common.osutil import get_osutil +from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.version import AGENT_NAME from azurelinuxagent.ga.periodic_operation import PeriodicOperation @@ -100,46 +101,89 @@ def _get_dhcp_client_pid(self): return pid +class FirewallState(object): + OK = "OK" # The firewall rules for the WireServer are setup correctly + NotSet = "NotSet" # The firewall rules have not been set + Invalid = "Invalid" # The state of the firewall rules is not as expected, e.g. because some rules are missing + Inconsistent = "Inconsistent" # The stare of the firewall is reported differently by different tools, e.g. "iptables -C" vs "iptables -L" + Unknown = "Unknown" # The state of the firewall has not been checked yet + + class EnableFirewall(PeriodicOperation): + _REPORTING_PERIOD = datetime.timedelta(hours=24) # we set a limit on the number of messages logged within this period + def __init__(self, wire_server_address): super(EnableFirewall, self).__init__(conf.get_enable_firewall_period()) self._wire_server_address = wire_server_address self._firewall_manager = None # initialized on demand in the _operation method - self._message_count = 0 - self._report_after = datetime.datetime.now(UTC) + self._firewall_state = FirewallState.OK + self._report_count = 0 + self._next_report_time = datetime.datetime.now(UTC) + self._should_report = True def _operation(self): try: + # + # Each check of the firewall can produce a report, but we limit the number of reports we emit within a fix period of time. + # + self._update_reporting_state() + if self._firewall_manager is None: self._firewall_manager = FirewallManager.create(self._wire_server_address) + # + # Setting up the Firewall Manager to verbose will make it log each command it executes, along with its output. We do this only when the state of the firewall + # is not OK and the report limit has not been reached. + # + self._firewall_manager.verbose = self._firewall_state != FirewallState.OK and self._should_report + try: if self._firewall_manager.check(): - return # The firewall is configured correctly + self._update_firewall_state(FirewallState.OK) + self._report(event.info, WALAEventOperation.Firewall, "The firewall is configured correctly. Current state:\n{0}", self._firewall_manager.get_state()) + return + self._update_firewall_state(FirewallState.NotSet) self._report(event.warn, WALAEventOperation.Firewall, "The firewall has not been setup. Will set it up.") except IptablesInconsistencyError as e: + self._update_firewall_state(FirewallState.Inconsistent) self._report(event.warn, WALAEventOperation.FirewallInconsistency, "The results returned by iptables are inconsistent, will not change the current state of the firewall: {0}", ustr(e)) return except FirewallStateError as e: + self._update_firewall_state(FirewallState.Invalid) self._report(event.warn, WALAEventOperation.ResetFirewall, "The firewall is not configured correctly. {0}. Will reset it. Current state:\n{1}", ustr(e), self._firewall_manager.get_state()) self._firewall_manager.remove() + self._firewall_manager.setup() + self._update_firewall_state(FirewallState.OK) self._report(event.info, WALAEventOperation.Firewall, "The firewall was setup successfully:\n{0}", self._firewall_manager.get_state()) except Exception as e: - self._report(event.warn, WALAEventOperation.Firewall, "An error occurred while setting up the firewall: {0}", ustr(e)) + self._update_firewall_state(FirewallState.Unknown) + self._report(event.warn, WALAEventOperation.Firewall, "An error occurred while verifying the state of the firewall: {0}. Current state:\n{1}", textutil.format_exception(e), self._firewall_manager.get_state()) def _report(self, report_function, operation, message, *args): - # Report the first 3 messages, then stop reporting for 12 hours - if datetime.datetime.now(UTC) < self._report_after: - return + if self._should_report: + report_function(operation, message, *args) - self._message_count += 1 - if self._message_count > 3: - self._report_after = datetime.datetime.now(UTC) + datetime.timedelta(hours=12) - self._message_count = 0 + def _update_reporting_state(self): + if self._next_report_time > datetime.datetime.now(UTC): + self._should_report = False return - report_function(operation, message, *args) + self._report_count += 1 + max_reports = 1 if self._firewall_state == FirewallState.OK else 3 + if self._report_count <= max_reports: + self._should_report = True + else: + self._report_count = 0 + self._next_report_time = datetime.datetime.now(UTC) + self._REPORTING_PERIOD + self._should_report = False + + def _update_firewall_state(self, firewall_state): + if (self._firewall_state == FirewallState.OK) != (firewall_state == FirewallState.OK): # reset the reporting limits if the firewall state goes from OK to not-OK or vice versa + self._report_count = 0 + self._next_report_time = datetime.datetime.now(UTC) + self._should_report = True + self._firewall_state = firewall_state class SetRootDeviceScsiTimeout(PeriodicOperation): diff --git a/azurelinuxagent/ga/firewall_manager.py b/azurelinuxagent/ga/firewall_manager.py index 1bdbd35dc..a75cf4a8e 100644 --- a/azurelinuxagent/ga/firewall_manager.py +++ b/azurelinuxagent/ga/firewall_manager.py @@ -76,6 +76,7 @@ class FirewallManager(object): """ def __init__(self, wire_server_address): self._wire_server_address = wire_server_address + self._verbose = False # Friendly names for the firewall rules ACCEPT_DNS = "ACCEPT DNS" @@ -112,6 +113,14 @@ def version(self): """ raise NotImplementedError() + @property + def verbose(self): + return self._verbose + + @verbose.setter + def verbose(self, value): + self._verbose = value + def setup(self): """ Sets up the firewall rules for the WireServer. @@ -156,6 +165,20 @@ def _get_state_command(self): """ raise NotImplementedError() + def _run_command_verbose(self, command, *args, **kwargs): + """ + Executes the given command, logging the command and its output if verbose mode is enabled. + """ + try: + stdout = shellutil.run_command(command, *args, **kwargs) + if self.verbose: + event.info(WALAEventOperation.Firewall, "{0} [exit code: 0]\n{1}", " ".join(command), stdout) + return stdout + except CommandError as e: + if self.verbose: + event.info(WALAEventOperation.Firewall, "{0} [exit code: {1}]\n{2}\n{3}", " ".join(command), e.returncode, e.stdout, e.stderr) + raise + class _FirewallManagerIndividualRules(FirewallManager): """ @@ -201,7 +224,7 @@ def _delete(self): def setup(self): for _, get_command in self._firewall_commands: - shellutil.run_command(get_command(self._append)) + self._run_command_verbose(get_command(self._append)) def remove(self): for _, get_command in self._firewall_commands: @@ -211,9 +234,9 @@ def remove(self): def remove_legacy_rule(self): check_command = self._get_legacy_rule_command(self._check) if not self._rule_exists(check_command): - logger.info("Did not find a legacy firewall rule: {0}", check_command) + event.info(WALAEventOperation.Firewall, "Did not find a legacy firewall rule: {0}", check_command) return - logger.info("Found legacy firewall rule: {0}", check_command) + event.info(WALAEventOperation.Firewall, "Found legacy firewall rule: {0}", check_command) delete_command = self._get_legacy_rule_command(self._delete) self._delete_rule(delete_command) @@ -237,10 +260,9 @@ def check(self): return False - @staticmethod - def _rule_exists(check_command): + def _rule_exists(self, check_command): try: - shellutil.run_command(check_command) + self._run_command_verbose(check_command) except CommandError as e: if e.returncode != 1: # if 1, the command failed because the rule does not exist raise @@ -348,11 +370,11 @@ def _delete(self): return '-D' def check(self): - # A few users have reported an issue where the Agent creates duplicate DROP rule, with one of them at the top of the OUTPUT chain, that block communication + # A few users have reported an issue where waagent creates duplicate DROP rules, with one of them at the top of the OUTPUT chain. This blocks communication # with the WireServer (see, for example, incident 21000000779819). These VMs are running RedHat/CentOS 7/8. # - # Debugging showed that the DROP rule created by waagent-network-setup during boot cannot detected by waagent using "iptables -C" (and "iptables -D" won't delete - # the rule either) causing waagent to create duplicate rules. This issue may be related to https://access.redhat.com/solutions/6514071, which has identical + # Debugging showed that the DROP rule created by waagent-network-setup during boot cannot be detected by waagent using "iptables -C" (and "iptables -D" won't delete + # the rule either) and waagent ends up creating duplicate rules. This issue may be related to https://access.redhat.com/solutions/6514071, which has identical # symptoms. Our debugging showed that the first rule created when the conntrack module has not been loaded yet is not visible to "-C" or "-D". # # We work around this issue by checking against the output of "iptables -L" when the check() method reports that some rules do not exist. If any of those rules @@ -371,23 +393,29 @@ def load_conntrack(self): """ Forces the conntrack module to be loaded by executing "iptables -C -m conntrack..." - Returns a string containing the command that was execute and its output. + Returns a string containing the command that was executed and its output. """ try: command = self._get_drop_rule_command(self._check) # The DROP rule uses conntrack - return "{0}: {1}", command, shellutil.run_command(command) + return "{0}: {1}".format(command, shellutil.run_command(command)) except CommandError as e: return ustr(e) def _delete_rule(self, command): """ - Continually execute the delete operation until the return code is non-zero or the limit has been reached. + Attempts to delete all the instances of the rule specified for the given command. """ - for _ in range(1, 100): + for i in range(1, 100): + # When we delete 1 rule, we expect 2 iterations: the first iteration deletes the rule and the second fails to find the rule. More than 2 iterations implies duplicate rules. try: - shellutil.run_command(command) + if i <= 2: + self._run_command_verbose(command) + else: + shellutil.run_command(command) except CommandError as e: if e.returncode == 1: + if i > 2: + event.info(WALAEventOperation.DuplicateFirewallRules, "Deleted multiple firewall rules. Count: {0}. Command: {1}", i - 1, " ".join(command)) return if e.returncode == 2: raise Exception("Invalid firewall deletion command '{0}'".format(command)) @@ -442,7 +470,7 @@ def _delete(self): return '--remove-passthrough' def _delete_rule(self, command): - shellutil.run_command(command) + self._run_command_verbose(command) def _get_state_command(self): return ["firewall-cmd", "--permanent", "--direct", "--get-all-passthroughs"] @@ -481,14 +509,14 @@ def version(self): return self._version def setup(self): - shellutil.run_command(["nft", "-f", "-"], input=""" + self._run_command_verbose(["nft", "-f", "-"], input=""" add table ip walinuxagent add chain ip walinuxagent output {{ type filter hook output priority 0 ; policy accept ; }} add rule ip walinuxagent output ip daddr {0} tcp dport != 53 skuid != {1} ct state invalid,new counter drop """.format(self._wire_server_address, os.getuid())) def remove(self): - shellutil.run_command(["nft", "delete", "table", "walinuxagent"]) + self._run_command_verbose(["nft", "delete", "table", "walinuxagent"]) def check(self): # @@ -503,7 +531,7 @@ def check(self): # ] # } # - output_text = shellutil.run_command(["nft", "--json", "list", "tables"]) + output_text = self._run_command_verbose(["nft", "--json", "list", "tables"]) try: output = json.loads(output_text) tables = [i["table"] for i in output["nftables"] if i.get("table") is not None] @@ -557,7 +585,7 @@ def check(self): # ] # } # - output_text = shellutil.run_command(["nft", "--json", "list", "table", "walinuxagent"]) + output_text = self._run_command_verbose(["nft", "--json", "list", "table", "walinuxagent"]) errors = [] try: diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 6c5f7b1fa..042b9ded8 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -1175,6 +1175,7 @@ def _initialize_firewall(wire_server_address): return firewall_manager = FirewallManager.create(wire_server_address) + firewall_manager.verbose = True try: firewall_manager.remove_legacy_rule() diff --git a/tests/ga/test_env.py b/tests/ga/test_env.py index a3d56ebf6..7fc86764b 100644 --- a/tests/ga/test_env.py +++ b/tests/ga/test_env.py @@ -15,12 +15,14 @@ # Requires Python 2.6+ and Openssl 1.0+ # import datetime +import re +import time from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.osutil.default import DefaultOSUtil, shellutil from azurelinuxagent.ga.env import MonitorDhcpClientRestart, EnableFirewall -from tests.lib.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch, DEFAULT from tests.lib.mock_firewall_command import MockIpTables @@ -215,3 +217,111 @@ def test_it_should_not_modify_the_firewall_rules_when_the_check_command_is_incon ], mock_iptables.call_list[:3], "Expected the 3 firewall rules to have been checked (Test case: {0})".format(test_case)) + + def test_it_should_log_the_state_of_the_firewall_once_per_reporting_period(self): + with MockIpTables() as mock_iptables: + enable_firewall = EnableFirewall('168.63.129.16') + enable_firewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500) + + with patch.multiple("azurelinuxagent.ga.firewall_manager.event", info=DEFAULT, warn=DEFAULT, error=DEFAULT) as patches: + info = patches["info"] + warn = patches["warn"] + error = patches["error"] + + for _ in range(0, 3): + enable_firewall._operation() # we call the _operation() method directly because the run() method enforces its own time period + event_count_first_reporting_period = info.call_count + + time.sleep(0.5) # let 1 reporting period elapse + + for _ in range(0, 3): + enable_firewall._operation() + + # Each call to the _operation() method should have checked each rule, plus listed all the rules + expected_commands = 6 * [ + mock_iptables.get_accept_dns_command("-C"), + mock_iptables.get_accept_command("-C"), + mock_iptables.get_drop_command("-C"), + mock_iptables.get_list_command() + ] + self.assertEqual(expected_commands, mock_iptables.call_list, "Expected commands {0}, got: {1}".format(expected_commands, mock_iptables.call_list)) + + # The first call to _operation() reports the version of iptables, then there should be only one firewall state report for each of the 2 reporting periods in the test + self.assertEqual(2, event_count_first_reporting_period, "Expected 2 events to be logged during the first reporting period, got: {0}".format(info.call_args_list[:event_count_first_reporting_period])) + self.assertEqual(3, len(info.call_args_list), "Expected a total of 3 events to be logged for the two reporting periods, got: {0}".format(info.call_args_list)) + event_args = [args for args, _ in info.call_args_list] + self.assertTrue(event_args[0][0] == "Firewall" and re.match(r'Using iptables \[version .+] to manage firewall rules', event_args[0][1]), "Expected a check for the iptables version in the first reporting period. Got: {0}".format(event_args[0])) + self.assertTrue(event_args[1][0] == "Firewall" and event_args[1][1].startswith('The firewall is configured correctly.'), "Expected a firewall status report in the first reporting period. Got: {0}".format(event_args[1])) + self.assertTrue(event_args[2][0] == "Firewall" and event_args[1][1].startswith('The firewall is configured correctly.'), "Expected a firewall status report in the second reporting period. Got: {0}".format(event_args[1])) + + self.assertEqual(0, warn.call_count, "No warnings should have been reported. Got: {0}". format(warn.call_args_list)) + self.assertEqual(0, error.call_count, "No errors should have been reported. Got: {0}". format(error.call_args_list)) + + def test_it_should_log_errors_thrice_per_reporting_period(self): + # We force an inconsistency between "iptables -C" and "iptables -C" to create an error condition (the DROP rule will fail for -C, but will show up in -L) + with MockIpTables(check_matches_list=False) as mock_iptables: + mock_iptables.set_return_values("-C", accept_dns=0, accept=0, drop=1, legacy=0) + + enable_firewall = EnableFirewall('168.63.129.16') + enable_firewall._REPORTING_PERIOD = datetime.timedelta(milliseconds=500) + + firewall_manager_in_verbose_mode = [] + + with patch.multiple("azurelinuxagent.ga.firewall_manager.event", info=DEFAULT, warn=DEFAULT, error=DEFAULT) as patches: + info = patches["info"] + warn = patches["warn"] + error = patches["error"] + + for _ in range(0, 5): + enable_firewall._operation() # we call the _operation() method directly because the run() method enforces its own time period + firewall_manager_in_verbose_mode.append(enable_firewall._firewall_manager.verbose) + warn_count_first_reporting_period = warn.call_count + + time.sleep(0.5) # let 1 reporting period elapse + + for _ in range(0, 5): + enable_firewall._operation() + firewall_manager_in_verbose_mode.append(enable_firewall._firewall_manager.verbose) + + # Each call to the _operation() method should have checked each rule, and then compared the results of -C against the output of -L + expected_commands = 10 * [ + mock_iptables.get_accept_dns_command("-C"), + mock_iptables.get_accept_command("-C"), + mock_iptables.get_drop_command("-C"), + "iptables -w -t security -L OUTPUT -nxv" + ] + self.assertEqual(expected_commands, mock_iptables.call_list, "Expected commands {0}, got: {1}".format(expected_commands, mock_iptables.call_list)) + + # + # Incorrect firewall settings are reported as warnings, and each reporting period should log these warnings only 3 times. The first reporting period will include an extra warning + # because the state of the firewall went from "OK" to "not OK" (the initial state is "OK"). + # + self.assertEqual(4, warn_count_first_reporting_period, "Expected 4 warnings to be logged during the first reporting period, got: {0}".format(warn.call_args_list[:warn_count_first_reporting_period])) + self.assertEqual(7, len(warn.call_args_list), "Expected a total of 7 warnings to be logged for the two reporting periods, got: {0}".format(warn.call_args_list)) + warn_args = [args for args, _ in warn.call_args_list] + for w in warn_args: + self.assertTrue(w[0] == "FirewallInconsistency" and w[1].startswith('The results returned by iptables are inconsistent, will not change the current state of the firewall'), "Expected a warning about the results of iptables being inconsistent. Got: {0}".format(w)) + + # + # Once the firewall goes into a "not OK" state, the firewall manager is set to verbose mode in order to make it log the commands it executes. Verbose mode should be set only 3 times + # per reporting period. The initial state is False, since verbose mode is turned on only after the _operation() method detects an incorrect fire wall state. + # + expected_firewall_manager_in_verbose_mode = [False, True, True, True, False, True, True, True, False, False] + self.assertEqual(expected_firewall_manager_in_verbose_mode, firewall_manager_in_verbose_mode, "The firewall manager is not in verbose mode as expected for each invocation of the firewall operation") + + # + # The firewall manager logs the commands it executes as info. There should be 6 sets of commands, 3 for each of the 2 reporting periods in the test, plus an initial check for the iptables version + # + info_args = [args for args, _ in info.call_args_list] + self.assertTrue(info_args[0][0] == "Firewall" and re.match(r'Using iptables \[version .+] to manage firewall rules', info_args[0][1])) + self.assertEqual(19, len(info_args), "Expected a total of 19 commands logged as info. Got: {0}".format(info_args)) + expected_commands = ["dummy command"] + 6 * [ + mock_iptables.get_accept_dns_command("-C"), + mock_iptables.get_accept_command("-C"), + mock_iptables.get_drop_command("-C") + ] + for i in range(1, 18): + self.assertTrue(info_args[i][0] == "Firewall" and info_args[i][1].startswith(expected_commands[i]), "Expected command '{0}' logged at position {1}. Got: {2}".format(expected_commands[i], i, info_args[i])) + + self.assertEqual(0, error.call_count, "No errors should have been reported. Got: {0}". format(error.call_args_list)) + \ No newline at end of file diff --git a/tests/ga/test_firewall_manager.py b/tests/ga/test_firewall_manager.py index d2f00c5e9..7195436e6 100644 --- a/tests/ga/test_firewall_manager.py +++ b/tests/ga/test_firewall_manager.py @@ -19,7 +19,7 @@ import unittest from azurelinuxagent.common.utils import shellutil -from azurelinuxagent.ga.firewall_manager import FirewallManager, IpTables, FirewallCmd, NfTables, FirewallStateError, FirewallManagerNotAvailableError +from azurelinuxagent.ga.firewall_manager import FirewallManager, IpTables, FirewallCmd, NfTables, FirewallStateError, FirewallManagerNotAvailableError, event as firewall_manager_event from tests.lib.tools import AgentTestCase, patch from tests.lib.mock_firewall_command import MockIpTables, MockFirewallCmd, MockNft @@ -147,6 +147,52 @@ def _test_remove_legacy_rule_should_delete_the_legacy_rule(self, firewall_cmd_ty mock.call_list, "Expected a check ({0}) for the legacy rule, followed by a delete ({1}) of the rule".format(mock.check_option, mock.delete_option)) + def _test_it_should_not_log_the_commands_it_executes_by_default(self, firewall_cmd_type, mock_firewall_cmd_type, command): + with mock_firewall_cmd_type(): + firewall = firewall_cmd_type('168.63.129.16') + + with patch.object(firewall_manager_event, "info") as info: + firewall.check() + firewall.setup() + firewall.remove() + + # There should be no Firewall events, but if there are any, check that the command does not show up + firewall_events = [args[1] for args, _ in info.call_args_list if args[0] == "Firewall"] + if len(firewall_events) == 0: + self.assertTrue(True) # pylint: disable=redundant-unittest-assert + else: + for e in firewall_events: + self.assertNotIn(command, e, "Execution of the '{0}' command should not have produced telemetry events, got: {1}".format(command, firewall_events)) + + def _test_it_should_log_the_commands_it_executes_when_using_verbose_mode(self, firewall_cmd_type, mock_firewall_cmd_type, add_option, check_option, delete_option): + with mock_firewall_cmd_type(): + firewall = firewall_cmd_type('168.63.129.16') + firewall.verbose = True + + def assert_commands_were_logged(commands, call_args_list): + firewall_events = [args[1] for args, _ in call_args_list if args[0] == "Firewall"] + self.assertEqual(len(commands), len(firewall_events), "Expected {0} Firewall events to be logged, got: {1}".format(len(commands), firewall_events)) + for i in range(0, len(commands)): + self.assertIn(commands[i], firewall_events[i], "The '{0}' command is missing from the telemetry events: {1}".format(commands[i], firewall_events)) + + with patch.object(firewall_manager_event, "info") as info: + firewall.check() + assert_commands_were_logged([mock_firewall_cmd_type.get_accept_dns_command(check_option), mock_firewall_cmd_type.get_accept_command(check_option), mock_firewall_cmd_type.get_drop_command(check_option)], info.call_args_list) + + with patch.object(firewall_manager_event, "info") as info: + firewall.setup() + assert_commands_were_logged([mock_firewall_cmd_type.get_accept_dns_command(add_option), mock_firewall_cmd_type.get_accept_command(add_option), mock_firewall_cmd_type.get_drop_command(add_option)], info.call_args_list) + + with patch.object(firewall_manager_event, "info") as info: + firewall.remove() + assert_commands_were_logged( + [ + mock_firewall_cmd_type.get_accept_dns_command(check_option), mock_firewall_cmd_type.get_accept_dns_command(delete_option), + mock_firewall_cmd_type.get_accept_command(check_option), mock_firewall_cmd_type.get_accept_command(delete_option), + mock_firewall_cmd_type.get_drop_command(check_option), mock_firewall_cmd_type.get_drop_command(delete_option) + ], + info.call_args_list) + class TestIpTables(_TestFirewallCommand): def test_it_should_raise_FirewallManagerNotAvailableError_when_the_command_is_not_available(self): @@ -183,6 +229,12 @@ def test_it_should_not_use_the_wait_option_on_iptables_versions_less_than_1_4_21 mock_iptables.call_list, "Expected only 3 calls to the -A (append) command without the -w option") + def test_it_should_not_log_the_commands_it_executes_by_default(self): + self._test_it_should_not_log_the_commands_it_executes_by_default(IpTables, MockIpTables, "iptables") + + def test_it_should_log_the_commands_it_executes_when_using_verbose_mode(self): + self._test_it_should_log_the_commands_it_executes_when_using_verbose_mode(IpTables, MockIpTables, add_option="-A", check_option="-C", delete_option="-D") + class TestFirewallCmd(_TestFirewallCommand): def test_it_should_raise_FirewallManagerNotAvailableError_when_the_command_is_not_available(self): @@ -205,6 +257,12 @@ def test_check_should_verify_all_rules(self): def test_remove_legacy_rule_should_delete_the_legacy_rule(self): self._test_remove_legacy_rule_should_delete_the_legacy_rule(FirewallCmd, MockFirewallCmd) + def test_it_should_not_log_the_commands_it_executes_by_default(self): + self._test_it_should_not_log_the_commands_it_executes_by_default(IpTables, MockIpTables, "firewallcmd") + + def test_it_should_log_the_commands_it_executes_when_using_verbose_mode(self): + self._test_it_should_log_the_commands_it_executes_when_using_verbose_mode(FirewallCmd, MockFirewallCmd, add_option="--passthrough", check_option="--query-passthrough", delete_option="--remove-passthrough") + class TestNft(AgentTestCase): def test_it_should_raise_FirewallManagerNotAvailableError_when_the_command_is_not_available(self): @@ -224,7 +282,6 @@ def test_setup_should_set_the_walinuxagent_table(self): self.assertIn("add chain ip walinuxagent output", script, "The setup script should to create the output chain. Script: {0}".format(script)) self.assertIn("add rule ip walinuxagent output ", script, "The setup script should to create the rule to manage the output chain. Script: {0}".format(script)) - def test_remove_should_delete_the_walinuxagent_table(self): with MockNft() as mock_nft: firewall = NfTables('168.63.129.16') @@ -259,6 +316,51 @@ def test_check_should_verify_all_rules(self): firewall.check() self.assertIn("['The drop action is missing']", str(context.exception), "Expected an error message indicating the Agent's UID is not excluded") + def test_it_should_not_log_the_commands_it_executes_by_default(self): + with MockNft(): + firewall = NfTables('168.63.129.16') + + with patch.object(firewall_manager_event, "info") as info: + firewall.check() + firewall.setup() + firewall.remove() + + firewall_events = [args[1] for args, _ in info.call_args_list if args[0] == "Firewall"] + # There should be no Firewall event, but if there are any, check that the command does not show up + if len(firewall_events) == 0: + self.assertTrue(True) # pylint: disable=redundant-unittest-assert + else: + for e in firewall_events: + self.assertNotIn("nft", e, "Execution of the 'nft' command should not have produced telemetry events, got: {0}".format(firewall_events)) + + def test_it_should_log_the_commands_it_executes_when_using_verbose_mode(self): + with MockNft(): + firewall = NfTables('168.63.129.16') + firewall.verbose = True + + def assert_commands_were_logged(commands, call_args_list): + firewall_events = [args[1] for args, _ in call_args_list if args[0] == "Firewall"] + self.assertEqual(len(commands), len(firewall_events), "Expected {0} Firewall events to be logged, got: {1}".format(len(commands), firewall_events)) + for i in range(0, len(commands)): + self.assertIn(commands[i], firewall_events[i], "The '{0}' command is missing from the telemetry events: {1}".format(commands[i], firewall_events)) + + with patch.object(firewall_manager_event, "info") as info: + firewall.check() + assert_commands_were_logged( + [ + "nft --json list tables", + "nft --json list table walinuxagent" + ], + info.call_args_list) + + with patch.object(firewall_manager_event, "info") as info: + firewall.setup() + assert_commands_were_logged(["nft -f -"], info.call_args_list) + + with patch.object(firewall_manager_event, "info") as info: + firewall.remove() + assert_commands_were_logged(["nft delete table walinuxagent"], info.call_args_list) + if __name__ == '__main__': unittest.main() diff --git a/tests/lib/mock_firewall_command.py b/tests/lib/mock_firewall_command.py index bc75417c0..5ffc24b33 100644 --- a/tests/lib/mock_firewall_command.py +++ b/tests/lib/mock_firewall_command.py @@ -167,7 +167,7 @@ def _get_return_value(self, command): raise Exception("Unexpected rule: {0}".format(rule)) return exit_code, "Mocked stdout", "Mocked stderr" - match = re.match(r"iptables (-w )?-t security -L OUTPUT -nxv", command) + match = re.match(r"iptables (-w )?-t security -L (?POUTPUT )?-nxv", command) if match is not None: # # Create the mock output for iptables -L @@ -179,10 +179,36 @@ def _get_return_value(self, command): # 0 0 ACCEPT tcp -- * * 0.0.0.0/0 168.63.129.16 owner UID match 0 # 0 0 DROP tcp -- * * 0.0.0.0/0 168.63.129.16 ctstate INVALID,NEW # - stdout = [ - "Chain OUTPUT (policy ACCEPT 1384 packets, 126406 bytes)\n", - " pkts bytes target prot opt in out source destination\n" - ] + # # iptables -w -t security -L -nvx + # Chain INPUT (policy ACCEPT 0 packets, 0 bytes) + # pkts bytes target prot opt in out source destination + # + # Chain FORWARD (policy ACCEPT 0 packets, 0 bytes) + # pkts bytes target prot opt in out source destination + # + # Chain OUTPUT (policy ACCEPT 1384 packets, 126406 bytes) + # pkts bytes target prot opt in out source destination + # 0 0 ACCEPT tcp -- * * 0.0.0.0/0 168.63.129.16 tcp dpt:53 + # 0 0 ACCEPT tcp -- * * 0.0.0.0/0 168.63.129.16 owner UID match 0 + # 0 0 DROP tcp -- * * 0.0.0.0/0 168.63.129.16 ctstate INVALID,NEW + # + # + if match.group("output") is not None: + stdout = [ + "Chain OUTPUT (policy ACCEPT 1384 packets, 126406 bytes)\n", + " pkts bytes target prot opt in out source destination\n" + ] + else: + stdout = [ + "Chain INPUT (policy ACCEPT 0 packets, 0 bytes)\n", + " pkts bytes target prot opt in out source destination\n", + "\n", + "Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)\n", + " pkts bytes target prot opt in out source destination\n", + "\n", + "Chain OUTPUT (policy ACCEPT 1384 packets, 126406 bytes)\n", + " pkts bytes target prot opt in out source destination\n" + ] if (self._return_values["-C"]["ACCEPT DNS"] == 0) == self._check_matches_list: stdout.append(" 0 0 ACCEPT tcp -- * * 0.0.0.0/0 168.63.129.16 tcp dpt:53\n") if (self._return_values["-C"]["ACCEPT"] == 0) == self._check_matches_list: From cc5fe1bb44cde65ff0709ac0124380aefbfbddc2 Mon Sep 17 00:00:00 2001 From: "narrieta@microsoft" Date: Wed, 31 Dec 2025 14:48:34 -0800 Subject: [PATCH 2/5] ut --- tests/ga/test_env.py | 34 +++++++++++----------- tests/ga/test_firewall_manager.py | 48 +++++++++++++++---------------- tests/lib/event.py | 24 ++++++++++++++++ 3 files changed, 64 insertions(+), 42 deletions(-) create mode 100644 tests/lib/event.py diff --git a/tests/ga/test_env.py b/tests/ga/test_env.py index 7fc86764b..197623d03 100644 --- a/tests/ga/test_env.py +++ b/tests/ga/test_env.py @@ -22,6 +22,7 @@ from azurelinuxagent.common.osutil.default import DefaultOSUtil, shellutil from azurelinuxagent.ga.env import MonitorDhcpClientRestart, EnableFirewall +from tests.lib.event import get_events_from_mock from tests.lib.tools import AgentTestCase, patch, DEFAULT from tests.lib.mock_firewall_command import MockIpTables @@ -249,10 +250,10 @@ def test_it_should_log_the_state_of_the_firewall_once_per_reporting_period(self) # The first call to _operation() reports the version of iptables, then there should be only one firewall state report for each of the 2 reporting periods in the test self.assertEqual(2, event_count_first_reporting_period, "Expected 2 events to be logged during the first reporting period, got: {0}".format(info.call_args_list[:event_count_first_reporting_period])) self.assertEqual(3, len(info.call_args_list), "Expected a total of 3 events to be logged for the two reporting periods, got: {0}".format(info.call_args_list)) - event_args = [args for args, _ in info.call_args_list] - self.assertTrue(event_args[0][0] == "Firewall" and re.match(r'Using iptables \[version .+] to manage firewall rules', event_args[0][1]), "Expected a check for the iptables version in the first reporting period. Got: {0}".format(event_args[0])) - self.assertTrue(event_args[1][0] == "Firewall" and event_args[1][1].startswith('The firewall is configured correctly.'), "Expected a firewall status report in the first reporting period. Got: {0}".format(event_args[1])) - self.assertTrue(event_args[2][0] == "Firewall" and event_args[1][1].startswith('The firewall is configured correctly.'), "Expected a firewall status report in the second reporting period. Got: {0}".format(event_args[1])) + infos = get_events_from_mock(info) + self.assertTrue(infos[0][0] == "Firewall" and infos[0][1] == "Using iptables [version 1.4.21] to manage firewall rules", "Expected a check for the iptables version in the first reporting period. Got: {0}".format(infos[0])) + self.assertTrue(infos[1][0] == "Firewall" and infos[1][1].startswith('The firewall is configured correctly.'), "Expected a firewall status report in the first reporting period. Got: {0}".format(infos[1])) + self.assertTrue(infos[2][0] == "Firewall" and infos[1][1].startswith('The firewall is configured correctly.'), "Expected a firewall status report in the second reporting period. Got: {0}".format(infos[1])) self.assertEqual(0, warn.call_count, "No warnings should have been reported. Got: {0}". format(warn.call_args_list)) self.assertEqual(0, error.call_count, "No errors should have been reported. Got: {0}". format(error.call_args_list)) @@ -298,8 +299,8 @@ def test_it_should_log_errors_thrice_per_reporting_period(self): # self.assertEqual(4, warn_count_first_reporting_period, "Expected 4 warnings to be logged during the first reporting period, got: {0}".format(warn.call_args_list[:warn_count_first_reporting_period])) self.assertEqual(7, len(warn.call_args_list), "Expected a total of 7 warnings to be logged for the two reporting periods, got: {0}".format(warn.call_args_list)) - warn_args = [args for args, _ in warn.call_args_list] - for w in warn_args: + warnings = get_events_from_mock(warn) + for w in warnings: self.assertTrue(w[0] == "FirewallInconsistency" and w[1].startswith('The results returned by iptables are inconsistent, will not change the current state of the firewall'), "Expected a warning about the results of iptables being inconsistent. Got: {0}".format(w)) # @@ -312,16 +313,15 @@ def test_it_should_log_errors_thrice_per_reporting_period(self): # # The firewall manager logs the commands it executes as info. There should be 6 sets of commands, 3 for each of the 2 reporting periods in the test, plus an initial check for the iptables version # - info_args = [args for args, _ in info.call_args_list] - self.assertTrue(info_args[0][0] == "Firewall" and re.match(r'Using iptables \[version .+] to manage firewall rules', info_args[0][1])) - self.assertEqual(19, len(info_args), "Expected a total of 19 commands logged as info. Got: {0}".format(info_args)) - expected_commands = ["dummy command"] + 6 * [ - mock_iptables.get_accept_dns_command("-C"), - mock_iptables.get_accept_command("-C"), - mock_iptables.get_drop_command("-C") - ] - for i in range(1, 18): - self.assertTrue(info_args[i][0] == "Firewall" and info_args[i][1].startswith(expected_commands[i]), "Expected command '{0}' logged at position {1}. Got: {2}".format(expected_commands[i], i, info_args[i])) + infos = get_events_from_mock(info) + expected_commands = ['Using iptables [version 1.4.21] to manage firewall rules'] + \ + 6 * [ + mock_iptables.get_accept_dns_command("-C"), + mock_iptables.get_accept_command("-C"), + mock_iptables.get_drop_command("-C") + ] + for i in range(0, 19): + self.assertTrue(infos[i][0] == "Firewall" and infos[i][1].startswith(expected_commands[i]), "Expected command '{0}' logged at position {1}. Got: {2}".format(expected_commands[i], i, infos[i])) self.assertEqual(0, error.call_count, "No errors should have been reported. Got: {0}". format(error.call_args_list)) - \ No newline at end of file + diff --git a/tests/ga/test_firewall_manager.py b/tests/ga/test_firewall_manager.py index 7195436e6..bba679967 100644 --- a/tests/ga/test_firewall_manager.py +++ b/tests/ga/test_firewall_manager.py @@ -20,6 +20,8 @@ from azurelinuxagent.common.utils import shellutil from azurelinuxagent.ga.firewall_manager import FirewallManager, IpTables, FirewallCmd, NfTables, FirewallStateError, FirewallManagerNotAvailableError, event as firewall_manager_event + +from tests.lib.event import get_events_from_mock from tests.lib.tools import AgentTestCase, patch from tests.lib.mock_firewall_command import MockIpTables, MockFirewallCmd, MockNft @@ -157,41 +159,39 @@ def _test_it_should_not_log_the_commands_it_executes_by_default(self, firewall_c firewall.remove() # There should be no Firewall events, but if there are any, check that the command does not show up - firewall_events = [args[1] for args, _ in info.call_args_list if args[0] == "Firewall"] - if len(firewall_events) == 0: + events = get_events_from_mock(info) + if len(events) == 0: self.assertTrue(True) # pylint: disable=redundant-unittest-assert else: - for e in firewall_events: - self.assertNotIn(command, e, "Execution of the '{0}' command should not have produced telemetry events, got: {1}".format(command, firewall_events)) + for e in events: + self.assertNotIn(command, e, "Execution of the '{0}' command should not have produced telemetry events, got: {1}".format(command, events)) def _test_it_should_log_the_commands_it_executes_when_using_verbose_mode(self, firewall_cmd_type, mock_firewall_cmd_type, add_option, check_option, delete_option): with mock_firewall_cmd_type(): firewall = firewall_cmd_type('168.63.129.16') firewall.verbose = True - def assert_commands_were_logged(commands, call_args_list): - firewall_events = [args[1] for args, _ in call_args_list if args[0] == "Firewall"] + def assert_commands_were_logged(commands): + firewall_events = [e[1] for e in get_events_from_mock(info) if e[0] == "Firewall"] self.assertEqual(len(commands), len(firewall_events), "Expected {0} Firewall events to be logged, got: {1}".format(len(commands), firewall_events)) for i in range(0, len(commands)): self.assertIn(commands[i], firewall_events[i], "The '{0}' command is missing from the telemetry events: {1}".format(commands[i], firewall_events)) with patch.object(firewall_manager_event, "info") as info: firewall.check() - assert_commands_were_logged([mock_firewall_cmd_type.get_accept_dns_command(check_option), mock_firewall_cmd_type.get_accept_command(check_option), mock_firewall_cmd_type.get_drop_command(check_option)], info.call_args_list) + assert_commands_were_logged([mock_firewall_cmd_type.get_accept_dns_command(check_option), mock_firewall_cmd_type.get_accept_command(check_option), mock_firewall_cmd_type.get_drop_command(check_option)]) with patch.object(firewall_manager_event, "info") as info: firewall.setup() - assert_commands_were_logged([mock_firewall_cmd_type.get_accept_dns_command(add_option), mock_firewall_cmd_type.get_accept_command(add_option), mock_firewall_cmd_type.get_drop_command(add_option)], info.call_args_list) + assert_commands_were_logged([mock_firewall_cmd_type.get_accept_dns_command(add_option), mock_firewall_cmd_type.get_accept_command(add_option), mock_firewall_cmd_type.get_drop_command(add_option)]) with patch.object(firewall_manager_event, "info") as info: firewall.remove() - assert_commands_were_logged( - [ - mock_firewall_cmd_type.get_accept_dns_command(check_option), mock_firewall_cmd_type.get_accept_dns_command(delete_option), - mock_firewall_cmd_type.get_accept_command(check_option), mock_firewall_cmd_type.get_accept_command(delete_option), - mock_firewall_cmd_type.get_drop_command(check_option), mock_firewall_cmd_type.get_drop_command(delete_option) - ], - info.call_args_list) + assert_commands_were_logged([ + mock_firewall_cmd_type.get_accept_dns_command(check_option), mock_firewall_cmd_type.get_accept_dns_command(delete_option), + mock_firewall_cmd_type.get_accept_command(check_option), mock_firewall_cmd_type.get_accept_command(delete_option), + mock_firewall_cmd_type.get_drop_command(check_option), mock_firewall_cmd_type.get_drop_command(delete_option) + ]) class TestIpTables(_TestFirewallCommand): @@ -338,28 +338,26 @@ def test_it_should_log_the_commands_it_executes_when_using_verbose_mode(self): firewall = NfTables('168.63.129.16') firewall.verbose = True - def assert_commands_were_logged(commands, call_args_list): - firewall_events = [args[1] for args, _ in call_args_list if args[0] == "Firewall"] + def assert_commands_were_logged(commands): + firewall_events = [e[1] for e in get_events_from_mock(info) if e[0] == "Firewall"] self.assertEqual(len(commands), len(firewall_events), "Expected {0} Firewall events to be logged, got: {1}".format(len(commands), firewall_events)) for i in range(0, len(commands)): self.assertIn(commands[i], firewall_events[i], "The '{0}' command is missing from the telemetry events: {1}".format(commands[i], firewall_events)) with patch.object(firewall_manager_event, "info") as info: firewall.check() - assert_commands_were_logged( - [ - "nft --json list tables", - "nft --json list table walinuxagent" - ], - info.call_args_list) + assert_commands_were_logged([ + "nft --json list tables", + "nft --json list table walinuxagent" + ]) with patch.object(firewall_manager_event, "info") as info: firewall.setup() - assert_commands_were_logged(["nft -f -"], info.call_args_list) + assert_commands_were_logged(["nft -f -"]) with patch.object(firewall_manager_event, "info") as info: firewall.remove() - assert_commands_were_logged(["nft delete table walinuxagent"], info.call_args_list) + assert_commands_were_logged(["nft delete table walinuxagent"]) if __name__ == '__main__': diff --git a/tests/lib/event.py b/tests/lib/event.py new file mode 100644 index 000000000..52b877431 --- /dev/null +++ b/tests/lib/event.py @@ -0,0 +1,24 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.6+ and Openssl 1.0+ +# + +def get_events_from_mock(mock): + """ + Extracts the telemetry events from the call_args_list of a mock for one of the azurelinuxagent.common.event.{info,warn,error} functions. + Those functions have a signature similar to "def info(op, fmt, *args)". Events are returned as an array of (operation, formatted message) tuples. + """ + return [(args[0], args[1].format(*args[2:])) for args, _ in mock.call_args_list] + From 0666c5463e61b7f8ce93c1dbdbb68b93c58b4550 Mon Sep 17 00:00:00 2001 From: "narrieta@microsoft" Date: Wed, 31 Dec 2025 14:59:18 -0800 Subject: [PATCH 3/5] pylint --- tests/ga/test_env.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ga/test_env.py b/tests/ga/test_env.py index 197623d03..6266d9c0b 100644 --- a/tests/ga/test_env.py +++ b/tests/ga/test_env.py @@ -15,7 +15,6 @@ # Requires Python 2.6+ and Openssl 1.0+ # import datetime -import re import time from azurelinuxagent.common.osutil import get_osutil From 3ec415baa772796939eda5e483e5c65a3ce81b4c Mon Sep 17 00:00:00 2001 From: "narrieta@microsoft" Date: Mon, 5 Jan 2026 12:46:55 -0800 Subject: [PATCH 4/5] typo --- azurelinuxagent/ga/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index b7f914ef5..decfd63f1 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -105,7 +105,7 @@ class FirewallState(object): OK = "OK" # The firewall rules for the WireServer are setup correctly NotSet = "NotSet" # The firewall rules have not been set Invalid = "Invalid" # The state of the firewall rules is not as expected, e.g. because some rules are missing - Inconsistent = "Inconsistent" # The stare of the firewall is reported differently by different tools, e.g. "iptables -C" vs "iptables -L" + Inconsistent = "Inconsistent" # The state of the firewall is reported differently by different tools, e.g. "iptables -C" vs "iptables -L" Unknown = "Unknown" # The state of the firewall has not been checked yet From c976c13fe043c0752a778ad08ea24fa4a85d94f1 Mon Sep 17 00:00:00 2001 From: "narrieta@microsoft" Date: Mon, 5 Jan 2026 14:45:23 -0800 Subject: [PATCH 5/5] comment --- azurelinuxagent/ga/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index decfd63f1..c481d9c27 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -116,7 +116,7 @@ def __init__(self, wire_server_address): super(EnableFirewall, self).__init__(conf.get_enable_firewall_period()) self._wire_server_address = wire_server_address self._firewall_manager = None # initialized on demand in the _operation method - self._firewall_state = FirewallState.OK + self._firewall_state = FirewallState.OK # Initialized to OK to prevent turning on verbose mode on the initial invocation of _operation(). It is properly initialized as soon as we do the first check of the firewall. self._report_count = 0 self._next_report_time = datetime.datetime.now(UTC) self._should_report = True