diff --git a/.github/workflows/log-quality-rules.yml b/.github/workflows/log-quality-rules.yml new file mode 100644 index 00000000..0a28cf5f --- /dev/null +++ b/.github/workflows/log-quality-rules.yml @@ -0,0 +1,156 @@ +name: log-quality-rules +on: + push: + branches: [ main, develop, release, support/*, hotfix/*, topic/* ] + pull_request: + branches: [ main, develop, release, support/*, hotfix/*, topic/* ] +jobs: + log-quality-check: + runs-on: ubuntu-22.04 + steps: + - name: Set up cache + id: cache + uses: actions/cache@v4 + with: + path: | + build + install + key: ${{ runner.os }}-${{ github.sha }} + + - name: Set up CMake + # 3.16.x To support flow control commands to be overridden. + # Installs in: find /opt -ipath *bin/cmake + uses: jwlawson/actions-setup-cmake@v1.12 + with: + cmake-version: '3.16.x' + - name: Install packages + run: > + sudo apt update && sudo apt install -y libcurl4-openssl-dev libgtest-dev lcov gcovr libmsgpack* libcjson-dev build-essential + - name: Checkout rbus + if: steps.cache.outputs.cache-hit != 'true' + uses: actions/checkout@v4 + with: + path: rbus + - name: Install dependencies + run: pip install pyyaml + - name: Configure Rbus + if: steps.cache.outputs.cache-hit != 'true' + run: > + cmake + -S "${{github.workspace}}/rbus" + -B build/rbus + -DCMAKE_INSTALL_PREFIX="${{github.workspace}}/install/usr" + -DBUILD_FOR_DESKTOP=ON + -DENABLE_UNIT_TESTING=ON + -DENABLE_CODE_COVERAGE=ON + -DCMAKE_BUILD_TYPE=Debug + - name: Build Rbus + if: steps.cache.outputs.cache-hit != 'true' + run: > + make -C build/rbus + - name: Install rbus + if: steps.cache.outputs.cache-hit != 'true' + run: > + make -C build/rbus install + - name: Start rtrouted + # nohup to keep it running. + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + nohup ./bin/rtrouted -f -l DEBUG > /tmp/rtrouted_log.txt & + - name: Run drop test with inactive provider + continue-on-error: true + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + export RT_LOG_LEVEL=info + nohup ./bin/rbusSampleProvider > /tmp/log_sampleProvider.log 2>&1 & + sleep 1 + nohup stdbuf -oL -eL ./bin/rbusBlockingProvider > /tmp/log_blockingProvider.log 2>&1 & + sleep 1 + ./bin/rbusDmlBlockingConsumer > /tmp/log_dmlBlockingConsumer.log 2>&1 + cat /tmp/log_sampleProvider.log || true + cat /tmp/log_blockingProvider.log || true + cat /tmp/log_dmlBlockingConsumer.log || true + - name: Run RbusTestTimeoutValues Unit test + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + export RT_LOG_LEVEL=info + nohup ./bin/rbusTestTimeoutValuesProvider > /tmp/log_testTimeoutValuesProvider.txt & + ./bin/rbusTestTimeoutValuesConsumer > /tmp/log_testTimeoutValuesConsumer.txt + - name: Run Unit test + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + export RT_LOG_LEVEL=info + nohup ./bin/rbusTestProvider >/tmp/log_TestAppProvider.txt & + ./bin/rbusTestConsumer -a > /tmp/log_TestConsumer.txt + - name: Run multiRbusOpenMethod Unit Test + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + export RT_LOG_LEVEL=info + ./bin/multiRbusOpenMethodProvider > /tmp/log_MultiRbusOpenMethodProvider.txt & + ./bin/multiRbusOpenMethodConsumer > /tmp/log_MultiRbusOpenMethodConsumer.txt & + - name: Run multiRbusOpenSubscribe Unit test + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + export RT_LOG_LEVEL=info + nohup ./bin/multiRbusOpenProvider >/tmp/log_multiRbusOpenProvider.txt & + ./bin/multiRbusOpenConsumer >/tmp/log_multiRbusOpenConsumer.txt + - name: Run multiRbusOpenGet Unit test + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + export RT_LOG_LEVEL=info + nohup ./bin/multiRbusOpenRbusGetProvider >/tmp/log_multiRbusOpenRbusGetProvider.txt & + ./bin/multiRbusOpenRbusGetConsumer >/tmp/log_multiRbusOpenRbusGetConsumer.txt + - name: Run multiRbusOpenSet Unit test + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + export RT_LOG_LEVEL=info + nohup ./bin/multiRbusOpenRbusGetProvider >/tmp/log_multiRbusOpenRbusSetProvider.txt & + ./bin/multiRbusOpenRbusSetConsumer >/tmp/log_multiRbusOpenRbusSetConsumer.txt + - name: Run Gtest Cases + run: | + cd build/rbus + export RT_LOG_LEVEL=info + nohup ./src/session_manager/rbus_session_mgr & + ./unittests/rbus_gtest.bin > /tmp/log_gtestlogs.txt + - name: Analyze logs + run: | + cd ${{github.workspace}}/rbus + set -- /tmp/log_*.txt + if [ ! -e "$1" ]; then + echo "No log files found matching /tmp/log_*.txt, skipping log analysis." + exit 0 + else + for f in "$@"; do + python3 noisylogdetector.py "$f" "/tmp/noisy_log_report_$(basename "$f" .txt).html" + done + fi + - name: Stop SessionManager + run: | + killall -15 rbus_session_mgr || true + - name: Stop rtrouted + run: | + cd install/usr + export PREFIX=$PWD + export LD_LIBRARY_PATH=$PREFIX/lib + ./bin/rtrouted_diag shutdown + - name: Upload the noisy log report to Pull request using actions + uses: actions/upload-artifact@v4 + with: + name: noisy-log-report + path: /tmp/noisy_log_report_*.html diff --git a/noisylogdetector.py b/noisylogdetector.py new file mode 100644 index 00000000..b6c2bea1 --- /dev/null +++ b/noisylogdetector.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 + +import re +import sys +import yaml +from html import escape +from pathlib import Path + +# ----------------------------- +def load_rules(path="rules.yml"): + try: + with open(path, "r") as f: + return yaml.safe_load(f) + except FileNotFoundError: + print(f"Rules file not found: {path}", file=sys.stderr) + sys.exit(1) + except PermissionError: + print(f"Permission denied while reading rules file: {path}", file=sys.stderr) + sys.exit(1) + except yaml.YAMLError as e: + print(f"Failed to parse YAML rules file '{path}': {e}", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"Unexpected error while loading rules from '{path}': {e}", file=sys.stderr) + sys.exit(1) + +# ----------------------------- +def starts_with_date_and_timestamp(line): + """ + Matches log lines starting with any of the following timestamp patterns: + - HH:MM:SS or HH:MM:SS.ssssss (e.g. 04:31:14 or 04:31:14.109764) + - YYYY-MM-DD HH:MM:SS or YYYY-MM-DD HH:MM:SS.sss (e.g. 2024-11-11 04:31:14 or 2024-11-11 04:31:14.109) + - Mon DD HH:MM:SS (e.g. Nov 11 04:31:14) + Lines not matching these patterns at the start will be ignored. + """ + return bool(re.match( + r'^\s*(\d{2}:\d{2}:\d{2}(?:\.\d+)?|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(?:\.\d+)?|' + r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})', + line + )) + +def detect_level(line): + for lvl in ("ERROR", "WARN", "INFO", "DEBUG", "TRACE"): + if re.search(rf"\b{lvl}\b", line): + return lvl + return "UNKNOWN" + +# ----------------------------- +def compile_patterns(patterns): + return [re.compile(p, re.IGNORECASE) for p in patterns] + +# ----------------------------- +def analyze(log_file, rules): + """ + Analyze a log file for noisy logging, sensitive data exposure, and + incorrect severity usage based on the provided rules. + Parameters + ---------- + log_file : str or pathlib.Path + Path to the log file to analyze. The file is opened in text mode + with errors ignored to allow processing partially invalid encodings. + rules : dict + Configuration dictionary containing analysis rules. Expected keys: + - "public_api_patterns": list of regex patterns that identify when + a public API call starts. + - "sensitive_patterns": list of regex patterns that match sensitive + or PII data that must not appear in logs. + - "failure_keywords": list of lowercase keywords that indicate a + failure or error condition in a log line. + - "noisy_log_levels": iterable of log levels (e.g. "INFO", "DEBUG") + that are considered noisy when emitted during a public API call. + - "required_severity_on_failure": iterable of log levels (e.g. + "ERROR", "WARN") that must be used when a failure keyword is + present. + Returns + ------- + tuple + A 3-tuple `(noisy_logs, sensitive_logs, severity_violations)` where + each element is a list of dictionaries describing matching log lines. + - noisy_logs: entries for internal/noisy logs emitted while a public + API is considered active. Each dict contains: + - "line": line number (1-based) in the log file. + - "log": the full log line. + - "reason": human-readable explanation. + - sensitive_logs: entries where sensitive or PII data was detected, + with similar structure ("line", "log", "reason"). + - severity_violations: entries where a failure keyword was found but + the log level did not meet the required severity. + """ + noisy_logs = [] + sensitive_logs = [] + severity_violations = [] + + public_api_res = compile_patterns(rules["public_api_patterns"]) + sensitive_res = compile_patterns(rules["sensitive_patterns"]) + failure_keywords = rules["failure_keywords"] + in_public_api = False + active_public_api = None + + with open(log_file, "r", errors="ignore") as f: + for ln, line in enumerate(f, 1): + line = line.rstrip() + + if not starts_with_date_and_timestamp(line): + continue + + level = detect_level(line) + + # ----------------- Detect public API start + for r in public_api_res: + m = r.search(line) + if m: + in_public_api = True + active_public_api = m.group(0) + break + + # ----------------- Noisy logs + if in_public_api: + if active_public_api not in line: + if level in rules["noisy_log_levels"]: + noisy_logs.append({ + "line": ln, + "log": line, + "reason": f"Internal log during public API execution ({active_public_api})" + }) + + # ----------------- Sensitive logs + for r in sensitive_res: + if r.search(line): + sensitive_logs.append({ + "line": ln, + "log": line, + "reason": "Sensitive / PII data detected" + }) + break + + # ----------------- Severity enforcement + if any(k in line.lower() for k in failure_keywords): + if level not in rules["required_severity_on_failure"]: + severity_violations.append({ + "line": ln, + "log": line, + "reason": "Failure logged without ERROR/WARN" + }) + + return noisy_logs, sensitive_logs, severity_violations + +# ----------------------------- +def generate_html(noisy, sensitive, severity, output): + with open(output, "w", encoding="utf-8") as f: + f.write(""" + +
+| Line | Reason | Log |
|---|---|---|
| No issues found in this section. | ||
| {r['line']} | " + f"{escape(r['reason'])} | " + f"{escape(r['log'])} |