diff --git a/.github/workflows/yetus-general-check.yml b/.github/workflows/yetus-general-check.yml
index bb285a7b0692..ecaf94c19424 100644
--- a/.github/workflows/yetus-general-check.yml
+++ b/.github/workflows/yetus-general-check.yml
@@ -97,7 +97,7 @@ jobs:
         if: always()
         run: |
           cd "${{ github.workspace }}"
-          python3 src/dev-support/yetus_console_to_md.py yetus-general-check/output/console.txt >> $GITHUB_STEP_SUMMARY
+          python3 src/dev-support/yetus_console_to_md.py yetus-general-check/output >> $GITHUB_STEP_SUMMARY

       - name: Publish Test Results
         if: always()
diff --git a/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml b/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml
index 8526943c3339..8d41b86b99e4 100644
--- a/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml
+++ b/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml
@@ -95,7 +95,7 @@ jobs:
         if: always()
         run: |
           cd "${{ github.workspace }}"
-          python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-compile-check/output/console.txt >> $GITHUB_STEP_SUMMARY
+          python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-compile-check/output >> $GITHUB_STEP_SUMMARY

       - name: Publish Results
         if: always()
diff --git a/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml b/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml
index b4cc992b9a08..f29acabb5290 100644
--- a/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml
+++ b/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml
@@ -97,7 +97,6 @@ jobs:
           PLUGINS: "github,htmlout,maven,unit"
           SET_JAVA_HOME: "/usr/lib/jvm/java-17"
           SOURCEDIR: "${{ github.workspace }}/src"
-          TESTS_FILTER: "mvninstall"
           YETUSDIR: "${{ github.workspace }}/yetus"
           AUTHOR_IGNORE_LIST: "src/main/asciidoc/_chapters/developer.adoc"
           BLANKS_EOL_IGNORE_FILE: "dev-support/blanks-eol-ignore.txt"
@@ -118,7 +117,7 @@ jobs:
         if: always()
         run: |
           cd "${{ github.workspace }}"
-          python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-unit-check/output/console.txt >> $GITHUB_STEP_SUMMARY
+          python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-unit-check/output >> $GITHUB_STEP_SUMMARY

       - name: Publish Test Results
         if: always()
diff --git a/dev-support/jenkins_precommit_github_yetus.sh b/dev-support/jenkins_precommit_github_yetus.sh
index 59d4cf2b82c3..4ec0c1d3829e 100755
--- a/dev-support/jenkins_precommit_github_yetus.sh
+++ b/dev-support/jenkins_precommit_github_yetus.sh
@@ -38,7 +38,6 @@ declare -a required_envs=(
   "PLUGINS"
   "SET_JAVA_HOME"
   "SOURCEDIR"
-  "TESTS_FILTER"
   "YETUSDIR"
   "AUTHOR_IGNORE_LIST"
   "BLANKS_EOL_IGNORE_FILE"
@@ -126,7 +125,9 @@ YETUS_ARGS+=("--java-home=${SET_JAVA_HOME}")
 YETUS_ARGS+=("--author-ignore-list=${AUTHOR_IGNORE_LIST}")
 YETUS_ARGS+=("--blanks-eol-ignore-file=${BLANKS_EOL_IGNORE_FILE}")
 YETUS_ARGS+=("--blanks-tabs-ignore-file=${BLANKS_TABS_IGNORE_FILE}*")
-YETUS_ARGS+=("--tests-filter=${TESTS_FILTER}")
+if [[ -n "${TESTS_FILTER}" ]]; then
+  YETUS_ARGS+=("--tests-filter=${TESTS_FILTER}")
+fi
 YETUS_ARGS+=("--personality=${SOURCEDIR}/dev-support/hbase-personality.sh")
 YETUS_ARGS+=("--quick-hadoopcheck")
 if [[ "${SKIP_ERRORPRONE}" = "true" ]]; then
diff --git a/dev-support/yetus_console_to_md.py b/dev-support/yetus_console_to_md.py
index c85806b3cf7e..a01a16e9cb8d 100644
--- a/dev-support/yetus_console_to_md.py
+++ b/dev-support/yetus_console_to_md.py
@@ -21,8 +21,9 @@
 """
 import re
 import sys
+from collections import defaultdict
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple


 # Vote to emoji mapping
@@ -45,195 +46,338 @@ def is_runtime(text: str) -> bool:
     return bool(re.match(r'^\d+m\s+\d+s$', text))


-def parse_table_row(line: str) -> List[str]:
+def parse_table_row(line: str) -> Tuple[str, str, str, str]:
     """
-    Parse a table row and return list of cell values.
-    Returns exactly 4 columns: [vote, subsystem, runtime, comment]
+    Parse a table row and return tuple of cell values.
+    Returns exactly 4 columns: (vote, subsystem, runtime, comment)
     """
     parts = line.split('|')
     # Remove first empty element (from leading |)
     parts = parts[1:] if len(parts) > 1 else []
-    result = []
-    for p in parts[:4]:  # Take first 4 columns
-        result.append(p.strip())
+    # Take first 4 columns and strip whitespace
+    result: List[str] = [p.strip() for p in parts[:4]]
     # Pad to 4 columns if needed
     while len(result) < 4:
         result.append('')
-    return result
+    return result[0], result[1], result[2], result[3]


-def process_first_table(lines: List[str], start_idx: int) -> Tuple[List[str], int]:
+def extract_module_from_surefire(line: str) -> Optional[str]:
+    """Extract module name from surefire plugin execution line."""
+    match = re.search(r'\[INFO\] --- surefire:.*:test .* @ (\S+) ---', line)
+    return match.group(1) if match else None
+
+
+def is_new_surefire_execution(line: str) -> bool:
+    """Check if line indicates a new surefire execution (new module)."""
+    return bool(re.search(r'\[INFO\] --- surefire:.*:test .* @ (\S+) ---', line))
+
+
+def is_results_section_start(line: str) -> bool:
+    """Check if line indicates the start of Results section."""
+    return bool(re.search(r'^\[\w+\] Results:', line.strip()))
+
+
+def is_tests_run_summary(line: str) -> bool:
+    """Check if line is the Tests run summary line."""
+    return bool(re.search(r'^\[\w+\] Tests run:', line.strip()))
+
+
+def parse_results_section(
+    lines: List[str],
+    start_idx: int,
+    current_module: str,
+    result: Dict[str, Any]
+) -> int:
     """
-    Process the first table (Vote, Subsystem, Runtime, Comment).
+    Parse the Results section within a patch-unit file.
+
+    Args:
+        lines: All lines from the file
+        start_idx: Index where Results section starts (after [INFO] Results:)
+        current_module: Current module name
+        result: Dictionary to store parsed results

     Returns:
-        Tuple of (markdown lines, next index to process)
+        Index of next line to process after Results section
     """
-    content = []
     i = start_idx
-
-    # Add table header
-    content.append('\n')
-    content.append('| Vote | Subsystem | Runtime | Comment |\n')
-    content.append('|------|-----------|---------|---------|\n')
-
-    # Skip the original separator line
-    if i < len(lines) and '===' in lines[i]:
-        i += 1
+    current_error_type = None

     while i < len(lines):
         line = lines[i]
         stripped = line.strip()

-        # Check for second table start
-        if '|| Subsystem || Report/Notes ||' in line:
-            break
+        # Section end markers
+        if is_tests_run_summary(line):
+            return i + 1

-        # Skip section separator lines (like +-----------)
-        if stripped.startswith('+--'):
+        if is_new_surefire_execution(line):
+            return i
+
+        # Detect error type sections
+        if re.search(r'^\[\w+\] Failures:', stripped):
+            current_error_type = 'Failures'
             i += 1
             continue

-        # Process table rows
-        if stripped.startswith('|'):
-            parts = parse_table_row(line)
-            vote, subsystem, runtime, comment = parts[0], parts[1], parts[2], parts[3]
-
-            # Case 1: Section header (vote and subsystem are empty, has comment)
-            if not vote and not subsystem:
-                if comment:
-                    content.append(f'| | | | {comment} |\n')
-                    i += 1
-                    continue
-                # If there's only runtime, it's a total time row
-                elif runtime and is_runtime(runtime):
-                    content.append(f'| | | {runtime} | |\n')
-                    i += 1
-                    continue
-                else:
-                    # Empty row, skip
-                    i += 1
-                    continue
-
-            # Case 2: Data row with vote
-            if vote in VOTE_EMOJI:
-                vote_emoji = convert_vote(vote)
-                comment_parts = [comment] if comment else []
-
-                # Check for continuation lines
-                i += 1
-                while i < len(lines):
-                    next_line = lines[i]
-                    next_stripped = next_line.strip()
-
-                    if not next_stripped.startswith('|'):
-                        break
-
-                    # Check for second table start
-                    if '|| Subsystem || Report/Notes ||' in next_line:
-                        break
-
-                    next_parts = parse_table_row(next_line)
-                    next_vote, next_subsystem, next_runtime, next_comment = next_parts[0], next_parts[1], next_parts[2], next_parts[3]
-
-                    # Stop at new data row
-                    if next_vote in VOTE_EMOJI:
-                        break
-
-                    # If vote and subsystem are empty, check if it's a continuation
-                    if not next_vote and not next_subsystem:
-                        # If there's a comment, it's a continuation
-                        if next_comment:
-                            comment_parts.append(next_comment)
-                            i += 1
-                        # If there's only runtime, it's a standalone total time row
-                        elif next_runtime and is_runtime(next_runtime):
-                            break
-                        else:
-                            i += 1
-                    else:
-                        break
-
-                comment_text = ' '.join(comment_parts)
-                content.append(f'| {vote_emoji} | {subsystem} | {runtime} | {comment_text} |\n')
-                continue
-
-            # Case 3: Other cases, skip
+        if re.search(r'^\[\w+\] Flakes:', stripped):
+            current_error_type = 'Flakes'
+            i += 1
+            continue
+
+        if re.search(r'^\[\w+\] Errors:', stripped):
+            current_error_type = 'Errors'
             i += 1
             continue

+        # Parse test entries
+        if current_error_type:
+            test_match = re.search(
+                r'^\[\w+\]\s+((?:org\.)?\S+\.(?:\w+\.)*\w+\.\w+)',
+                stripped
+            )
+            if test_match:
+                test_name = test_match.group(1)
+                if 'test' in test_name.lower():
+                    result[current_module][current_error_type].append(test_name)
+
         i += 1

-    return content, i
+    return i


-# TODO: Yetus should support this natively, but docker integration with job summaries doesn't seem
-# to work out of the box.
-def extract_failed_tests_from_unit_files(output_dir: Path) -> List[Tuple[str, List[str]]]:
+def parse_patch_unit_file(file_path: Path) -> Dict[str, Dict[str, List[str]]]:
     """
-    Extract failed test names from patch-unit-*.txt files.
+    Parse a patch-unit-*.txt file and extract failed tests by module.
+
+    Returns:
+        Dict mapping module name to error types and test names
+        {module: {'Failures': [...], 'Flakes': [...], 'Errors': [...]}}
+    """
+    result = defaultdict(lambda: defaultdict(list))
+    current_module = None
+
+    with open(file_path, 'r') as f:
+        lines = f.readlines()
+
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+
+        # Detect module change via surefire plugin execution
+        module = extract_module_from_surefire(line)
+        if module:
+            current_module = module
+            i += 1
+            continue
+
+        # Skip if no module is active
+        if current_module is None:
+            i += 1
+            continue
+
+        # Parse Results section
+        if is_results_section_start(line):
+            i = parse_results_section(lines, i + 1, current_module, result)
+            continue
+
+        i += 1
+
+    # Convert defaultdict to regular dict
+    return {k: dict(v) for k, v in result.items()}

-    Parses Maven surefire output to find lines like:
-    [ERROR] org.apache.hadoop.hbase.types.TestPBCell.testRoundTrip
+
+def aggregate_failed_tests(yetus_dir: Path) -> Tuple[Dict[str, Dict[str, int]], Dict[str, Dict[str, List[str]]]]:
+    """
+    Aggregate failed tests from all patch-unit-*.txt files.

     Returns:
-        List of (module_name, [failed_test_names]) tuples
+        Tuple of:
+        - counts: {error_type: {module: count}}
+        - details: {module: {error_type: [test_names]}}
     """
-    results = []
+    patch_files = list(yetus_dir.glob('patch-unit-*.txt'))
+
+    if not patch_files:
+        return {}, {}
+
+    # Aggregate results from all files
+    all_results = defaultdict(lambda: defaultdict(list))

-    for unit_file in output_dir.glob('patch-unit-*.txt'):
-        module_name = unit_file.stem.replace('patch-unit-', '')
-        failed_tests = set()
+    for patch_file in patch_files:
+        file_results = parse_patch_unit_file(patch_file)
+        for module, errors in file_results.items():
+            for error_type, tests in errors.items():
+                all_results[module][error_type].extend(tests)

-        with open(unit_file, 'r') as f:
-            in_failures_section = False
-            for line in f:
-                stripped = line.strip()
+    # Count by type and module
+    counts = defaultdict(lambda: defaultdict(int))
+    for module, errors in all_results.items():
+        for error_type, tests in errors.items():
+            counts[error_type][module] = len(tests)

-                if stripped == '[ERROR] Failures:':
-                    in_failures_section = True
-                    continue
+    return dict(counts), dict(all_results)

-                if in_failures_section:
-                    if stripped.startswith('[ERROR]') and not stripped.startswith('[ERROR] Run'):
-                        test_name = stripped.replace('[ERROR] ', '').strip()
-                        if test_name and '.' in test_name:
-                            failed_tests.add(test_name)
-                    elif stripped.startswith('[INFO]') or not stripped:
-                        in_failures_section = False

-        if failed_tests:
-            results.append((module_name, sorted(failed_tests)))
+def generate_failed_tests_table(
+    counts: Dict[str, Dict[str, int]],
+    details: Dict[str, Dict[str, List[str]]]
+) -> List[str]:
+    """Generate the Failed Tests HTML table."""
+    total_failures = sum(sum(m.values()) for m in counts.values())
+    if total_failures == 0:
+        return []
+
+    content = [
+        '\n## Failed Tests\n\n',
+        '
| Error Type | Count | Module | Tests |
|---|---|---|---|
| {error_type} | ' + f'{total_count} | ' + f'{module} | {tests_str} |
| {module} | {tests_str} |