diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5cd5138eb69..d2825f9c34b 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -5,19 +5,8 @@ ## Contribution process -```mermaid -flowchart LR - A[Pre-checks] --> B[PR Tests] - subgraph Code Review/Approval - C1[Expert Review] --> C2[Final Review] - end - B --> C1 - C2 --> D[Merge] -``` - ### Pre-checks -- [ ] I want this PR in a versioned release and have added the appropriate Milestone (e.g., `Core 0.8`) - [ ] I have added relevant unit tests - [ ] I have added relevant functional tests - [ ] I have added proper typing to my code [Typing guidelines](https://docs.python.org/3/library/typing.html) @@ -26,33 +15,32 @@ flowchart LR ### Code review -The following process is enforced via the CODEOWNERS file for changes into `megatron/core`. For changes outside of `megatron/core`, it is up to the PR author whether or not to tag the Final Reviewer team. +Feel free to message or comment the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) to help accelerate your merge into main. The less complex your PR is, the faster it will be approved and merged! -
-For MRs into `main` branch +All PRs start as **draft**. If you open a non-draft PR, it will be automatically converted to draft. -Feel free to message or comment the @mcore-oncall to help accelerate your merge into main. The less complex your PR is, the faster it will be approved and merged! +#### Step 1: Mark PR as "Ready for Review" -#### (Step 1): Add PR label `Expert Review` +1. When your PR is ready, click **Ready for Review**. +2. An oncall reviewer is auto-assigned and expert reviewers are notified based on your changes. + - Some PRs may jump straight to step 2. This is determined by `.github/CODEOWNERS`. -#### (Step 2): Collect the expert reviewers reviews +:warning: Only mark as ready once merge-conflicts are resolved and the CI is passing. +Final Review might get declined if these requirements are not fulfilled. -1. Attach the `Expert Review` label when your PR is ready for review. -2. GitHub auto-assigns expert reviewers based on your changes. They will get notified and pick up your PR soon. +#### Step 2: Final Review -:warning: Only proceed to the next step once all reviewers have approved, merge-conflict are resolved and the CI is passing. -Final Review might get declined if these requirements are not fulfilled. +For PRs that change `megatron/core`, once all expert reviewers have approved, the `Final Review` label is applied **automatically** and final reviewers are assigned. -#### (Step 3): Final Review +For PRs outside `megatron/core`, this step is skipped. -1. Add `Final Review` label -2. GitHub auto-assigns final reviewers based on your changes. They will get notified and pick up your PR soon. +#### Step 3: Approved -#### (Optional Step 4): Cherry-pick into release branch +Once all required reviewers have approved, the `Approved` label is applied **automatically**. -If this PR also needs to be merged into `core_r*` release branches, after this PR has been merged, select `Cherry-pick` to open a new PR into the release branch. +### Merge -
+Any member of [mcore-engineers](https://github.com/orgs/NVIDIA/teams/mcore-engineers) will be able to merge your PR.
For MRs into `dev` branch @@ -60,7 +48,3 @@ The proposed review process for `dev` branch is under active discussion. MRs are mergable after one approval by either `eharper@nvidia.com` or `zijiey@nvidia.com`.
- -### Merging your PR - -Any member of [core-adlr](https://github.com/orgs/teams/NVIDIA/core-adlr) and [`core-nemo`](https://github.com/orgs/teams/NVIDIA/core-nemo) will be able to merge your PR. diff --git a/.github/workflows/auto-swap-labels.yml b/.github/workflows/auto-swap-labels.yml index 5335026e2af..1a1e84de885 100644 --- a/.github/workflows/auto-swap-labels.yml +++ b/.github/workflows/auto-swap-labels.yml @@ -4,6 +4,10 @@ name: Auto Swap Labels on: pull_request_review: types: [submitted] + pull_request_target: + types: [ready_for_review, synchronize] + branches: + - main permissions: pull-requests: write @@ -12,7 +16,13 @@ permissions: jobs: check-approval: runs-on: ubuntu-latest - if: github.event.review.state == 'approved' && github.repository == 'NVIDIA/Megatron-LM' + if: >- + github.repository == 'NVIDIA/Megatron-LM' && + github.event.pull_request.base.ref == 'main' && + ( + (github.event_name == 'pull_request_review' && github.event.review.state == 'approved') || + (github.event_name == 'pull_request_target' && !github.event.pull_request.draft) + ) steps: - name: Check out repository code uses: actions/checkout@v4 @@ -26,7 +36,7 @@ jobs: run: | pip install --no-cache-dir PyGithub slack-sdk - - name: Run Auto Reminder Bot + - name: Run Auto Swap Labels run: | export GH_TOKEN=${{ github.token }} export PR_NUMBER=${{ github.event.pull_request.number }} diff --git a/.github/workflows/force-draft-pr.yml b/.github/workflows/force-draft-pr.yml new file mode 100644 index 00000000000..d45dabf14b7 --- /dev/null +++ b/.github/workflows/force-draft-pr.yml @@ -0,0 +1,36 @@ +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +name: Force Draft PR + +on: + pull_request_target: + types: [opened] + branches: + - main + +permissions: + pull-requests: write + +jobs: + force-draft: + runs-on: ubuntu-latest + if: ${{ !github.event.pull_request.draft && github.repository == 'NVIDIA/Megatron-LM' }} + steps: + - name: Convert PR to draft + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + gh pr ready --undo ${{ github.event.pull_request.number }} --repo ${{ github.repository }} + + - name: Add comment explaining draft policy + env: + GH_TOKEN: ${{ github.token }} + run: | + gh pr comment ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --body \ + "This PR has been automatically converted to **draft** because all PRs must start as drafts. + + When you are ready for review, click **Ready for Review** to begin the review process. This will: + 1. Add the oncall reviewer (optional reviewer) + 2. Add required review teams based on your changes + + See the [contribution guide](https://github.com/NVIDIA/Megatron-LM/blob/main/docs/developer/submit.md) for more details." diff --git a/.github/workflows/oncall-assign.yml b/.github/workflows/oncall-assign.yml index d4cc47d5f9e..87f60dd32f3 100644 --- a/.github/workflows/oncall-assign.yml +++ b/.github/workflows/oncall-assign.yml @@ -16,7 +16,7 @@ name: Oncall Assign on: pull_request_target: - types: [opened, ready_for_review] + types: [ready_for_review] branches: - main diff --git a/docs/developer/oncall.md b/docs/developer/oncall.md index ee5582ca24d..0e5b38e2708 100644 --- a/docs/developer/oncall.md +++ b/docs/developer/oncall.md @@ -40,13 +40,13 @@ Below is the checklist that the oncall needs to go through for each PR. - Do all tests pass? - Oncall will need to kick off testing suite for external reviewers - Comment “/ok to test commid_id” to kick off testing suite -- Add the “Expert Review” label - - Select an expert reviewer from each expert group as a reviewer. If you’re unsure who to select, pick a “maintainer” or manager. +- Expert reviewers are notified after the PR is marked “Ready for Review” - **Expert reviewers should review within 1 business day.** Message the assigned reviewer if it is taking longer. The reviewer either needs to review the PR or suggest an alternate reviewer. - - If the reviewer is not responding after 2 business days, escalate to the reviewer's manager. -- Add the “Final Review” label after experts approve + - If the reviewer is not responding after 2 business days, escalate to the reviewer’s manager. +- For `megatron/core` PRs, the “Final Review” label is applied automatically once all expert reviewers approve - Final reviewers should review within 1 business day. Message the assigned reviewer if it is taking longer. - - If the reviewer is not responding after 2 business days, escalate to the reviewer's manager. + - If the reviewer is not responding after 2 business days, escalate to the reviewer’s manager. +- The “Approved” label is applied automatically once all required reviewers have approved ## Issues and Discussion Questions diff --git a/docs/developer/submit.md b/docs/developer/submit.md index a46df22f85c..205e18cc52f 100644 --- a/docs/developer/submit.md +++ b/docs/developer/submit.md @@ -9,17 +9,26 @@ # How to Submit a PR -## Step 1: Add PR label `Expert Review` +All PRs start as **draft**. If you open a non-draft PR, it will be automatically converted to draft. -## Step 2: Collect the expert reviewers reviews +## Step 1: Mark PR as "Ready for Review" -1. Attach the `Expert Review` label when your PR is ready for review. -2. GitHub auto-assigns expert reviewers based on your changes. They will get notified and pick up your PR soon. +1. When your PR is ready, click **Ready for Review**. +2. The oncall reviewer is auto-assigned and expert reviewers are notified based on your changes. They will get notified and pick up your PR soon. -:warning: Only proceed to the next step once all reviewers have approved, merge-conflict are resolved and the CI is passing. +:warning: Only mark as ready once all merge-conflicts are resolved and the CI is passing. Final Review might get declined if these requirements are not fulfilled. -## Step 3: Final Review +## Step 2: Final Review (`megatron/core` only) -1. Add `Final Review` label -2. GitHub auto-assigns final reviewers based on your changes. They will get notified and pick up your PR soon. +For PRs that change `megatron/core`, once all expert reviewers have approved, the `Final Review` label is applied **automatically** and final reviewers are assigned. + +For PRs outside `megatron/core`, this step is skipped. + +## Step 3: Approved + +Once all required reviewers have approved, the `Approved` label is applied **automatically**. The PR is now ready to merge. + +## Step 4: Merge + +Any member of [mcore-engineers](https://github.com/orgs/NVIDIA/teams/mcore-engineers) will be able to merge your PR. diff --git a/tests/test_utils/python_scripts/auto_reminder_github.py b/tests/test_utils/python_scripts/auto_reminder_github.py index eca1108dcc6..0329b2b23e0 100644 --- a/tests/test_utils/python_scripts/auto_reminder_github.py +++ b/tests/test_utils/python_scripts/auto_reminder_github.py @@ -40,6 +40,7 @@ class Reminder: class PRReviewTracker: EXPERT_REVIEW = "Expert Review" FINAL_REVIEW = "Final Review" + APPROVED = "Approved" EXCLUDED_TEAMS = {"core-adlr", "core-nemo"} def __init__( @@ -127,6 +128,11 @@ def get_label_date(self, pr, label: str): ] return max(dates) if dates else None + def get_ready_for_review_date(self, pr): + """Get the date a PR was marked as ready for review.""" + dates = [e.created_at for e in pr.as_issue().get_events() if e.event == "ready_for_review"] + return max(dates) if dates else None + def days_since(self, date): """Calculate days since given date.""" if not date: @@ -138,11 +144,22 @@ def days_since(self, date): def get_stage(self, pr): """Get current review stage.""" labels = {l.name for l in pr.labels} - return self.FINAL_REVIEW if self.FINAL_REVIEW in labels else self.EXPERT_REVIEW + if self.APPROVED in labels: + return self.APPROVED + if self.FINAL_REVIEW in labels: + return self.FINAL_REVIEW + return self.EXPERT_REVIEW def get_reviewers(self, pr): """Get filtered reviewer emails who haven't approved yet.""" stage = self.get_stage(pr) + + if stage == self.APPROVED: + return ( + [self.get_user_email(pr.user.login)], + "All reviewers have approved. Please merge the PR.", + ) + org = self.github.get_organization(self.repo.organization.login) # 1. Get the latest review state for everyone who has submitted a review @@ -207,9 +224,19 @@ def get_reviewers(self, pr): # 8. Handle the original edge cases if len(reviewer_emails) == 0: if stage == self.EXPERT_REVIEW: + # No reviewer activity yet — assignment hasn't completed (e.g. PR just became + # ready-for-review). Don't fire a spurious "all approved" message. + has_reviewer_activity = bool( + approvers + or non_approving_reviewers + or pending_individuals + or pending_teams_slugs + ) + if not has_reviewer_activity: + return [], "Waiting for reviewers to be assigned." # Assign to PR author reviewer_emails = [self.get_user_email(pr.user.login)] - action_message = "All Expert Reviewers approved the PR. Please attach the Final Review label to proceed with the review." + action_message = "All Expert Reviewers have approved the PR." elif stage == self.FINAL_REVIEW: # Assign to mcore-reviewers who approved try: @@ -217,7 +244,7 @@ def get_reviewers(self, pr): mcore_members = {m.login for m in mcore_team.get_members()} valid_approvers = approvers & mcore_members reviewer_emails = sorted([self.get_user_email(u) for u in valid_approvers]) - action_message = "All Final Reviewers approved the PR. Please ping an Expert or Final Reviewer to merge the PR." + action_message = "All Final Reviewers approved the PR. Please ping the @mcore-oncall to merge the PR." except Exception as e: logger.warning( @@ -229,7 +256,14 @@ def get_reviewers(self, pr): def create_reminder(self, pr): """Create reminder for PR.""" stage = self.get_stage(pr) - stage_days = self.days_since(self.get_label_date(pr, stage)) + ready_date = self.get_ready_for_review_date(pr) + if stage == self.EXPERT_REVIEW: + stage_days = self.days_since(ready_date) + elif stage in (self.FINAL_REVIEW, self.APPROVED): + stage_days = self.days_since(self.get_label_date(pr, stage)) + else: + stage_days = 0 + total_review_days = self.days_since(ready_date) author_email = self.get_user_email(pr.user.login) reviewer_emails, action_message = self.get_reviewers(pr) escaped_title = html.escape(pr.title, quote=False) @@ -241,7 +275,7 @@ def create_reminder(self, pr): author=self.get_slack_user_id(author_email), priority="P0" if stage_days > 3 else "P1" if stage_days >= 1 else "P2", review_stage=stage, - total_review_time=self.days_since(self.get_label_date(pr, self.EXPERT_REVIEW)), + total_review_time=total_review_days, current_stage_time=stage_days, reviewers=[self.get_slack_user_id(email) for email in reviewer_emails], action_message=action_message, @@ -256,12 +290,11 @@ def generate_reminders(self): reminders = [] for milestone in milestones: - # Find issues with the 'Expert Review' or 'Final Review' label + # Find all open non-draft PRs with this milestone query = ( f'repo:"{self.repo.full_name}" ' f'milestone:"{milestone.title}" ' - f'is:open is:pr -is:draft ' - f'label:"{self.EXPERT_REVIEW}","{self.FINAL_REVIEW}"' + f'is:open is:pr -is:draft' ) try: # Use search_issues for a more direct query instead of get_issues + filtering diff --git a/tests/test_utils/python_scripts/swap_pr_labels.py b/tests/test_utils/python_scripts/swap_pr_labels.py index aa5875443f8..a98a83b4d20 100644 --- a/tests/test_utils/python_scripts/swap_pr_labels.py +++ b/tests/test_utils/python_scripts/swap_pr_labels.py @@ -35,6 +35,7 @@ class Reminder: class PRReviewTracker: EXPERT_REVIEW = "Expert Review" FINAL_REVIEW = "Final Review" + APPROVED = "Approved" EXCLUDED_TEAMS = {"core-adlr", "core-nemo"} def __init__(self, token: str, repo_name: str, pr_number: str): @@ -43,26 +44,49 @@ def __init__(self, token: str, repo_name: str, pr_number: str): self.pr = self.repo.get_pull(pr_number) self.stage = self.get_stage(self.pr) self.org = self.github.get_organization(self.repo.organization.login) + self._team_cache = {} def get_stage(self, pr): """Get current review stage.""" labels = {l.name for l in pr.labels} - return self.FINAL_REVIEW if self.FINAL_REVIEW in labels else self.EXPERT_REVIEW + if self.APPROVED in labels: + return self.APPROVED + if self.FINAL_REVIEW in labels: + return self.FINAL_REVIEW + return self.EXPERT_REVIEW + + def _get_team_members(self, slug): + """Get all members of a team, with caching.""" + if slug not in self._team_cache: + try: + self._team_cache[slug] = { + m.login for m in self.org.get_team_by_slug(slug).get_members() + } + except Exception as e: + logger.warning(f"Could not get members for team {slug}: {e}") + self._team_cache[slug] = set() + return self._team_cache[slug] + + def _get_teams_members(self, slugs): + """Get all members of multiple teams.""" + members = set() + for slug in slugs: + members.update(self._get_team_members(slug)) + return members def swap_labels(self): - """Get filtered reviewer emails who haven't approved yet.""" + """Evaluate review state and update labels accordingly.""" pr = self.pr - if self.stage == self.FINAL_REVIEW: - logger.info(f"PR #{self.pr.number} is in the {self.stage} stage. No reviewers needed.") + if pr.draft: + logger.info(f"PR #{pr.number} is a draft. Skipping label swap.") return # 1. Get the latest review state for everyone who has submitted a review latest_reviews = {} try: for review in pr.get_reviews(): - if not review.user: # Handle rare cases of deleted users + if not review.user: continue - # Only track 'APPROVED' or 'CHANGES_REQUESTED' as definitive states if review.state in ("APPROVED", "CHANGES_REQUESTED"): if ( review.user.login not in latest_reviews @@ -72,61 +96,139 @@ def swap_labels(self): except Exception as e: logger.warning(f"Could not get reviews for PR #{pr.number}: {e}") - # 2. Separate reviewers into approvers (List B) and non-approvers approvers = {user for user, review in latest_reviews.items() if review.state == "APPROVED"} - non_approving_reviewers = { + non_approvers = { user for user, review in latest_reviews.items() if review.state == "CHANGES_REQUESTED" } - # 3. Get all *currently pending* review requests + # 2. Get all currently pending review requests try: pending_users_req, pending_teams_req = pr.get_review_requests() pending_individuals = {r.login for r in pending_users_req} - pending_teams_slugs = {t.slug for t in pending_teams_req} + pending_team_slugs = {t.slug for t in pending_teams_req} except Exception as e: logger.warning(f"Could not get review requests for PR #{pr.number}: {e}") pending_individuals = set() - pending_teams_slugs = set() - - # 4. Filter pending teams based on the current stage - teams_to_query = ( - pending_teams_slugs - self.EXCLUDED_TEAMS - if self.stage == self.EXPERT_REVIEW - else pending_teams_slugs & self.EXCLUDED_TEAMS - ) - - # 5. Get members from the required pending teams - pending_team_members = set() - for slug in teams_to_query: + pending_team_slugs = set() + + # 3. Classify teams into expert vs final (excluded) + expert_team_slugs = pending_team_slugs - self.EXCLUDED_TEAMS + final_team_slugs = pending_team_slugs & self.EXCLUDED_TEAMS + + # 4. Get team members + expert_team_members = self._get_teams_members(expert_team_slugs) + all_excluded_members = self._get_teams_members(self.EXCLUDED_TEAMS) + + # 5. Compute pending expert reviewers + expert_non_approvers = non_approvers - all_excluded_members + pending_expert = ( + pending_individuals | expert_team_members | expert_non_approvers + ) - approvers + logger.info(f"Pending expert reviewers: {pending_expert}") + + # 6. Compute pending final reviewers + final_pending_members = self._get_teams_members(final_team_slugs) + final_non_approvers = non_approvers & all_excluded_members + pending_final = (final_pending_members | final_non_approvers) - approvers + logger.info(f"Pending final reviewers: {pending_final}") + + # 7. Determine if final review is needed at all (excluded teams are assigned) + excluded_who_reviewed = (approvers | non_approvers) & all_excluded_members + needs_final_review = bool(final_team_slugs) or bool(excluded_who_reviewed) + + # 8. Guard: if no reviewers exist at all, the review process hasn't started yet. + # This prevents a race condition where the swap script runs before the oncall + # reviewer is assigned (both trigger on ready_for_review concurrently). + has_any_reviewers = pending_individuals or pending_team_slugs or approvers or non_approvers + if not has_any_reviewers and self.stage == self.EXPERT_REVIEW: + logger.info(f"PR #{pr.number} has no reviewers assigned yet. Skipping.") + return + + # 9. State machine: update labels based on current stage and pending reviewers + if self.stage == self.APPROVED: + self._handle_approved_stage(pr, pending_expert, pending_final) + elif self.stage == self.FINAL_REVIEW: + self._handle_final_review_stage(pr, pending_expert, pending_final) + else: + self._handle_expert_review_stage(pr, pending_expert, pending_final, needs_final_review) + + def _handle_approved_stage(self, pr, pending_expert, pending_final): + """Handle PRs that already have the Approved label.""" + if len(pending_expert) > 0 or len(pending_final) > 0: + # New reviewers appeared — revert try: - pending_team_members.update( - m.login for m in self.org.get_team_by_slug(slug).get_members() + pr.remove_from_labels(self.APPROVED) + logger.info(f'Removed "{self.APPROVED}" from PR #{pr.number}') + except Exception as e: + logger.warning(f'Failed to remove "{self.APPROVED}" from PR #{pr.number}: {e}') + + if len(pending_expert) > 0: + # Back to expert review — also remove Final Review if present + try: + pr.remove_from_labels(self.FINAL_REVIEW) + except Exception: + pass + logger.info( + f'Reverted PR #{pr.number} to expert review — pending: {pending_expert}' + ) + else: + # Expert review done but final review needed again + try: + pr.add_to_labels(self.FINAL_REVIEW) + except Exception: + pass + logger.info(f'Reverted PR #{pr.number} to final review — pending: {pending_final}') + else: + logger.info(f"PR #{pr.number} is approved. No changes needed.") + + def _handle_final_review_stage(self, pr, pending_expert, pending_final): + """Handle PRs in the Final Review stage.""" + if len(pending_expert) > 0: + # New expert reviewers appeared — revert to expert review + try: + pr.remove_from_labels(self.FINAL_REVIEW) + logger.info( + f'Removed "{self.FINAL_REVIEW}" from PR #{pr.number} — ' + f'new expert reviewers pending: {pending_expert}' ) except Exception as e: - logger.warning(f"Could not get members for team {slug} on PR #{pr.number}: {e}") - - # 6. "List A": Combine all users who *still need to review* - all_required_reviewers = ( - pending_individuals | pending_team_members | non_approving_reviewers - ) - - # 7. Final list (List A - List B): - pending_reviewers = all_required_reviewers - approvers - logger.info(f"Pending reviewers: {pending_reviewers}") - if len(pending_reviewers) == 0: + logger.warning(f'Failed to remove "{self.FINAL_REVIEW}" from PR #{pr.number}: {e}') + elif len(pending_final) == 0: + # All final reviewers approved — move to Approved, remove Final Review try: - pr.remove_from_labels(self.EXPERT_REVIEW) - logger.info(f'Removed "{self.EXPERT_REVIEW}" label from PR #{pr.number}') + pr.remove_from_labels(self.FINAL_REVIEW) + logger.info(f'Removed "{self.FINAL_REVIEW}" from PR #{pr.number}') except Exception as e: - logger.warning( - f'Failed to remove "{self.EXPERT_REVIEW}" label from PR #{pr.number}: {e}' - ) + logger.warning(f'Failed to remove "{self.FINAL_REVIEW}" from PR #{pr.number}: {e}') + try: + pr.add_to_labels(self.APPROVED) + logger.info(f'Added "{self.APPROVED}" to PR #{pr.number}') + except Exception as e: + logger.warning(f'Failed to add "{self.APPROVED}" to PR #{pr.number}: {e}') + else: + logger.info(f"PR #{pr.number} is in final review. Pending: {pending_final}") + + def _handle_expert_review_stage(self, pr, pending_expert, pending_final, needs_final_review): + """Handle PRs in the Expert Review stage (no review labels yet).""" + if len(pending_expert) > 0: + logger.info(f"PR #{pr.number} is in expert review. Pending: {pending_expert}") + return + # All expert reviewers approved + if needs_final_review and len(pending_final) > 0: + # Final review teams are assigned and still pending try: pr.add_to_labels(self.FINAL_REVIEW) - logger.info(f'Added "{self.FINAL_REVIEW}" label to PR #{pr.number}') + logger.info(f'Added "{self.FINAL_REVIEW}" to PR #{pr.number}') + except Exception as e: + logger.warning(f'Failed to add "{self.FINAL_REVIEW}" to PR #{pr.number}: {e}') + else: + # No final review needed, or final reviewers already approved + try: + pr.add_to_labels(self.APPROVED) + logger.info(f'Added "{self.APPROVED}" to PR #{pr.number}') except Exception as e: - logger.warning(f'Failed to add "{self.FINAL_REVIEW}" label to PR #{pr.number}: {e}') + logger.warning(f'Failed to add "{self.APPROVED}" to PR #{pr.number}: {e}') def main():