From 44ec0455266b74aa079cd421f43cac0813789e74 Mon Sep 17 00:00:00 2001 From: "abuzarmahmood (aider)" Date: Wed, 12 Mar 2025 01:33:10 +0000 Subject: [PATCH 1/5] feat: Implement fuzzy branch matching using fuzz.partial_ratio for improved issue-branch association --- src/branch_handler.py | 10 ++++++-- src/git_utils.py | 56 ++++++++++++++++++++++++++++++++++++------- src/response_agent.py | 1 + 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/src/branch_handler.py b/src/branch_handler.py index badb9d4..322c6f1 100644 --- a/src/branch_handler.py +++ b/src/branch_handler.py @@ -5,6 +5,7 @@ import git from github.Issue import Issue from typing import List, Optional, Tuple +from fuzzywuzzy import fuzz def get_issue_related_branches( @@ -42,10 +43,13 @@ def get_issue_related_branches( repo = git.Repo(repo_path) possible_branch_name = f"{issue.number}-{'-'.join(issue.title.lower().split(' '))}" + fuzzy_threshold = 80 # Threshold for fuzzy matching (80% similarity) # Check local branches for branch in repo.heads: - if possible_branch_name in branch.name: + # Use partial_ratio for fuzzy matching to find similar branch names + similarity = fuzz.partial_ratio(possible_branch_name, branch.name) + if similarity > fuzzy_threshold: related_branches.append((branch.name, False)) # Check remote branches @@ -56,7 +60,9 @@ def get_issue_related_branches( continue # Remove remote name prefix for comparison branch_name = ref.name.split('/', 1)[1] - if possible_branch_name in branch_name: + # Use partial_ratio for fuzzy matching + similarity = fuzz.partial_ratio(possible_branch_name, branch_name) + if similarity > fuzzy_threshold: related_branches.append((branch_name, True)) os.chdir(orig_dir) diff --git a/src/git_utils.py b/src/git_utils.py index fbd024f..fecbbf1 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -167,6 +167,34 @@ def update_repository(repo_path: str) -> None: origin.pull() +def select_best_branch(issue: Issue, branches: list) -> str: + """ + Select the best branch from multiple candidates using fuzzy matching + + Args: + issue: The GitHub issue to match against + branches: List of branch names to choose from + + Returns: + The best matching branch name + """ + issue_title = issue.title.lower() + issue_number = str(issue.number) + + # First try to find branches that contain the issue number + number_branches = [b for b in branches if issue_number in b] + + if number_branches: + # If we have branches with the issue number, use fuzzy matching on those + best_branch = max(number_branches, + key=lambda b: fuzz.partial_ratio(issue_title, b)) + return best_branch + + # If no branches with issue number, use fuzzy matching on all branches + best_branch = max(branches, + key=lambda b: fuzz.partial_ratio(issue_title, b)) + return best_branch + def get_development_branch(issue: Issue, repo_path: str, create: bool = False) -> str: """ Gets or creates a development branch for an issue @@ -187,8 +215,11 @@ def get_development_branch(issue: Issue, repo_path: str, create: bool = False) - # Check for existing branches related to this issue related_branches = get_issue_related_branches(repo_path, issue) + # Process branches with fuzzy matching scores unique_branches = set([branch_name for branch_name, _ in related_branches]) branch_dict = {} + + # Create a dictionary of branch names with their remote status for branch_name in unique_branches: branch_dict[branch_name] = [] wanted_inds = [i for i, (name, _) in enumerate( @@ -199,15 +230,22 @@ def get_development_branch(issue: Issue, repo_path: str, create: bool = False) - comments = get_issue_comments(issue) if len(branch_dict) > 1: - branch_list = "\n".join( - [f"- {branch_name} : Remote = {is_remote}" - for branch_name, is_remote in branch_dict.items()] - ) - error_msg = f"Found multiple branches for issue #{issue.number}:\n{branch_list}\n" +\ - "Please delete or use existing branches before creating a new one." - if "Found multiple branches" not in comments[-1].body: - write_issue_response(issue, error_msg) - raise RuntimeError(error_msg) + # Try to select the best branch using fuzzy matching + try: + best_branch = select_best_branch(issue, list(branch_dict.keys())) + print(f"Selected best matching branch: {best_branch}") + return best_branch + except Exception as e: + # If selection fails, fall back to the error message + branch_list = "\n".join( + [f"- {branch_name} : Remote = {is_remote}" + for branch_name, is_remote in branch_dict.items()] + ) + error_msg = f"Found multiple branches for issue #{issue.number}:\n{branch_list}\n" +\ + "Please delete or use existing branches before creating a new one." + if "Found multiple branches" not in comments[-1].body: + write_issue_response(issue, error_msg) + raise RuntimeError(error_msg) elif len(branch_dict) == 1: return list(branch_dict.keys())[0] elif create: diff --git a/src/response_agent.py b/src/response_agent.py index 5ac0968..5c1e002 100644 --- a/src/response_agent.py +++ b/src/response_agent.py @@ -46,6 +46,7 @@ import json import re from urlextract import URLExtract +from fuzzywuzzy import fuzz load_dotenv() From 4e67941b7cf25d617fb7d4fbd5c907850073f16e Mon Sep 17 00:00:00 2001 From: "abuzarmahmood (aider)" Date: Wed, 12 Mar 2025 01:33:18 +0000 Subject: [PATCH 2/5] fix: Add missing fuzzywuzzy import in git_utils.py --- src/git_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/git_utils.py b/src/git_utils.py index fecbbf1..81d8eb1 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -5,6 +5,7 @@ import os import subprocess import git +from fuzzywuzzy import fuzz from branch_handler import ( get_issue_related_branches, get_current_branch, From d9a06461891d7b3bb1417022715172755c1d22dc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Mar 2025 01:33:26 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/branch_handler.py | 3 ++- src/git_utils.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/branch_handler.py b/src/branch_handler.py index 322c6f1..aeb9c18 100644 --- a/src/branch_handler.py +++ b/src/branch_handler.py @@ -61,7 +61,8 @@ def get_issue_related_branches( # Remove remote name prefix for comparison branch_name = ref.name.split('/', 1)[1] # Use partial_ratio for fuzzy matching - similarity = fuzz.partial_ratio(possible_branch_name, branch_name) + similarity = fuzz.partial_ratio( + possible_branch_name, branch_name) if similarity > fuzzy_threshold: related_branches.append((branch_name, True)) diff --git a/src/git_utils.py b/src/git_utils.py index 81d8eb1..df12a41 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -171,31 +171,32 @@ def update_repository(repo_path: str) -> None: def select_best_branch(issue: Issue, branches: list) -> str: """ Select the best branch from multiple candidates using fuzzy matching - + Args: issue: The GitHub issue to match against branches: List of branch names to choose from - + Returns: The best matching branch name """ issue_title = issue.title.lower() issue_number = str(issue.number) - + # First try to find branches that contain the issue number number_branches = [b for b in branches if issue_number in b] - + if number_branches: # If we have branches with the issue number, use fuzzy matching on those - best_branch = max(number_branches, + best_branch = max(number_branches, key=lambda b: fuzz.partial_ratio(issue_title, b)) return best_branch - + # If no branches with issue number, use fuzzy matching on all branches - best_branch = max(branches, + best_branch = max(branches, key=lambda b: fuzz.partial_ratio(issue_title, b)) return best_branch + def get_development_branch(issue: Issue, repo_path: str, create: bool = False) -> str: """ Gets or creates a development branch for an issue @@ -219,7 +220,7 @@ def get_development_branch(issue: Issue, repo_path: str, create: bool = False) - # Process branches with fuzzy matching scores unique_branches = set([branch_name for branch_name, _ in related_branches]) branch_dict = {} - + # Create a dictionary of branch names with their remote status for branch_name in unique_branches: branch_dict[branch_name] = [] From 0030825c26f82bb234ef5db08c2bb06d0b66a333 Mon Sep 17 00:00:00 2001 From: "Abuzar Mahmood (aider)" Date: Mon, 17 Mar 2025 11:42:33 -0400 Subject: [PATCH 4/5] feat: Enhance branch search and PR creation with fuzzy matching and branch name comments This commit message captures the key improvements made across the files: 1. Improved fuzzy search for finding issue-related branches 2. Added branch name to PR creation comments 3. Enhanced branch selection algorithm The changes implement more robust branch matching and provide better visibility into the branch used for creating pull requests. --- src/branch_handler.py | 27 +++++++++++++++++++++++---- src/git_utils.py | 31 +++++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/branch_handler.py b/src/branch_handler.py index aeb9c18..e854eef 100644 --- a/src/branch_handler.py +++ b/src/branch_handler.py @@ -14,13 +14,14 @@ def get_issue_related_branches( ) -> List[Tuple[str, bool]]: """ Uses `gh issue develop -l ` to get all branches related to an issue number + and falls back to fuzzy matching if no branches are found through GitHub CLI Args: repo_path: Path to local git repository - issue_number: GitHub issue number to search for + issue: GitHub issue to search for Returns: - List of tuples containing (branch_name, url) + List of tuples containing (branch_name, is_remote) """ issue_number = issue.number @@ -40,13 +41,25 @@ def get_issue_related_branches( print(f"Error getting related branches: {str(e)}") if len(related_branches) == 0: - + # Fall back to fuzzy matching if no branches found through GitHub CLI repo = git.Repo(repo_path) + + # Create a possible branch name based on issue number and title possible_branch_name = f"{issue.number}-{'-'.join(issue.title.lower().split(' '))}" - fuzzy_threshold = 80 # Threshold for fuzzy matching (80% similarity) + + # Also check for just the issue number in branch names + issue_number_str = str(issue.number) + + # Set threshold for fuzzy matching (80% similarity) + fuzzy_threshold = 80 # Check local branches for branch in repo.heads: + # First check if branch contains the issue number + if issue_number_str in branch.name: + related_branches.append((branch.name, False)) + continue + # Use partial_ratio for fuzzy matching to find similar branch names similarity = fuzz.partial_ratio(possible_branch_name, branch.name) if similarity > fuzzy_threshold: @@ -60,6 +73,12 @@ def get_issue_related_branches( continue # Remove remote name prefix for comparison branch_name = ref.name.split('/', 1)[1] + + # First check if branch contains the issue number + if issue_number_str in branch_name: + related_branches.append((branch_name, True)) + continue + # Use partial_ratio for fuzzy matching similarity = fuzz.partial_ratio( possible_branch_name, branch_name) diff --git a/src/git_utils.py b/src/git_utils.py index df12a41..fd55da3 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -181,19 +181,28 @@ def select_best_branch(issue: Issue, branches: list) -> str: """ issue_title = issue.title.lower() issue_number = str(issue.number) + + # Generate a normalized branch name from the issue + normalized_branch = f"{issue_number}-{'-'.join(issue_title.split())}" # First try to find branches that contain the issue number number_branches = [b for b in branches if issue_number in b] if number_branches: - # If we have branches with the issue number, use fuzzy matching on those + if len(number_branches) == 1: + # If only one branch with the issue number, return it + return number_branches[0] + + # If multiple branches with issue number, use fuzzy matching on those + # Use token_sort_ratio for better matching with word order differences best_branch = max(number_branches, - key=lambda b: fuzz.partial_ratio(issue_title, b)) + key=lambda b: fuzz.token_sort_ratio(normalized_branch, b)) return best_branch # If no branches with issue number, use fuzzy matching on all branches + # Use token_sort_ratio for better matching with word order differences best_branch = max(branches, - key=lambda b: fuzz.partial_ratio(issue_title, b)) + key=lambda b: fuzz.token_sort_ratio(normalized_branch, b)) return best_branch @@ -304,6 +313,9 @@ def create_pull_request(repo_path: str) -> str: # Change to repo directory original_dir = os.getcwd() os.chdir(repo_path) + + # Get current branch name + current_branch = get_current_branch(repo_path) # Create pull request result = subprocess.run(['gh', 'pr', 'create', '--fill'], @@ -314,8 +326,9 @@ def create_pull_request(repo_path: str) -> str: # Return to original directory os.chdir(original_dir) - # Return the PR URL from the output - return result.stdout.strip() + # Return the PR URL from the output with branch information + pr_url = result.stdout.strip() + return pr_url, current_branch except FileNotFoundError: raise ValueError("GitHub CLI (gh) not found. Please install it first.") @@ -335,7 +348,13 @@ def create_pull_request_from_issue(issue: Issue, repo_path: str) -> str: URL of the created pull request """ branch = get_development_branch(issue, repo_path) - return create_pull_request(repo_path) + pr_url, branch_name = create_pull_request(repo_path) + + # Add a comment to the issue with the branch name used for the PR + comment_text = f"Created pull request from branch: `{branch_name}`\n{pr_url}" + write_issue_response(issue, comment_text) + + return pr_url def push_changes_with_authentication( From 9a69195911cc1973e6f3b36fb5d7a6a53b23ca01 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:42:40 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/branch_handler.py | 12 ++++++------ src/git_utils.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/branch_handler.py b/src/branch_handler.py index e854eef..7b133bc 100644 --- a/src/branch_handler.py +++ b/src/branch_handler.py @@ -43,13 +43,13 @@ def get_issue_related_branches( if len(related_branches) == 0: # Fall back to fuzzy matching if no branches found through GitHub CLI repo = git.Repo(repo_path) - + # Create a possible branch name based on issue number and title possible_branch_name = f"{issue.number}-{'-'.join(issue.title.lower().split(' '))}" - + # Also check for just the issue number in branch names issue_number_str = str(issue.number) - + # Set threshold for fuzzy matching (80% similarity) fuzzy_threshold = 80 @@ -59,7 +59,7 @@ def get_issue_related_branches( if issue_number_str in branch.name: related_branches.append((branch.name, False)) continue - + # Use partial_ratio for fuzzy matching to find similar branch names similarity = fuzz.partial_ratio(possible_branch_name, branch.name) if similarity > fuzzy_threshold: @@ -73,12 +73,12 @@ def get_issue_related_branches( continue # Remove remote name prefix for comparison branch_name = ref.name.split('/', 1)[1] - + # First check if branch contains the issue number if issue_number_str in branch_name: related_branches.append((branch_name, True)) continue - + # Use partial_ratio for fuzzy matching similarity = fuzz.partial_ratio( possible_branch_name, branch_name) diff --git a/src/git_utils.py b/src/git_utils.py index fd55da3..78c1faa 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -181,7 +181,7 @@ def select_best_branch(issue: Issue, branches: list) -> str: """ issue_title = issue.title.lower() issue_number = str(issue.number) - + # Generate a normalized branch name from the issue normalized_branch = f"{issue_number}-{'-'.join(issue_title.split())}" @@ -192,7 +192,7 @@ def select_best_branch(issue: Issue, branches: list) -> str: if len(number_branches) == 1: # If only one branch with the issue number, return it return number_branches[0] - + # If multiple branches with issue number, use fuzzy matching on those # Use token_sort_ratio for better matching with word order differences best_branch = max(number_branches, @@ -313,7 +313,7 @@ def create_pull_request(repo_path: str) -> str: # Change to repo directory original_dir = os.getcwd() os.chdir(repo_path) - + # Get current branch name current_branch = get_current_branch(repo_path) @@ -349,11 +349,11 @@ def create_pull_request_from_issue(issue: Issue, repo_path: str) -> str: """ branch = get_development_branch(issue, repo_path) pr_url, branch_name = create_pull_request(repo_path) - + # Add a comment to the issue with the branch name used for the PR comment_text = f"Created pull request from branch: `{branch_name}`\n{pr_url}" write_issue_response(issue, comment_text) - + return pr_url