From 8000b21884887b7462b4bef69455b49adbb0eab0 Mon Sep 17 00:00:00 2001 From: "abuzarmahmood (aider)" Date: Thu, 27 Feb 2025 01:15:38 +0000 Subject: [PATCH 1/3] feat: Implement issue timeline caching mechanism for efficient GitHub issue processing --- src/check_issues.py | 101 ++++++++++++++++++++++++++++++++++++++++++++ src/git_utils.py | 90 ++++++++++++++++++++++++++++++++++++++- src/triggers.py | 25 ++++++++++- 3 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 src/check_issues.py diff --git a/src/check_issues.py b/src/check_issues.py new file mode 100644 index 0000000..a191a00 --- /dev/null +++ b/src/check_issues.py @@ -0,0 +1,101 @@ +""" +Main script to check GitHub issues with caching optimization +""" +import os +import sys +from typing import Dict, List + +# Add the src directory to the path if needed +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from github import Issue +from git_utils import ( + get_github_client, + get_repository, + get_open_issues, + load_cache_from_file, + save_cache_to_file +) +from triggers import ( + should_run_detailed_checks, + has_blech_bot_tag, + has_generate_edit_command_trigger, + has_bot_response, + has_user_feedback, + has_develop_issue_trigger, + has_pull_request_trigger, + has_pr_creation_comment, + has_user_comment_on_pr +) +from response_agent import process_issue + + +def check_all_triggers(issue: Issue) -> Dict[str, bool]: + """ + Run all trigger checks on an issue + + Args: + issue: The GitHub issue to check + + Returns: + Dictionary with trigger names as keys and boolean results as values + """ + return { + 'has_blech_bot_tag': has_blech_bot_tag(issue), + 'has_generate_edit_command_trigger': has_generate_edit_command_trigger(issue), + 'has_bot_response': has_bot_response(issue), + 'has_user_feedback': has_user_feedback(issue), + 'has_develop_issue_trigger': has_develop_issue_trigger(issue), + 'has_pull_request_trigger': has_pull_request_trigger(issue), + 'has_pr_creation_comment': has_pr_creation_comment(issue)[0], + 'has_user_comment_on_pr': has_user_comment_on_pr(issue) + } + + +def main(): + """Main function to check issues with caching optimization""" + # Load the cache from file + cache = load_cache_from_file() + + # Initialize GitHub client and get repository + client = get_github_client() + + # Get repositories from config file + repo_names = [] + with open('config/repos.txt', 'r') as f: + repo_names = [line.strip() for line in f if line.strip()] + + for repo_name in repo_names: + try: + repo = get_repository(client, repo_name) + issues = get_open_issues(repo) + + print(f"Checking {len(issues)} open issues in {repo_name}") + + for issue in issues: + # Check if we need to run detailed checks + if should_run_detailed_checks(issue, cache): + print(f"Changes detected in issue #{issue.number}, running detailed checks") + + # Run all trigger checks + trigger_results = check_all_triggers(issue) + + # Process the issue based on trigger results + if any(trigger_results.values()): + print(f"Processing issue #{issue.number} with active triggers: {[k for k, v in trigger_results.items() if v]}") + process_issue(issue, repo_name) + else: + print(f"No active triggers for issue #{issue.number}") + else: + print(f"No changes detected in issue #{issue.number}, skipping detailed checks") + + except Exception as e: + print(f"Error processing repository {repo_name}: {str(e)}") + + # Save the updated cache to file + save_cache_to_file(cache) + print("Cache saved successfully") + + +if __name__ == "__main__": + main() diff --git a/src/git_utils.py b/src/git_utils.py index 2929f87..3769526 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -1,7 +1,9 @@ """ Utility functions for interacting with GitHub API """ -from typing import List, Dict, Optional, Tuple +from typing import List, Dict, Optional, Tuple, Any +import json +import pickle import os import subprocess import git @@ -375,6 +377,92 @@ def has_linked_pr(issue: Issue) -> bool: return False +def get_issue_timeline_hash(issue: Issue) -> str: + """ + Generate a hash representing the current state of an issue's timeline + + Args: + issue: The GitHub issue to check + + Returns: + A string hash representing the current state of the issue's timeline + """ + # Get comments and their timestamps + comments = get_issue_comments(issue) + comment_data = [(c.id, c.updated_at.timestamp()) for c in comments] + + # Get issue details that might change + issue_data = { + 'updated_at': issue.updated_at.timestamp(), + 'state': issue.state, + 'labels': [label.name for label in issue.labels], + 'comments_count': issue.comments + } + + # Combine all data and create a hash + import hashlib + combined_data = str(comment_data) + str(issue_data) + return hashlib.md5(combined_data.encode()).hexdigest() + + +def cache_issue_timeline(issue: Issue, cache: Dict[int, str]) -> bool: + """ + Cache the timeline of an issue and compare with the current timeline. + + Args: + issue: The GitHub issue to check + cache: A dictionary to store cached timeline hashes + + Returns: + True if changes are detected, False otherwise + """ + issue_number = issue.number + current_hash = get_issue_timeline_hash(issue) + + if issue_number not in cache: + cache[issue_number] = current_hash + return True + + cached_hash = cache[issue_number] + if current_hash != cached_hash: + cache[issue_number] = current_hash + return True + + return False + + +def save_cache_to_file(cache: Dict[int, str], filename: str = 'issue_cache.pkl') -> None: + """ + Save the cache to a file. + + Args: + cache: The cache dictionary to save + filename: The name of the file to save the cache to + """ + with open(filename, 'wb') as f: + pickle.dump(cache, f) + + +def load_cache_from_file(filename: str = 'issue_cache.pkl') -> Dict[int, str]: + """ + Load the cache from a file. + + Args: + filename: The name of the file to load the cache from + + Returns: + The loaded cache dictionary, or an empty dictionary if the file doesn't exist + """ + import os + if os.path.exists(filename): + try: + with open(filename, 'rb') as f: + return pickle.load(f) + except (pickle.PickleError, EOFError): + return {} + return {} + + if __name__ == '__main__': client = get_github_client() repo = get_repository(client, 'katzlabbrandeis/blech_clust') diff --git a/src/triggers.py b/src/triggers.py index 7f3994e..b10856f 100644 --- a/src/triggers.py +++ b/src/triggers.py @@ -2,7 +2,12 @@ Functions to check specific conditions """ from github import Issue -from git_utils import get_issue_comments +from git_utils import ( + get_issue_comments, + cache_issue_timeline, + save_cache_to_file, + load_cache_from_file +) def has_blech_bot_tag(issue: Issue) -> bool: @@ -140,3 +145,21 @@ def has_user_comment_on_pr(issue: Issue) -> bool: if "generated by blech_bot" not in comment.body: return True return False + + +def should_run_detailed_checks(issue: Issue, cache: dict) -> bool: + """ + Determine if detailed trigger checks should be run based on timeline changes. + + This function checks if there have been any changes to the issue timeline + since the last check. If changes are detected, it updates the cache and + returns True to indicate that detailed checks should be run. + + Args: + issue: The GitHub issue to check + cache: A dictionary to store cached timeline hashes + + Returns: + True if detailed checks should be run, False otherwise + """ + return cache_issue_timeline(issue, cache) From c480b04a5f560735d427fdad716ba361fd9bee30 Mon Sep 17 00:00:00 2001 From: "abuzarmahmood (aider)" Date: Thu, 27 Feb 2025 01:15:46 +0000 Subject: [PATCH 2/3] fix: Replace undefined pull_request with out_thread in error handling --- src/git_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/git_utils.py b/src/git_utils.py index 3769526..9ca630e 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -337,9 +337,9 @@ def push_changes_with_authentication( if isinstance(out_thread, IssueComment): write_issue_response(out_thread, error_msg) elif isinstance(out_thread, PullRequest): - pr_comments = list(pull_request.get_issue_comments()) + pr_comments = list(out_thread.get_issue_comments()) if 'Failed to push changes' not in pr_comments[-1].body: - pull_request.create_issue_comment(error_msg) + out_thread.create_issue_comment(error_msg) else: raise ValueError( "Invalid output thread type, must be IssueComment or PullRequest") From 9d1cd7eb6564348e20ed0462f126cab93ec6fc8f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 27 Feb 2025 01:15:54 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/check_issues.py | 62 +++++++++++++++++++++++---------------------- src/git_utils.py | 24 +++++++++--------- src/triggers.py | 12 ++++----- 3 files changed, 50 insertions(+), 48 deletions(-) diff --git a/src/check_issues.py b/src/check_issues.py index a191a00..f7768bc 100644 --- a/src/check_issues.py +++ b/src/check_issues.py @@ -1,21 +1,7 @@ """ Main script to check GitHub issues with caching optimization """ -import os -import sys -from typing import Dict, List - -# Add the src directory to the path if needed -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from github import Issue -from git_utils import ( - get_github_client, - get_repository, - get_open_issues, - load_cache_from_file, - save_cache_to_file -) +from response_agent import process_issue from triggers import ( should_run_detailed_checks, has_blech_bot_tag, @@ -27,16 +13,29 @@ has_pr_creation_comment, has_user_comment_on_pr ) -from response_agent import process_issue +from git_utils import ( + get_github_client, + get_repository, + get_open_issues, + load_cache_from_file, + save_cache_to_file +) +from github import Issue +import os +import sys +from typing import Dict, List + +# Add the src directory to the path if needed +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) def check_all_triggers(issue: Issue) -> Dict[str, bool]: """ Run all trigger checks on an issue - + Args: issue: The GitHub issue to check - + Returns: Dictionary with trigger names as keys and boolean results as values """ @@ -56,42 +55,45 @@ def main(): """Main function to check issues with caching optimization""" # Load the cache from file cache = load_cache_from_file() - + # Initialize GitHub client and get repository client = get_github_client() - + # Get repositories from config file repo_names = [] with open('config/repos.txt', 'r') as f: repo_names = [line.strip() for line in f if line.strip()] - + for repo_name in repo_names: try: repo = get_repository(client, repo_name) issues = get_open_issues(repo) - + print(f"Checking {len(issues)} open issues in {repo_name}") - + for issue in issues: # Check if we need to run detailed checks if should_run_detailed_checks(issue, cache): - print(f"Changes detected in issue #{issue.number}, running detailed checks") - + print( + f"Changes detected in issue #{issue.number}, running detailed checks") + # Run all trigger checks trigger_results = check_all_triggers(issue) - + # Process the issue based on trigger results if any(trigger_results.values()): - print(f"Processing issue #{issue.number} with active triggers: {[k for k, v in trigger_results.items() if v]}") + print( + f"Processing issue #{issue.number} with active triggers: {[k for k, v in trigger_results.items() if v]}") process_issue(issue, repo_name) else: print(f"No active triggers for issue #{issue.number}") else: - print(f"No changes detected in issue #{issue.number}, skipping detailed checks") - + print( + f"No changes detected in issue #{issue.number}, skipping detailed checks") + except Exception as e: print(f"Error processing repository {repo_name}: {str(e)}") - + # Save the updated cache to file save_cache_to_file(cache) print("Cache saved successfully") diff --git a/src/git_utils.py b/src/git_utils.py index 9ca630e..a4198b8 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -380,17 +380,17 @@ def has_linked_pr(issue: Issue) -> bool: def get_issue_timeline_hash(issue: Issue) -> str: """ Generate a hash representing the current state of an issue's timeline - + Args: issue: The GitHub issue to check - + Returns: A string hash representing the current state of the issue's timeline """ # Get comments and their timestamps comments = get_issue_comments(issue) comment_data = [(c.id, c.updated_at.timestamp()) for c in comments] - + # Get issue details that might change issue_data = { 'updated_at': issue.updated_at.timestamp(), @@ -398,7 +398,7 @@ def get_issue_timeline_hash(issue: Issue) -> str: 'labels': [label.name for label in issue.labels], 'comments_count': issue.comments } - + # Combine all data and create a hash import hashlib combined_data = str(comment_data) + str(issue_data) @@ -408,33 +408,33 @@ def get_issue_timeline_hash(issue: Issue) -> str: def cache_issue_timeline(issue: Issue, cache: Dict[int, str]) -> bool: """ Cache the timeline of an issue and compare with the current timeline. - + Args: issue: The GitHub issue to check cache: A dictionary to store cached timeline hashes - + Returns: True if changes are detected, False otherwise """ issue_number = issue.number current_hash = get_issue_timeline_hash(issue) - + if issue_number not in cache: cache[issue_number] = current_hash return True - + cached_hash = cache[issue_number] if current_hash != cached_hash: cache[issue_number] = current_hash return True - + return False def save_cache_to_file(cache: Dict[int, str], filename: str = 'issue_cache.pkl') -> None: """ Save the cache to a file. - + Args: cache: The cache dictionary to save filename: The name of the file to save the cache to @@ -446,10 +446,10 @@ def save_cache_to_file(cache: Dict[int, str], filename: str = 'issue_cache.pkl') def load_cache_from_file(filename: str = 'issue_cache.pkl') -> Dict[int, str]: """ Load the cache from a file. - + Args: filename: The name of the file to load the cache from - + Returns: The loaded cache dictionary, or an empty dictionary if the file doesn't exist """ diff --git a/src/triggers.py b/src/triggers.py index b10856f..7549aab 100644 --- a/src/triggers.py +++ b/src/triggers.py @@ -3,9 +3,9 @@ """ from github import Issue from git_utils import ( - get_issue_comments, - cache_issue_timeline, - save_cache_to_file, + get_issue_comments, + cache_issue_timeline, + save_cache_to_file, load_cache_from_file ) @@ -150,15 +150,15 @@ def has_user_comment_on_pr(issue: Issue) -> bool: def should_run_detailed_checks(issue: Issue, cache: dict) -> bool: """ Determine if detailed trigger checks should be run based on timeline changes. - + This function checks if there have been any changes to the issue timeline since the last check. If changes are detected, it updates the cache and returns True to indicate that detailed checks should be run. - + Args: issue: The GitHub issue to check cache: A dictionary to store cached timeline hashes - + Returns: True if detailed checks should be run, False otherwise """