-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_github_issues.py
More file actions
52 lines (46 loc) · 1.53 KB
/
get_github_issues.py
File metadata and controls
52 lines (46 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests
from dotenv import load_dotenv, find_dotenv
# Load environment variables (python-dotenv: find_dotenv locates the file,
# load_dotenv reads it)
_ = load_dotenv(find_dotenv())

# Configuration: repository to query and the issues endpoint built from it
GITHUB_OWNER = "langchain-ai"
GITHUB_REPO = "langchain"
api_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO}/issues"

# Attach an Authorization header only when GITHUB_TOKEN is set to a
# non-empty value; otherwise requests are sent without credentials.
if os.environ.get('GITHUB_TOKEN'):
    headers = {"Authorization": f"token {os.environ['GITHUB_TOKEN']}"}
else:
    headers = {}
# Fetch open issues that were updated within the last 90 days.
# NOTE: the `since` filter applies to an issue's last-update time, so older
# issues with no recent activity are not returned.
# The timestamp is sent as explicit UTC in the YYYY-MM-DDTHH:MM:SSZ form the
# GitHub API documents, rather than naive local time with microseconds.
params = {
    "state": "open",
    "per_page": 100,
    "since": (datetime.now(timezone.utc) - timedelta(days=90)).strftime("%Y-%m-%dT%H:%M:%SZ"),
}
all_issues = []
page = 1
# Walk the paginated endpoint until an empty page (or a failure) is hit.
while True:
    params["page"] = page
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(api_url, params=params, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Best-effort: keep whatever was collected so far, but say why we stopped.
        print(f"Request for page {page} failed: {exc}")
        break
    if response.status_code != 200:
        # Rate limiting or bad credentials would otherwise truncate the
        # dataset without any indication.
        print(
            f"GitHub API returned status {response.status_code} for page {page}; "
            f"stopping with {len(all_issues)} items fetched"
        )
        break
    if not (issues := response.json()):
        # An empty page means there is nothing further to fetch.
        break
    all_issues.extend(issues)
    page += 1
# Process issues into dataset: one flat record per fetched item.
# Items carrying a "pull_request" key are skipped so that only plain issues
# end up in the dataset.
dataset = [
    {
        "issue_number": issue["number"],
        "issue_url": issue["html_url"],
        "title": issue["title"],
        # The "body" key is present but null for issues without a description,
        # so a .get() default never applies; normalise None to "" explicitly.
        "description": issue.get("body") or "",
        # Flatten the label objects into a single comma-separated string.
        "labels": ", ".join(label["name"] for label in issue.get("labels", [])),
        "created_at": issue["created_at"],
        "comments_count": issue["comments"],
        "state": issue["state"],
    }
    for issue in all_issues
    if "pull_request" not in issue
]
# Save dataset as a CSV file in the same directory as this script
script_dir = os.path.dirname(__file__)
output_filename = os.path.join(script_dir, f"{GITHUB_REPO}_issues_dataset.csv")
issues_frame = pd.DataFrame(dataset)
# index=False leaves the DataFrame's row index out of the file
issues_frame.to_csv(output_filename, index=False)
print(f"Saved {len(dataset)} issues to {output_filename}")