Skip to content

Commit e7c0590

Browse files
Copilot and mrjf authored
Paginate issue discovery to avoid silent data loss at 100+ issues
Add parseLinkHeader() helper to extract the next URL from GitHub API Link headers. Replace single-fetch issue discovery with a paginated while loop that follows Link headers until all pages are fetched. Add unit tests for parseLinkHeader and export from test infrastructure. Agent-Logs-Url: https://github.com/githubnext/autoloop/sessions/4c3fac44-c844-4240-9fb4-b90b2f2d76bc Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com>
1 parent 9978ca0 commit e7c0590

File tree

3 files changed

+62
-10
lines changed

3 files changed

+62
-10
lines changed

tests/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def _load_workflow_functions():
4747
f.write(func_source)
4848
f.write(
4949
"\n\nmodule.exports = "
50-
"{ parseMachineState, parseSchedule, getProgramName, readProgramState };\n"
50+
"{ parseMachineState, parseSchedule, getProgramName, readProgramState, parseLinkHeader };\n"
5151
)
5252

5353
return True
@@ -102,6 +102,7 @@ def _get_program_name_wrapper(pf):
102102
"parse_machine_state": _parse_machine_state_wrapper,
103103
"get_program_name": _get_program_name_wrapper,
104104
"read_program_state": lambda name: _call_js("readProgramState", name),
105+
"parse_link_header": lambda header: _call_js("parseLinkHeader", header),
105106
}
106107

107108

tests/test_scheduling.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
parse_schedule = _funcs["parse_schedule"]
2121
parse_machine_state = _funcs["parse_machine_state"]
2222
get_program_name = _funcs["get_program_name"]
23+
parse_link_header = _funcs["parse_link_header"]
2324

2425

2526
# ---------------------------------------------------------------------------
@@ -663,6 +664,35 @@ def test_forced_program_target_metric_fallback_via_frontmatter(self):
663664
assert target == 0.95
664665

665666

667+
# ---------------------------------------------------------------------------
668+
# parseLinkHeader — extract next-page URL from GitHub API Link header
669+
# ---------------------------------------------------------------------------
670+
671+
class TestParseLinkHeader:
    """Checks that parse_link_header returns the rel="next" URL, or None when absent."""

    def test_returns_null_for_none(self):
        """A missing header (None) yields no next page."""
        result = parse_link_header(None)
        assert result is None

    def test_returns_null_for_empty_string(self):
        """An empty header string yields no next page."""
        result = parse_link_header("")
        assert result is None

    def test_extracts_next_url(self):
        """The next URL is returned when it is the first of several entries."""
        link_header = '<https://api.github.com/repos/o/r/issues?page=2&per_page=100>; rel="next", <https://api.github.com/repos/o/r/issues?page=5&per_page=100>; rel="last"'
        expected = "https://api.github.com/repos/o/r/issues?page=2&per_page=100"
        assert parse_link_header(link_header) == expected

    def test_returns_null_when_no_next(self):
        """Headers carrying only prev/last relations yield no next page."""
        link_header = '<https://api.github.com/repos/o/r/issues?page=1&per_page=100>; rel="prev", <https://api.github.com/repos/o/r/issues?page=5&per_page=100>; rel="last"'
        result = parse_link_header(link_header)
        assert result is None

    def test_next_not_first(self):
        """The next URL is found even when it is not the first entry."""
        link_header = '<https://api.github.com/repos/o/r/issues?page=1&per_page=100>; rel="prev", <https://api.github.com/repos/o/r/issues?page=3&per_page=100>; rel="next", <https://api.github.com/repos/o/r/issues?page=5&per_page=100>; rel="last"'
        expected = "https://api.github.com/repos/o/r/issues?page=3&per_page=100"
        assert parse_link_header(link_header) == expected

    def test_single_next_segment(self):
        """A header consisting of a lone next entry is handled."""
        link_header = '<https://api.github.com/repos/o/r/issues?page=2&per_page=100>; rel="next"'
        expected = "https://api.github.com/repos/o/r/issues?page=2&per_page=100"
        assert parse_link_header(link_header) == expected
695+
666696
# ---------------------------------------------------------------------------
667697
# Extraction sanity check — verify conftest.py found the expected functions
668698
# ---------------------------------------------------------------------------
@@ -677,6 +707,9 @@ def test_parse_machine_state_extracted(self):
677707
def test_get_program_name_extracted(self):
678708
assert callable(get_program_name)
679709

710+
def test_parse_link_header_extracted(self):
711+
assert callable(parse_link_header)
712+
680713
def test_read_program_state_extracted(self):
681714
# read_program_state exists in the workflow but depends on file I/O
682715
assert "read_program_state" in _funcs

workflows/autoloop.md

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,19 @@ steps:
181181
}
182182
}
183183
184+
// Parse an HTTP Link header (as returned by the GitHub API) and extract the
// URL of the entry whose relation type is "next".
//
// header: the raw Link header value, e.g.
//   '<https://api.github.com/...?page=2>; rel="next", <...>; rel="last"'
//   A null, undefined or empty value is treated as "no further pages".
//
// Returns the URL string for the next page, or null if there is none.
function parseLinkHeader(header) {
  if (!header) return null;

  // Scan for `<url> params` entries instead of splitting on ',': a URL may
  // legitimately contain commas (e.g. `labels=a,b`), and splitting would cut
  // such an entry in half and silently end pagination early.
  const entryPattern = /<([^>]+)>([^<]*)/g;
  // The rel parameter may be quoted or a bare token, in any letter case, and
  // may be followed by other parameters.
  const relPattern = /;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,]+))/i;

  for (const entry of String(header).matchAll(entryPattern)) {
    const url = entry[1];
    const rel = relPattern.exec(entry[2]);
    if (!rel) continue;

    // A rel value is a whitespace-separated list of relation types.
    const relationTypes = (rel[1] ?? rel[2]).toLowerCase().split(/\s+/);
    if (relationTypes.includes('next')) return url;
  }
  return null;
}
196+
184197
// Main execution
185198
async function main() {
186199
// Bootstrap: create autoloop programs directory and template if missing
@@ -273,18 +286,23 @@ steps:
273286
} catch (e) { /* stat failed */ }
274287
}
275288
276-
// Scan GitHub issues with the 'autoloop-program' label
289+
// Scan GitHub issues with the 'autoloop-program' label (paginated)
277290
const issueProgramsDir = '/tmp/gh-aw/issue-programs';
278291
fs.mkdirSync(issueProgramsDir, { recursive: true });
279292
try {
280-
const apiUrl = 'https://api.github.com/repos/' + repo + '/issues?labels=autoloop-program&state=open&per_page=100';
281-
const response = await fetch(apiUrl, {
282-
headers: {
283-
'Authorization': 'token ' + githubToken,
284-
'Accept': 'application/vnd.github.v3+json',
285-
},
286-
});
287-
const issues = await response.json();
293+
let nextUrl = 'https://api.github.com/repos/' + repo + '/issues?labels=autoloop-program&state=open&per_page=100';
294+
const issues = [];
295+
while (nextUrl) {
296+
const response = await fetch(nextUrl, {
297+
headers: {
298+
'Authorization': 'token ' + githubToken,
299+
'Accept': 'application/vnd.github.v3+json',
300+
},
301+
});
302+
const page = await response.json();
303+
issues.push(...page);
304+
nextUrl = parseLinkHeader(response.headers.get('link'));
305+
}
288306
for (const issue of issues) {
289307
if (issue.pull_request) continue; // skip PRs
290308
const body = issue.body || '';

0 commit comments

Comments
 (0)