Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
## 0.0.9

- Extracted HTTP fetch logic into a dedicated `Fetcher` module (`bin/fetcher.ml`), separating it from CLI argument definitions in `bin/main.ml`.
- Added terminal progress spinner on stderr showing current page and optional page cap (e.g. `/ Fetching page 3 of 5...`); clears itself when done so stdout redirection (e.g. `> out.md`) is unaffected.
- Added post-fetch summary on stderr: pages fetched, repository count, and elapsed time in seconds (e.g. `✓ 5 pages • 487 repositories • 3.2s`).
- Bumped `jingoo` dependency to `>= 1.5.2`.
- Fixed `Re2` regex compiled once at module load instead of on every paginated response.
- Cleaned up `github.mli`: removed `[@@deriving ...]` attributes from type declarations and replaced with explicit `val` signatures for the functions actually used externally, narrowing the public API surface.
- Added `unix` to `bin/dune` library dependencies.
- Using [Eio.Stream](https://ocaml-multicore.github.io/eio/eio/Eio/Stream/index.html) to accumulate responses.
- Added `PAGE_SIZE` cli parameter.
- Added `--timeout` / `-T` CLI arg (default 600s): per-request timeout in seconds passed to the HTTP fetcher.
- Added `--max-retries` / `-r` CLI arg (default 3): number of retry attempts on transient failures or timeouts.
- Added per-request timeout and exponential-backoff retry in `Http_util.fetch` using `Eio.Time.Timeout`.

## 0.0.8

- Replaced deprecated `Mirage_crypto_rng_eio.run` with `Mirage_crypto_rng_unix.use_default` from `mirage-crypto-rng.unix`.
Expand Down
3 changes: 2 additions & 1 deletion bin/dune
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
cmdliner
logs.fmt
logs.threaded
mirage-crypto-rng.unix))
mirage-crypto-rng.unix
unix))
95 changes: 95 additions & 0 deletions bin/fetcher.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
open Cohttp_eio
open Starred_ml.Util
open Starred_ml.Http_util
module Github = Starred_ml.Github

let spinner_frames = [| "/"; "-"; "\\"; "|" |]
let total_pages = ref 0

let show_progress frame page (max_pages : int option) =
let max = Option.fold ~none:"" ~some:(Printf.sprintf "of %i") max_pages in
Printf.eprintf "\r%s Fetching page %d %s..."
spinner_frames.(frame mod Array.length spinner_frames)
page max;
incr total_pages;
flush stderr

let clear_progress () =
Printf.eprintf "\r\027[K";
flush stderr

let print_summary repo_count elapsed =
Printf.eprintf "✓ %d page%s • %d repositor%s • %.1fs\n" !total_pages
(if !total_pages = 1 then "" else "s")
repo_count
(if repo_count = 1 then "y" else "ies")
elapsed

let run (max_pages : int option) (page_size : int) (timeout_s : float)
(max_retries : int) url token template =
if page_size < 1 || page_size > 100 then (
Printf.eprintf "Error: --page-size must be between 1 and 100 (got %d)\n"
page_size;
exit 1);
total_pages := 0;
let t0 = Unix.gettimeofday () in
try
Eio_main.run @@ fun env ->
Mirage_crypto_rng_unix.use_default ();
Random.self_init ();
let clock = env#mono_clock in
let config = { timeout_s; max_retries; backoff_base_s = 1.0 } in
let client =
Client.make ~https:(Some (https ~authenticator:null_auth)) env#net
in
Eio.Switch.run @@ fun sw ->
(* Stream carries one page-batch per item; None signals end-of-stream. *)
let stream : Github.starred list option Eio.Stream.t =
Eio.Stream.create 2
in
(* Producer fiber: pages through the API, pushes batches onto the stream.
Uses [Eio.Fiber.fork ~sw] (fire-and-forget): exceptions cancel the
switch, which unblocks [Eio.Stream.take] in the consumer automatically
via structured concurrency — no extra error handling needed. *)
Eio.Fiber.fork ~sw (fun () ->
let rec produce url curr_page frame =
show_progress frame curr_page max_pages;
match fetch ~sw ~clock ~config url client token with
| Some (body, next_url_opt) -> (
Eio.Stream.add stream (Some (Github.from_string body));
let within_limit =
Option.value ~default:max_int max_pages > curr_page
in
match next_url_opt with
| Some next_url when within_limit ->
produce next_url (curr_page + 1) (frame + 1)
| _ -> ())
| None -> ()
in
produce (Format.sprintf "%s?per_page=%d" url page_size) 1 0;
Eio.Stream.add stream None);
(* Consumer: drains the stream into a Queue (O(1) push per item).
[Queue.to_seq] traverses front-to-back, preserving GitHub insertion
order with no reversal needed. *)
let q = Queue.create () in
let rec consume () =
match Eio.Stream.take stream with
| Some batch ->
List.iter (fun item -> Queue.push item q) batch;
consume ()
| None -> ()
in
consume ();
let content = Queue.to_seq q |> List.of_seq in
clear_progress ();
print_summary (List.length content) (Unix.gettimeofday () -. t0);
Eio.Stdenv.stdout env
|> Eio.Flow.copy_string @@ print_content content template
with
| Failure msg ->
Printf.eprintf "Error: %s\n" msg;
exit 1
| exn ->
Printf.eprintf "Fatal: %s\n" (Printexc.to_string exn);
Printexc.print_backtrace stderr;
exit 1
59 changes: 27 additions & 32 deletions bin/main.ml
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
open Cohttp_eio
open Starred_ml.Util
open Starred_ml.Http_util
open Cmdliner
module Github = Starred_ml.Github

let () = Logs.set_reporter (Logs_fmt.reporter ())
and () = Logs_threaded.enable ()
Expand Down Expand Up @@ -46,35 +42,34 @@ module Render_cli = struct
& opt string "https://api.github.com/user/starred"
& info [ "u"; "url" ] ~env ~docv:"GITHUB_URL" ~doc)

let fetch (max_pages : int option) url token template =
try
Eio_main.run @@ fun env ->
Mirage_crypto_rng_unix.use_default ();
let client =
Client.make ~https:(Some (https ~authenticator:null_auth)) env#net
in
Eio.Switch.run @@ fun sw ->
let rec fetch_github l acc curr_page =
match fetch ~sw l client token with
| Some (r, Some next_url)
when Option.value ~default:max_int max_pages >= curr_page ->
fetch_github next_url (acc @ Github.from_string r) (curr_page + 1)
| Some (r, _) -> acc @ Github.from_string r
| None -> acc
in
let content = fetch_github (Format.sprintf "%s?per_page=100" url) [] 1 in
Eio.Stdenv.stdout env
|> Eio.Flow.copy_string @@ print_content content template
with
| Failure msg ->
Printf.eprintf "Error: %s\n" msg;
exit 1
| exn ->
Printf.eprintf "Fatal: %s\n" (Printexc.to_string exn);
Printexc.print_backtrace stderr;
exit 1
let page_size =
let doc =
"Number of results per page returned by the GitHub API. Must be between \
1 and 100. Higher values reduce the number of HTTP requests required to \
fetch all starred repositories."
in
Arg.(value & opt int 100 & info [ "p"; "page-size" ] ~docv:"PAGE_SIZE" ~doc)

let timeout =
let doc =
"Per-request timeout in seconds. If a GitHub API request (including \
reading the response body) does not complete within this duration it is \
cancelled and retried up to --max-retries times."
in
Arg.(value & opt float 600.0 & info [ "T"; "timeout" ] ~docv:"SECONDS" ~doc)

let max_retries =
let doc =
"Maximum number of times a failed or timed-out request is retried before \
the command exits with an error."
in
Arg.(
value & opt int 3 & info [ "r"; "max-retries" ] ~docv:"MAX_RETRIES" ~doc)

let fetch_t = Term.(const fetch $ max_pages $ url $ token $ template)
let fetch_t =
Term.(
const Fetcher.run $ max_pages $ page_size $ timeout $ max_retries $ url
$ token $ template)

let cmd =
let doc = "Syncs Github starred items for the authenticated user" in
Expand Down
7 changes: 4 additions & 3 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

(package
(name starred_ml)
(synopsis "Generates a awesome list markdown")
(synopsis "Generates an Awesome-style markdown list from GitHub stars")
(description "Turn your starred items into a awesomeness list of repos")
(depends
(cmdliner
Expand All @@ -39,7 +39,7 @@
(logs
(>= 0.7.0))
(jingoo
(>= 1.5.0))
(>= 1.5.2))
(fmt
(>= 0.9.0))
(eio_main
Expand All @@ -48,5 +48,6 @@
(>= 1.3))
(cohttp-eio
(>= 6.2.1))
ocaml
(ocaml
(>= 5.2.0))
dune))
8 changes: 4 additions & 4 deletions lib/github.mli
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
type owner = { login : string }
[@@deriving show, yojson { strict = false; exn = true }]

type starred = {
name : string;
Expand All @@ -9,16 +8,17 @@ type starred = {
html_url : string;
owner : owner;
}
[@@deriving show, yojson { strict = false; exn = true }]

type starred_response = starred list
[@@deriving yojson { strict = false; exn = true }]

val show_starred : starred -> string
(** Pretty-prints a [starred] record; used in test assertions. *)

val from_string : string -> starred_response
(** Converts a result page of starred paged result into a list of starred *)

val by_language : starred list -> (string * starred list) list
(** Converts a list of starred items into a struc grouped by language like
(** Converts a list of starred items into a struct grouped by language like
[("java", [starred; starred]), ("scala", [starred;...])] *)

val languages : ?default_language:string -> starred list -> string list
Expand Down
90 changes: 64 additions & 26 deletions lib/http_util.ml
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,76 @@ let https ~authenticator =

(** Github uses rel links to indicate the next page. It's better to rely on them
instead of keeping a page counter *)
let next_re = Re2.create_exn "<([^;]+)>; rel=\"next\""

let next_link s =
Eio.traceln "%s" @@ Http.Header.to_string s;
match Http.Header.get s "Link" with
| None -> None
| Some l ->
let re = Re2.create_exn "<([^;]+)>; rel=\"next\"" in
let link =
try Some (Re2.find_first_exn ~sub:(`Index 1) re l)
with Re2.Exceptions.Regex_match_failed _ -> None
in
link

let handle_status status =
| Some l -> (
try Some (Re2.find_first_exn ~sub:(`Index 1) next_re l)
with Re2.Exceptions.Regex_match_failed _ -> None)

let classify_status status =
match status with
| `OK -> ()
| `OK -> `Ok
| `Unauthorized ->
failwith
`Fatal
(Http.Status.to_string status ^ ". Please check the provided token.")
| #Http.Status.client_error | #Http.Status.server_error ->
failwith (Http.Status.to_string status)
| status ->
raise
(Invalid_argument
(Printf.sprintf "Catastrophic failure: unexpected status %s"
(Http.Status.to_string status)))

let fetch ~sw api_url client token =
| `Too_many_requests -> `Transient (Http.Status.to_string status)
| #Http.Status.server_error -> `Transient (Http.Status.to_string status)
| #Http.Status.client_error -> `Fatal (Http.Status.to_string status)
| s ->
`Fatal (Printf.sprintf "Unexpected status %s" (Http.Status.to_string s))

type fetch_config = {
timeout_s : float;
max_retries : int;
backoff_base_s : float;
}

let retry ~sleep_fn ~config ~attempt =
let rec loop retries delay =
match attempt () with
| exception Eio.Time.Timeout ->
if retries = 0 then failwith "Request timed out after all retries"
else (
sleep_fn delay;
loop (retries - 1) (delay *. 2.0))
| exception (Eio.Io _ as e) ->
if retries = 0 then raise e
else (
sleep_fn delay;
loop (retries - 1) (delay *. 2.0))
| status, body_str, next_url -> (
match classify_status status with
| `Ok -> Some (body_str, next_url)
| `Fatal msg -> failwith msg
| `Transient msg ->
if retries = 0 then failwith (msg ^ " after all retries")
else (
sleep_fn delay;
loop (retries - 1) (delay *. 2.0)))
in
loop config.max_retries config.backoff_base_s

let fetch ~sw ~clock ~config api_url client token =
let headers =
Http.Header.of_list [ ("Authorization", Format.sprintf "Bearer %s" token) ]
in
let resp, body = Client.get ~headers ~sw client (Uri.of_string api_url) in
handle_status resp.status;
Some
( Eio.Buf_read.(parse_exn take_all) body ~max_size:max_int,
next_link resp.headers )
let timeout = Eio.Time.Timeout.seconds clock config.timeout_s in
let attempt () =
Eio.Time.Timeout.run_exn timeout (fun () ->
let resp, body =
Client.get ~headers ~sw client (Uri.of_string api_url)
in
let body_str =
Eio.Buf_read.(parse_exn take_all) body ~max_size:max_int
in
( resp.Http.Response.status,
body_str,
next_link resp.Http.Response.headers ))
in
let sleep_fn delay =
Eio.Time.Mono.sleep clock (delay +. 0.1 +. Random.float 0.9)
in
retry ~sleep_fn ~config ~attempt
6 changes: 3 additions & 3 deletions starred_ml.opam
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "Generates a awesome list markdown"
synopsis: "Generates an Awesome-style markdown list from GitHub stars"
description: "Turn your starred items into a awesomeness list of repos"
maintainer: ["Paulo Suzart"]
authors: ["Paulo Suzart"]
Expand All @@ -17,12 +17,12 @@ depends: [
"ppx_deriving" {>= "5.2.1"}
"mirage-crypto-rng" {>= "1.2.0"}
"logs" {>= "0.7.0"}
"jingoo" {>= "1.5.0"}
"jingoo" {>= "1.5.2"}
"fmt" {>= "0.9.0"}
"eio_main" {>= "1.3"}
"eio" {>= "1.3"}
"cohttp-eio" {>= "6.2.1"}
"ocaml"
"ocaml" {>= "5.2.0"}
"dune" {>= "3.21"}
"odoc" {with-doc}
]
Expand Down
5 changes: 5 additions & 0 deletions test/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
(name test_starred_ml)
(modules test_starred_ml)
(libraries alcotest starred_ml))

(test
(name test_retry)
(modules test_retry)
(libraries alcotest starred_ml))
Loading
Loading