Skip to content

Latest commit

 

History

History
422 lines (340 loc) · 9.06 KB

File metadata and controls

422 lines (340 loc) · 9.06 KB

arXiv downloader

Mix.install([
  {:kino, "~> 0.9.1"},
  {:poison, "~> 4.0"},
  {:httpoison, "~> 1.8"},
  {:floki, "~> 0.34.3"}
])

Section

defmodule ArXivUtils do
  @moduledoc """
  Helpers for extracting arXiv identifiers from URLs and for building
  arXiv URLs from an identifier.
  """

  # Runs `re` against `url` and returns its single capture group,
  # or `nil` when the URL does not match.
  defp get_arxiv_id_by_re(re, url) do
    case Regex.run(re, url) do
      nil -> nil
      [_, capture] -> capture
    end
  end

  @doc """
  Extracts the new-style arXiv id (e.g. `"2203.05115"`) from an abstract
  (`/abs/`) or PDF (`/pdf/`) URL.

  The abstract form takes precedence when both appear in `url`.
  Returns `nil` when `url` contains neither form. Any version suffix
  (such as `v2`) is not part of the returned id.
  """
  def get_arxiv_id(url) do
    # `||` short-circuits, so the PDF pattern is only tried when needed.
    get_arxiv_id_by_re(~r/arxiv\.org\/abs\/(\d+\.\d+)/, url) ||
      get_arxiv_id_by_re(~r/arxiv\.org\/pdf\/(\d+\.\d+)/, url)
  end

  @doc """
  Returns the URL of the PDF for `arxiv_id`.

  Uses https, like `get_arxiv_bibtex_url/1`, so that an HTTP client that
  does not follow redirects still receives the document itself.
  """
  def get_arxiv_pdf_url(arxiv_id) do
    "https://arxiv.org/pdf/#{arxiv_id}"
  end

  @doc """
  Returns the URL of the BibTeX entry for `arxiv_id`.
  """
  def get_arxiv_bibtex_url(arxiv_id) do
    "https://arxiv.org/bibtex/#{arxiv_id}"
  end
end
defmodule ArXivAPI do
  @moduledoc """
  Fetches paper metadata and BibTeX entries from arXiv.

  Metadata comes from the arXiv export API (an Atom feed), which is parsed
  with Floki and flattened into nested string-keyed maps.
  """

  @doc """
  Returns a map describing the paper behind `arxiv_url`.

  The map has the keys `"url"`, `"bibtex"`, `"title"`, `"summary"`,
  `"published"` and `"author"` (a list of author names).
  Performs two HTTP requests: one for the BibTeX entry, one for the feed.
  """
  def get_arxiv_paper_metadata(arxiv_url) do
    arxiv_id = ArXivUtils.get_arxiv_id(arxiv_url)

    Map.merge(
      %{
        "url" => arxiv_url,
        "bibtex" => get_arxiv_bibtex(arxiv_id)
      },
      get_arxiv_response(arxiv_id)
    )
  end

  @doc """
  Queries the export API for `arxiv_id` and returns a map with the keys
  `"title"`, `"summary"`, `"published"` and `"author"`.

  Raises if the HTTP request fails or the response cannot be parsed.
  """
  def get_arxiv_response(arxiv_id) do
    arxiv_id
    |> get_arxiv_api_uri()
    |> HTTPoison.get()
    |> parse_metadata_body()
  end

  @doc """
  Returns the raw body served at the BibTeX URL of `arxiv_id`.

  Raises if the HTTP request fails.
  """
  def get_arxiv_bibtex(arxiv_id) do
    arxiv_id
    |> ArXivUtils.get_arxiv_bibtex_url()
    |> HTTPoison.get()
    |> parse_bibtex_body()
  end

  defp parse_bibtex_body({:ok, %HTTPoison.Response{body: body}}), do: body

  # Parses the feed body and picks the fields of the (single) entry.
  defp parse_metadata_body({:ok, %HTTPoison.Response{body: body}}) do
    # The parsed document is expected to hold exactly two top-level nodes;
    # only the second one (the feed element) is used.
    {:ok, [_leading_node, content]} = Floki.parse_document(body)

    map_content = convert_tuples_to_dict(content)

    %{
      "title" => get_in(map_content, ["feed", "entry", "title"]),
      "summary" => get_in(map_content, ["feed", "entry", "summary"]),
      "published" => get_in(map_content, ["feed", "entry", "published"]),
      "author" => get_author_names(map_content)
    }
  end

  # Returns the author names in document order, always as a list.
  #
  # A single author is stored as one map and several authors as a list of
  # maps; a missing entry yields `nil`. `List.wrap/1` normalises all three
  # shapes, so single-author papers and empty results no longer crash.
  defp get_author_names(map_content) do
    map_content
    |> get_in(["feed", "entry", "author"])
    |> List.wrap()
    # merge_dicts_by_list/2 prepends each newly merged value, so repeated
    # elements are stored in reverse document order.
    |> Enum.reverse()
    |> Enum.map(& &1["name"])
  end

  defp get_arxiv_api_uri(arxiv_id) do
    "http://export.arxiv.org/api/query?id_list=#{arxiv_id}"
  end

  # Recursively converts a Floki node tree into nested maps keyed by tag
  # name. Attributes are dropped; repeated sibling tags are collected into
  # a list by merge_dicts_by_list/2.
  defp convert_tuples_to_dict({k, v}), do: %{k => convert_tuples_to_dict(v)}
  defp convert_tuples_to_dict({k, _, v}), do: %{k => convert_tuples_to_dict(v)}
  defp convert_tuples_to_dict([]), do: %{}
  defp convert_tuples_to_dict(text) when is_bitstring(text), do: text

  defp convert_tuples_to_dict(children) when is_list(children) do
    children
    |> Enum.map(&convert_tuples_to_dict/1)
    |> Enum.reduce(&merge_dicts_by_list/2)
  end

  # Merges two maps; values stored under the same key are combined into
  # one flat list.
  defp merge_dicts_by_list(d1, d2) do
    Map.merge(d1, d2, fn _key, v1, v2 -> List.flatten([v1, v2]) end)
  end
end
# Sample arXiv links used by the demo cells below.
abstract_url = "https://arxiv.org/abs/2203.05115"
pdf_url = "https://arxiv.org/pdf/1509.09169.pdf"

# Fetch the metadata map for the abstract link.
ArXivAPI.get_arxiv_paper_metadata(abstract_url)

# Build the PDF URL that corresponds to the abstract link.
abstract_url |> ArXivUtils.get_arxiv_id() |> ArXivUtils.get_arxiv_pdf_url()

# Offer every sub-directory of the download directory in a select input,
# as `{full_path, basename}` pairs.
save_dir = Path.expand("~/Downloads/czytnik")
folders = Path.wildcard(save_dir <> "/*")

folder_options =
  folders
  |> Enum.filter(&File.dir?/1)
  |> Enum.map(fn dir -> {dir, Path.basename(dir)} end)

Kino.Input.select("folder", folder_options)
defmodule ArXivDownloader do
  @moduledoc """
  Downloads the PDF of an arXiv paper and stores it under a file name
  derived from the paper title.
  """

  @doc """
  Downloads the PDF behind `arxiv_url` into `save_dir`.

  The file is named after the paper title, with whitespace runs replaced
  by underscores. Returns a human-readable status string: a success
  message containing the written path, or an error message when the
  download or the write fails.
  """
  def download_pdf(arxiv_url, save_dir) do
    title = arxiv_url |> ArXivAPI.get_arxiv_paper_metadata() |> Map.get("title")
    save_path = save_dir |> Path.expand() |> Path.join(get_pdf_filename(title))

    # Both steps report failures as `{:error, reason}`, so a failed write
    # produces the error message instead of an unmatched case clause.
    with {:ok, body} <- get_pdf_content(arxiv_url),
         :ok <- File.write(save_path, body) do
      "Written PDF to #{save_path}"
    else
      {:error, _reason} -> "Error downloading #{title}"
    end
  end

  @doc """
  Fetches the PDF bytes for `arxiv_url`.

  Returns `{:ok, body}` for a 200 response and `{:error, reason}` for
  transport errors or any other HTTP status.
  """
  def get_pdf_content(arxiv_url) do
    arxiv_url
    |> ArXivUtils.get_arxiv_id()
    |> ArXivUtils.get_arxiv_pdf_url()
    |> get_pdf_content_from_pdf_url()
  end

  defp get_pdf_content_from_pdf_url(pdf_url) do
    # Follow redirects so the document, not a redirect page, is returned;
    # require a 200 so error pages are never saved as PDFs.
    case HTTPoison.get(pdf_url, [], follow_redirect: true) do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} -> {:ok, body}
      {:ok, %HTTPoison.Response{status_code: status}} -> {:error, {:http_status, status}}
      {:error, reason} -> {:error, reason}
    end
  end

  # Builds the file name from the title. Path separators are replaced
  # because they would otherwise be treated as directories when joined
  # with the save directory.
  defp get_pdf_filename(title) do
    title
    |> String.split()
    |> Enum.join("_")
    |> String.replace(["/", "\\"], "_")
    |> Kernel.<>(".pdf")
  end
end
# Demo: download one paper into a fixed local directory.
"https://arxiv.org/abs/2203.05115"
|> ArXivDownloader.download_pdf("/home/kuba/Downloads/czytnik/tmp")

TODO: implement a more generic interface using macros.

defmodule UISubComponents do
  @moduledoc """
  Container struct with the fields `:inputs`, `:form` and `:frame`.
  All fields default to `nil`.
  """

  defstruct inputs: nil, form: nil, frame: nil
end

defprotocol ArXivUIComponent do
  @moduledoc """
  Protocol for argument structs that describe a Kino-based arXiv UI.

  Only functions that dispatch on the argument struct are part of the
  protocol. Wiring the form to its listener and appending markdown to a
  frame are implementation details: a protocol can only declare
  signatures (no function bodies), and it dispatches on the first
  argument, which for those helpers is a frame rather than the struct.
  """

  @doc """
  Builds the widgets described by `args`, renders the form and the frame,
  and starts listening for form submissions.
  """
  def new(args)

  @doc """
  Builds the widgets described by `args` and returns them as
  `{inputs, form, frame}`.
  """
  def make_widgets(args)
end

defmodule ArXivDownloaderArgs do
  @moduledoc """
  Arguments of the arXiv downloader UI.

  `:base_dir` is required. `:folder_opts` is an optional list of folder
  paths to offer as save locations; when it is `nil`, the
  sub-directories of `:base_dir` are offered instead.
  """

  @enforce_keys [:base_dir]
  defstruct [:base_dir, :folder_opts]
end

defimpl ArXivUIComponent, for: ArXivDownloaderArgs do
  def new(args) do
    {_inputs, form, frame} = make_widgets(args)
    Kino.render(form)
    Kino.render(frame)
    wire_widgets(frame, form)
  end

  def make_widgets(%ArXivDownloaderArgs{base_dir: base_dir, folder_opts: folder_opts}) do
    folder_options =
      case folder_opts do
        nil -> options_from_base_dir(base_dir)
        folders when is_list(folders) -> options_from_folders(folders)
      end

    inputs = [
      arxiv_url: Kino.Input.text("ArXiv URL"),
      # Pass the computed options, not the raw (possibly nil) struct field.
      save_dir: Kino.Input.select("directory", folder_options)
    ]

    frame = Kino.Frame.new()
    form = Kino.Control.form(inputs, submit: "Send", reset_on_submit: [:arxiv_url, :save_dir])
    {inputs, form, frame}
  end

  # Turns a list of paths into `{path, basename}` select options,
  # keeping only existing directories.
  defp options_from_folders(folders) when is_list(folders) do
    for f <- folders, File.dir?(f), do: {f, Path.basename(f)}
  end

  # Offers every sub-directory of `base_dir` as a select option.
  defp options_from_base_dir(base_dir) do
    base_dir
    |> Path.expand()
    |> Path.join("*")
    |> Path.wildcard()
    |> options_from_folders()
  end

  # Subscribes to form submissions; each event is handled together with
  # the frame the status messages are written to.
  defp wire_widgets(frame, form) do
    Kino.listen(form, &listen_fn(frame, &1))
  end

  defp listen_fn(frame, %{data: %{arxiv_url: arxiv_url, save_dir: save_dir}, origin: origin}) do
    if arxiv_url != "" do
      add_markdown_to_frame(frame, "**arxiv_url**: #{arxiv_url}")
      downloader_status = ArXivDownloader.download_pdf(arxiv_url, save_dir)
      add_markdown_to_frame(frame, downloader_status)
    else
      content = Kino.Markdown.new("_ERROR! You need an arXiv URL to submit..._")
      # `to: origin` targets the submitting client only.
      Kino.Frame.append(frame, content, to: origin)
    end
  end

  defp add_markdown_to_frame(frame, markdown_str) do
    Kino.Frame.append(frame, Kino.Markdown.new(markdown_str))
  end
end
defmodule ArXivDownloaderInterface do
  @moduledoc """
  Kino form for downloading arXiv PDFs into a chosen directory.

  The form asks for an arXiv URL and a target directory; on submit the
  paper is downloaded with `ArXivDownloader.download_pdf/2` and the
  status is appended to a frame rendered below the form.
  """

  @doc """
  Renders the form and its output frame and starts listening for
  submissions.

  `folder_opts` is the list of select options (`{value, label}` pairs) to
  offer as save directories. When it is `nil`, the sub-directories of
  `base_dir` are offered.
  """
  def new(base_dir, folder_opts \\ nil) do
    folder_options =
      case folder_opts do
        nil -> get_folders_options(base_dir)
        options -> options
      end

    {_inputs, form, frame} = make_widgets(base_dir, folder_options)
    Kino.render(form)
    Kino.render(frame)
    wire_widgets(frame, form)
  end

  # Offers every sub-directory of `base_dir` as a `{path, basename}` option.
  defp get_folders_options(base_dir) do
    folders = base_dir |> Path.expand() |> Path.join("*") |> Path.wildcard()
    for f <- folders, File.dir?(f), do: {f, Path.basename(f)}
  end

  @doc """
  Builds the widgets and returns them as `{inputs, form, frame}`.

  The first argument is accepted for interface compatibility and is not
  used; the select input is populated from `folder_options`.
  """
  def make_widgets(_base_dir, folder_options) do
    inputs = [
      arxiv_url: Kino.Input.text("ArXiv URL"),
      save_dir: Kino.Input.select("directory", folder_options)
    ]

    frame = Kino.Frame.new()
    form = Kino.Control.form(inputs, submit: "Send", reset_on_submit: [:arxiv_url, :save_dir])
    {inputs, form, frame}
  end

  @doc """
  Subscribes to submissions of `form`; status messages go to `frame`.
  """
  def wire_widgets(frame, form) do
    # listen_fn/2 needs both the frame and the event, so the frame is
    # bound through a capture instead of being piped into the function.
    Kino.listen(form, &listen_fn(frame, &1))
  end

  defp listen_fn(frame, %{data: %{arxiv_url: arxiv_url, save_dir: save_dir}, origin: origin}) do
    if arxiv_url != "" do
      add_markdown_to_frame(frame, "**arxiv_url**: #{arxiv_url}")
      downloader_status = ArXivDownloader.download_pdf(arxiv_url, save_dir)
      add_markdown_to_frame(frame, downloader_status)
    else
      content = Kino.Markdown.new("_ERROR! You need an arXiv URL to submit..._")
      # `to: origin` targets the submitting client only.
      Kino.Frame.append(frame, content, to: origin)
    end
  end

  @doc """
  Appends `markdown_str`, rendered as markdown, to `frame`.
  """
  def add_markdown_to_frame(frame, markdown_str) do
    Kino.Frame.append(frame, Kino.Markdown.new(markdown_str))
  end
end
defmodule MacroExample do
  @moduledoc """
  Small macro experiments: reading a variable's name, generating an
  anonymous function that matches on that name, and building a map.
  """

  @doc """
  Expands to the name of the variable `x` as an atom.

  `get_symbol(foo)` expands to `:foo`. Raises `ArgumentError` at
  expansion time when the argument is not a plain variable.
  """
  defmacro get_symbol(x) do
    variable_name!(x)
  end

  @doc """
  Expands to a one-argument anonymous function.

  The function matches `%{data: %{name => value}}`, where `name` is the
  atom name of the variable `var`, and binds `value` to `var`. It then
  rebinds `var` to `var_assign` and returns `fn_expr`.

  `make_function(x, x * 2, x + 1)` therefore expands to

      fn %{data: %{x: x}} ->
        x = x + 1
        x * 2
      end
  """
  defmacro make_function(var, fn_expr, var_assign) do
    # The map key must be injected as a literal atom; a bare variable is
    # not allowed as a key inside a pattern.
    name = variable_name!(var)

    quote do
      fn %{data: %{unquote(name) => unquote(var)}} ->
        unquote(var) = unquote(var_assign)
        unquote(fn_expr)
      end
    end
  end

  @doc """
  Expands to a one-entry map `%{var => val}`.
  """
  defmacro make_dict(var, val) do
    quote do
      %{unquote(var) => unquote(val)}
    end
  end

  # Extracts the atom name from the quoted form of a variable.
  defp variable_name!({name, _meta, context}) when is_atom(name) and is_atom(context), do: name

  defp variable_name!(other) do
    raise ArgumentError, "expected a variable, got: #{Macro.to_string(other)}"
  end
end
# Scratch cells that exercise the MacroExample macros and the `Macro`
# module, followed by the call that starts the downloader UI.
require MacroExample

x = 1
# Builds a one-entry map with make_dict/2 and reads the value back via `.x`.
MacroExample.make_dict(:x, 1).x
# NOTE(review): the first argument below is an evaluated match expression
# (it binds `f` to the generated function), not quoted code. If the goal
# was to inspect the macro expansion, the call presumably needs to be
# wrapped in `quote do ... end` — TODO confirm intent.
Macro.expand(
  f =
    MacroExample.make_function(
      x,
      x * 2,
      x + 1
    ),
  __ENV__
)
# Binds `f` to the function generated by make_function/3 and calls it
# with a map whose `:data` entry holds the key `:x`.
f =
  MacroExample.make_function(
    x,
    x * 2,
    x + 1
  )

f.(%{data: %{x: 1}})
# NOTE(review): the call below is still fully qualified, so this import
# does not appear to be needed here.
import Macro

# NOTE(review): the second argument is the integer `1` rather than a
# quoted call — verify this is what was meant to be piped into.
Macro.pipe(
  quote do
    x + 1
  end,
  1,
  0
)
# Lists the keys of the current environment struct.
Map.keys(__ENV__)
v = "1"
# NOTE(review): this applies `TurboArXivDownloaderInterface` with the
# anonymous-function call syntax `.()`, and no module of that name is
# defined in the visible source — TODO confirm or remove.
TurboArXivDownloaderInterface.(v)
# Starts the downloader UI, offering the sub-directories of the given
# directory as save locations.
ArXivDownloaderInterface.new("~/Downloads/czytnik")