• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

camatcode / basenji / 4e6ab01ab8578dff199cf92f5b41a9c641c12b53

03 Aug 2025 05:03PM UTC coverage: 84.192%. Remained the same
4e6ab01ab8578dff199cf92f5b41a9c641c12b53

Pull #85

github

camatcode
refactor: Reader behaviour
Pull Request #85: refactor: Reader behaviour

46 of 46 new or added lines in 6 files covered. (100.0%)

16 existing lines in 8 files now uncovered.

1209 of 1436 relevant lines covered (84.19%)

487.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.47
/lib/basenji/reader/pdf_reader.ex
1
defmodule Basenji.Reader.PDFReader do
2
  @moduledoc false
3
  @behaviour Basenji.Reader
4

5
  use Basenji.TelemetryHelpers
6

7
  alias Basenji.Reader
8

9
  @impl Reader
10
  # Format tag this reader reports for the documents it handles.
  def format do
    :pdf
  end
4✔
11

12
  @impl Reader
13
  # File extensions (without the leading dot) associated with this reader.
  def file_extensions do
    ~w(pdf)
  end
788✔
14

15
  @impl Reader
16
  # Byte signature expected at offset 0. The charlist ~c"%PDF-" is the same
  # list of integers as [0x25, 0x50, 0x44, 0x46, 0x2D].
  def magic_numbers do
    [%{offset: 0, magic: ~c"%PDF-"}]
  end
786✔
17

18
  @impl Reader
19
  # Lists one synthetic entry per page of the PDF at `pdf_file_path`.
  #
  # Each entry is a map `%{file_name: "<page>.jpg"}` where the page number is
  # left-padded with zeros to the width of the total page count (a 12-page
  # document yields "01.jpg" .. "12.jpg"). The second argument is accepted
  # but not used.
  #
  # Returns `{:ok, %{entries: entries}}`. Any value from `get_metadata/1` that
  # does not match `{:ok, %{pages: pages}}` is returned unchanged.
  def get_entries(pdf_file_path, _opts \\ []) do
    with {:ok, %{pages: pages}} <- get_metadata(pdf_file_path) do
      padding = pages |> Integer.to_string() |> String.length()

      # `//1` forces an ascending range. A bare `1..0` counts down and would
      # produce entries for pages 1 and 0 when the document reports zero pages.
      file_entries =
        for idx <- 1..pages//1 do
          %{file_name: String.pad_leading(Integer.to_string(idx), padding, "0") <> ".jpg"}
        end

      {:ok, %{entries: file_entries}}
    end
  end
32

33
  @impl Reader
34
  # Lists the PDF's page entries and attaches a `:stream_fun` to each one.
  #
  # `:stream_fun` is a zero-arity function that calls `get_entry_stream!/2`
  # for that entry when invoked; nothing is rendered while building the list.
  # The whole body runs inside `meter_duration [:basenji, :process], "read_pdf"`.
  # The second argument is accepted but not used.
  #
  # Returns `{:ok, %{entries: entries}}`, or whatever non-matching value
  # `get_entries/1` produced.
  def read(pdf_file_path, _opts \\ []) do
    meter_duration [:basenji, :process], "read_pdf" do
      with {:ok, %{entries: listed}} <- get_entries(pdf_file_path) do
        streamable =
          for entry <- listed do
            Map.put(entry, :stream_fun, fn -> get_entry_stream!(pdf_file_path, entry) end)
          end

        {:ok, %{entries: streamable}}
      end
    end
  end
48

49
  @impl Reader
50
  # Nothing to release for this reader: ignores its argument and returns :ok.
  def close(_reader) do
    :ok
  end
5✔
51

52
  # Builds the resource that renders a single page of the PDF as a JPEG.
  #
  # The page number is the leading integer of the entry's `:file_name`
  # ("007.jpg" -> 7); a name without a leading integer fails the match below
  # and raises. Inside the resource callback, `pdftoppm` is run for that page
  # and its output is converted to a list of bytes wrapped in a one-element
  # list. Any result from `Reader.exec/2` other than `{:ok, output}` is handed
  # back to `Reader.create_resource/1` unchanged.
  defp get_entry_stream!(pdf_file_path, entry) do
    {page_num, _extension} = Integer.parse(entry[:file_name])

    render_args = ["-f", Integer.to_string(page_num), "-singlefile", "-jpeg", "-q", pdf_file_path]

    Reader.create_resource(fn ->
      case Reader.exec("pdftoppm", render_args) do
        {:ok, output} -> [:binary.bin_to_list(output)]
        other -> other
      end
    end)
  end
62

63
  # Runs `pdfinfo -isodates` on `pdf_file_path` and turns its output into a map.
  #
  # Every line is split at the first ":" into a key and a value, which
  # `to_metadata/2` normalises into a `{key, value}` pair. Blank lines and
  # lines without a ":" are skipped: there is no key to file them under, and
  # inventing a unique key per line would mint a fresh atom on every call
  # (atoms are never garbage-collected).
  #
  # Returns `{:ok, metadata}`, or whatever non-matching value `Reader.exec/2`
  # produced.
  defp get_metadata(pdf_file_path) do
    with {:ok, output} <- Reader.exec("pdfinfo", ["-isodates", pdf_file_path]) do
      metadata =
        output
        |> String.split("\n", trim: true)
        |> Enum.flat_map(fn line ->
          case String.split(line, ":", parts: 2) do
            [k, v] -> [to_metadata(k, v)]
            [_no_separator] -> []
          end
        end)
        |> Map.new()

      {:ok, metadata}
    end
  end
78

79
  # Normalises one raw key/value pair into a `{key, value}` tuple.
  #
  # The key is trimmed, passed through `ProperCase.snake_case/1` and converted
  # to an atom; the trimmed value is then converted by `convert_value/2`
  # according to that key.
  #
  # NOTE(review): `String.to_atom/1` creates atoms from whatever key text is
  # passed in - presumably a small fixed set of field names; confirm against
  # the caller.
  defp to_metadata(k, v) do
    key =
      k
      |> String.trim()
      |> ProperCase.snake_case()
      |> String.to_atom()

    {key, convert_value(key, String.trim(v))}
  end
84

85
  # Converts a trimmed metadata value to a richer type based on its key.
  #
  #   * `:creation_date` / `:mod_date` - parsed with `DateTimeParser.parse/1`;
  #     the raw string is kept when parsing fails, so one malformed date does
  #     not abort parsing of the remaining fields.
  #   * `:pages`    - integer; raises if the value is not an integer string.
  #   * `:filesize` - leading integer of the value ("12345 bytes" -> 12345);
  #     the raw string is kept when there is none.
  #   * `:pagesize` - `{first, second}` from the two integers in the value, in
  #     the order they appear ("595 x 842 pts" -> {595, 842}); the raw string
  #     is kept when the value does not contain exactly two integer tokens.
  #   * any other key - "yes" -> true, "no" -> false, "none" -> nil, and
  #     everything else is returned unchanged.
  defp convert_value(key, v) when key in [:creation_date, :mod_date] do
    case DateTimeParser.parse(v) do
      {:ok, parsed} -> parsed
      {:error, _reason} -> v
    end
  end

  defp convert_value(:pages, v), do: String.to_integer(v)

  defp convert_value(:filesize, v) do
    case Integer.parse(v) do
      {bytes, _unit} -> bytes
      :error -> v
    end
  end

  defp convert_value(:pagesize, v) do
    # The generator pattern silently drops tokens for which Integer.parse/1
    # returns :error ("x", "pts", ...), and the comprehension preserves the
    # left-to-right order of the tokens that do parse.
    coords =
      for part <- String.split(v, " "),
          {coord, _rest} <- [Integer.parse(part)],
          do: coord

    case coords do
      [x, y] -> {x, y}
      _ -> v
    end
  end

  defp convert_value(_k, "yes"), do: true
  defp convert_value(_k, "no"), do: false
  defp convert_value(_k, "none"), do: nil
  defp convert_value(_k, v), do: v
3,152✔
113
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc