• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

camatcode / basenji / 510183623a1afcec86a81c40dc2c479a483e7ff7

03 Aug 2025 05:27PM UTC coverage: 84.181% (-0.01%) from 84.192%
510183623a1afcec86a81c40dc2c479a483e7ff7

Pull #85

github

camatcode
refactor: Reader behaviour
Pull Request #85: refactor: Reader behaviour

59 of 62 new or added lines in 6 files covered. (95.16%)

13 existing lines in 6 files now uncovered.

1208 of 1435 relevant lines covered (84.18%)

491.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.47
/lib/basenji/reader/pdf_reader.ex
1
defmodule Basenji.Reader.PDFReader do
2
  @moduledoc false
3
  @behaviour Basenji.Reader
4

5
  use Basenji.TelemetryHelpers
6

7
  alias Basenji.Reader
8

9
  @impl Reader
10
  # Identifies the format handled by this reader as the atom `:pdf`.
  def format do
    :pdf
  end
2✔
11

12
  @impl Reader
13
  # File extensions (without a leading dot) associated with this reader.
  def file_extensions do
    ~w(pdf)
  end
782✔
14

15
  @impl Reader
16
  # Byte signatures for this format: a single signature at offset 0 whose
  # bytes are the ASCII text "%PDF-" (0x25 0x50 0x44 0x46 0x2D).
  def magic_numbers do
    signature = String.to_charlist("%PDF-")
    [%{offset: 0, magic: signature}]
  end
780✔
17

18
  @impl Reader
19
  # Lists one synthetic entry per page of the PDF at `pdf_file_path`.
  #
  # The page count is taken from `get_metadata/1`. Each entry is a map whose
  # `:file_name` is the 1-based page number, zero-padded to the width of the
  # page count and suffixed with ".jpg" (e.g. "007.jpg" in a 120-page file).
  #
  # Returns `{:ok, %{entries: entries}}`. Any result of `get_metadata/1` that
  # does not match `{:ok, %{pages: _}}` is returned unchanged. The second
  # argument is accepted but ignored.
  def get_entries(pdf_file_path, _opts \\ []) do
    with {:ok, %{pages: pages}} <- get_metadata(pdf_file_path) do
      padding = pages |> Integer.to_string() |> String.length()

      # The explicit `//1` step keeps the range empty when `pages` is 0. A bare
      # `1..0` counts downwards and would fabricate entries for pages 1 and 0.
      file_entries =
        for idx <- 1..pages//1 do
          %{file_name: "#{String.pad_leading("#{idx}", padding, "0")}.jpg"}
        end

      {:ok, %{entries: file_entries}}
    end
  end
32

33
  # Returns the page entries of the PDF at `pdf_file_path`, each extended with
  # a zero-arity `:stream_fun` that calls `get_entry_stream!/2` for that entry
  # when invoked.
  #
  # The whole operation runs inside
  # `meter_duration [:basenji, :process], "read_pdf"`. A result of
  # `get_entries/1` that does not match `{:ok, %{entries: _}}` is returned
  # as-is. The second argument is accepted but ignored.
  def read(pdf_file_path, _opts \\ []) do
    meter_duration [:basenji, :process], "read_pdf" do
      case get_entries(pdf_file_path) do
        {:ok, %{entries: listed}} ->
          streamable =
            for entry <- listed do
              Map.put(entry, :stream_fun, fn -> get_entry_stream!(pdf_file_path, entry) end)
            end

          {:ok, %{entries: streamable}}

        other ->
          other
      end
    end
  end
47

48
  @impl Reader
49
  # Builds a resource (through `Reader.create_resource/1`) for a single page of
  # the PDF at `pdf_file_path`.
  #
  # The page number is the leading integer of the entry's `:file_name` and is
  # handed to `pdftoppm` as `-f`, together with `-singlefile -jpeg -q`. On an
  # `{:ok, output}` result the callback yields a one-element list holding the
  # output as a list of bytes; any other result is yielded unchanged.
  #
  # Raises when the entry has no `:file_name` or the name does not start with
  # an integer.
  def get_entry_stream!(pdf_file_path, entry) do
    {page_num, _rest} = Integer.parse(entry[:file_name])
    pdftoppm_args = ["-f", "#{page_num}", "-singlefile", "-jpeg", "-q", pdf_file_path]

    Reader.create_resource(fn ->
      case Reader.exec("pdftoppm", pdftoppm_args) do
        {:ok, output} -> [:binary.bin_to_list(output)]
        other -> other
      end
    end)
  end
59

60
  @impl Reader
61
  # Ignores its argument and always returns `:ok`.
  def close(_reader) do
    :ok
  end
3✔
62

63
  # Runs `pdfinfo -isodates` on `pdf_file_path` and turns its "Key: value"
  # output lines into a map, one entry per line, via `to_metadata/2`.
  #
  # Returns `{:ok, metadata}`. Any result of `Reader.exec/2` that does not
  # match `{:ok, output}` is returned unchanged.
  defp get_metadata(pdf_file_path) do
    with {:ok, output} <- Reader.exec("pdfinfo", ["-isodates", pdf_file_path]) do
      # Blank lines and lines without a ":" separator are skipped. Filing them
      # under a generated "unknown_<timestamp>" key would mint a brand-new atom
      # on every occurrence, and atoms are never garbage collected.
      metadata =
        for line <- String.split(output, "\n", trim: true),
            [k, v] <- [String.split(line, ":", parts: 2)],
            into: %{} do
          to_metadata(k, v)
        end

      {:ok, metadata}
    end
  end
78

79
  # Turns one raw key/value pair into a `{key, value}` tuple: the key is
  # trimmed, passed through `ProperCase.snake_case/1` and converted to an atom;
  # the trimmed value is then converted by `convert_value/2` for that key.
  #
  # NOTE(review): `String.to_atom/1` creates an atom for every distinct key it
  # sees and atoms are never garbage collected; confirm the set of keys
  # reaching this function is bounded.
  defp to_metadata(k, v) do
    key =
      k
      |> String.trim()
      |> ProperCase.snake_case()
      |> String.to_atom()

    {key, convert_value(key, String.trim(v))}
  end
84

85
  # Converts a trimmed metadata value according to its key.
  #
  #   * `:creation_date`, `:mod_date` - parsed with `DateTimeParser.parse/1`;
  #     the raw string is kept when it cannot be parsed.
  #   * `:pages` - converted with `String.to_integer/1` (raises unless the
  #     whole string is an integer).
  #   * `:filesize` - the leading integer of the string (raises if there is
  #     none).
  #   * `:pagesize` - when exactly two space-separated parts start with an
  #     integer, a tuple of those two integers in the order they appear in the
  #     text; otherwise the raw string.
  #   * any other key - "yes" becomes `true`, "no" becomes `false`, "none"
  #     becomes `nil`; every other value is returned unchanged.

  # A malformed date must not abort parsing of the remaining metadata, so the
  # non-raising parser is used and the original text is kept on failure.
  defp convert_value(key, v) when key in [:creation_date, :mod_date] do
    case DateTimeParser.parse(v) do
      {:ok, parsed} -> parsed
      {:error, _reason} -> v
    end
  end

  defp convert_value(:pages, v), do: String.to_integer(v)

  defp convert_value(:filesize, v) do
    {first, _rest} = Integer.parse(v)
    first
  end

  defp convert_value(:pagesize, v) do
    # Collect the integers in textual order so the tuple reads the same way as
    # the source string. `Integer.parse/1` keeps only the leading integer of a
    # part, so fractional sizes such as "595.276" are truncated to 595.
    dimensions =
      for part <- String.split(v, " "),
          {dimension, _rest} <- [Integer.parse(part)] do
        dimension
      end

    case dimensions do
      [x, y] -> {x, y}
      _ -> v
    end
  end

  defp convert_value(_k, "yes"), do: true
  defp convert_value(_k, "no"), do: false
  defp convert_value(_k, "none"), do: nil
  defp convert_value(_k, v), do: v
3,128✔
113
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc