• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

xu-chris / toon_ex / c762e615afe92857f8aa856472d1eaf494c1d634

21 Jan 2026 06:56AM UTC coverage: 63.791% (+8.8%) from 54.957%
c762e615afe92857f8aa856472d1eaf494c1d634

push

github

web-flow
refactor: improve code quality and test coverage (#10)

Encoder refactoring:
- Refactor arrays.ex with pattern-matched clauses for composability
- Refactor objects.ex with pattern-matched encode_regular_entry/encode_folded_value
- Rename predicates to follow Elixir conventions (tabular_array?, list_array?)
- Extract reusable helpers (apply_marker, build_*_line functions)

Decoder refactoring:
- Remove Process dictionary anti-pattern from structural_parser.ex
- Thread metadata explicitly through all parsing functions
- Add key_was_quoted? and add_key_to_metadata helpers
- Clean up parser.ex by removing Process.put/get calls

Test improvements:
- Add decode/options_test.exs with 23 tests (100% coverage)
- Add encode/options_test.exs with 25 tests (100% coverage)
- Expand encoder_test.exs from 4 to 23 tests (65% coverage)
- Add test fixtures: UserWithOnly, StructWithoutEncoder
- Strengthen assertions with specific error.value checks

Total: 511 tests, 83.5% coverage, all quality checks pass.

Co-authored-by: Chris Xu <git@chrisxu.me>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>

248 of 418 new or added lines in 6 files covered. (59.33%)

8 existing lines in 4 files now uncovered.

673 of 1055 relevant lines covered (63.79%)

18.13 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.46
/lib/toon/decode/structural_parser.ex
1
defmodule Toon.Decode.StructuralParser do
2
  @moduledoc """
3
  Structural parser for TOON format that handles indentation-based nesting.
4

5
  This parser processes TOON input by analyzing indentation levels and building
6
  a hierarchical structure from the flat text representation.
7
  """
8

9
  alias Toon.Decode.Parser
10
  alias Toon.DecodeError
11

12
  @invalid_escape_pattern ~r/\\/
13

14
  # Module-level regex patterns for structural matching
15
  @tabular_header_pattern ~r/^(?:"[^"]*"|[\w.]+)\[\d+.*\]\{[^}]+\}:$/
16
  @list_header_pattern ~r/^(?:"[^"]*"|[\w.]+)\[\d+.*\]:$/
17
  @inline_array_pattern ~r/^\[.*?\]: .+/
18
  @list_array_header_pattern ~r/^\[\d+[^\]]*\]:$/
19
  @field_pattern ~r/^[\w"]+\s*:/
20
  @tabular_header_regex ~r/^((?:"[^"]*"|[\w.]+))(\[\d+.*\])\{([^}]+)\}:$/
21
  @list_array_regex ~r/^((?:"[^"]*"|[\w.]+))\[(\d+).*\]:$/
22

23
  # One preprocessed input line. `is_blank` is part of the record because
  # `preprocess_lines/1` always sets it and the parsing functions read it.
  @type line_info :: %{
          content: String.t(),
          indent: non_neg_integer(),
          line_number: non_neg_integer(),
          original: String.t(),
          is_blank: boolean()
        }
29

30
  @type parse_metadata :: %{
31
          quoted_keys: MapSet.t(String.t()),
32
          key_order: list(String.t())
33
        }
34

35
  @doc """
  Parses a TOON document into a decoded term plus key metadata.

  The input is split into line records, checked for indentation errors when
  `opts.strict` is truthy, and then handed to the structural parser. An input
  that contains no non-blank lines decodes to an empty map.

  Returns `{:ok, {result, metadata}}`, where `metadata` carries `quoted_keys`
  and `key_order`. Any exception raised while parsing is converted into
  `{:error, %Toon.DecodeError{}}` instead of being propagated.
  """
  @spec parse(String.t(), map()) :: {:ok, {term(), parse_metadata()}} | {:error, DecodeError.t()}
  def parse(input, opts) when is_binary(input) do
    lines = preprocess_lines(input)

    # Indentation rules are only enforced in strict mode.
    if opts.strict, do: validate_indentation(lines, opts)

    empty_metadata = %{quoted_keys: MapSet.new(), key_order: []}

    {result, metadata} =
      if lines == [] do
        {%{}, empty_metadata}
      else
        parse_structure(lines, 0, opts, empty_metadata)
      end

    {:ok, {result, metadata}}
  rescue
    error in DecodeError ->
      {:error, error}

    # Anything else is wrapped so callers only ever see a DecodeError.
    error ->
      {:error,
       DecodeError.exception(
         message: "Parse failed: #{Exception.message(error)}",
         input: input
       )}
  end
76

77
  # Turn the raw input into one record per physical line (split on "\n").
  # Each record keeps the original text, the text without leading whitespace,
  # the number of leading spaces, the 1-based line number and a blank flag.
  # Blank lines at the end of the input are removed; interior ones are kept.
  defp preprocess_lines(input) do
    numbered = input |> String.split("\n") |> Enum.with_index(1)

    records =
      for {raw, number} <- numbered do
        %{
          content: String.trim_leading(raw),
          indent: calculate_indent(raw),
          line_number: number,
          original: raw,
          is_blank: String.trim(raw) == ""
        }
      end

    # drop_while only works from the front, hence the reverse/reverse pair.
    records
    |> Enum.reverse()
    |> Enum.drop_while(fn record -> record.is_blank end)
    |> Enum.reverse()
  end
96

97
  # Count the space characters at the start of `line`. Counting stops at the
  # first character that is not a space, so tabs do not contribute.
  defp calculate_indent(line) do
    line
    |> String.to_charlist()
    |> Enum.reduce_while(0, fn
      ?\s, count -> {:cont, count + 1}
      _other, count -> {:halt, count}
    end)
  end
104

105
  # Strict-mode indentation check. For every non-blank line this raises
  # `DecodeError` when the leading whitespace contains a tab, or when the
  # number of leading spaces is not a multiple of `opts.indent_size`.
  # Returns `:ok` when every line passes.
  defp validate_indentation(lines, opts) do
    Enum.each(lines, fn line ->
      if line.is_blank do
        :ok
      else
        # Only the whitespace before the first content character is inspected;
        # tabs inside keys or values are not this function's concern.
        leading =
          line.original
          |> String.to_charlist()
          |> Enum.take_while(fn char -> char == ?\s or char == ?\t end)

        if ?\t in leading do
          raise DecodeError,
            message: "Tab characters are not allowed in indentation (strict mode)",
            input: line.original
        end

        if line.indent > 0 and rem(line.indent, opts.indent_size) != 0 do
          raise DecodeError,
            message: "Indentation must be a multiple of #{opts.indent_size} spaces (strict mode)",
            input: line.original
        end
      end
    end)
  end
134

135
  # Entry point for a non-empty document: classify the root from the first
  # line and dispatch to the matching root parser. Only the object parser uses
  # `base_indent`. Returns `{value, metadata}`.
  defp parse_structure(lines, base_indent, opts, metadata) do
    case detect_root_type(lines) do
      {:root_array, _form} -> parse_root_array(lines, opts, metadata)
      {:root_primitive, _form} -> parse_root_primitive(lines, opts, metadata)
      {:object, _form} -> parse_object_lines(lines, base_indent, opts, metadata)
    end
  end
150

151
  # Classify the document root from its first line.
  #
  # Returns `{kind, form}`:
  #   * `{:root_array, :tabular}`  - header of the form `[N]{fields}:`
  #   * `{:root_array, :list}`     - header of the form `[N]:` with nothing after it
  #   * `{:root_array, :inline}`   - any other line starting with `[`
  #   * `{:root_primitive, nil}`   - a single line that is not a key/value pair
  #   * `{:object, nil}`           - everything else
  #
  # The specific header shapes are tested before the generic "starts with ["
  # check; with the generic check first they could never be reached and every
  # root array was reported as `:inline`. The `kind` element is unaffected by
  # this ordering, because every header shape starts with `[`.
  defp detect_root_type([%{content: content} | rest]) do
    cond do
      String.match?(content, ~r/^\[[^\]]*\]\{[^}]*\}:/) ->
        {:root_array, :tabular}

      String.match?(content, ~r/^\[[^\]]*\]:\s*$/) ->
        {:root_array, :list}

      String.starts_with?(content, "[") ->
        {:root_array, :inline}

      # Single line: either `key: value` (an object) or a bare primitive.
      rest == [] ->
        # Key is a quoted string (escaped quotes allowed) or letters, digits,
        # `_`, `-`, `.`; optionally followed by an array marker `[...]`; then
        # `:` followed by whitespace or end of line.
        if String.match?(content, ~r/^(?:"(?:[^"\\]|\\.)*"|[\w.-]+)(?:\[[^\]]*\])?:(?:\s|$)/) do
          {:object, nil}
        else
          {:root_primitive, nil}
        end

      true ->
        {:object, nil}
    end
  end
183

184
  # A document consisting of exactly one keyless line: decode that line's text
  # with `parse_value/1` (bypassing the line parser) and pass the metadata
  # through untouched.
  defp parse_root_primitive([%{content: raw}], _opts, metadata) do
    value = parse_value(raw)
    {value, metadata}
  end
190

191
  # Parse a document whose first line is a root array header.
  #
  # The line parser is tried first. If it consumes the whole line and yields a
  # `{key, list}` pair, the list is the result and the key is recorded in the
  # metadata. A complete parse of any other shape raises `DecodeError`.
  #
  # Every other outcome - a parser error, or a parse that left input
  # unconsumed - falls back to the structural header formats handled by
  # `parse_complex_root_array/4`. Previously only the error outcome fell back,
  # so a partial parse crashed with a `CaseClauseError`.
  #
  # Returns `{list, metadata}`.
  defp parse_root_array([%{content: header_line} = line_info | rest], opts, metadata) do
    case Parser.parse_line(header_line) do
      {:ok, [{key, value}], "", _, _, _} when is_list(value) ->
        was_quoted = key_was_quoted?(header_line)
        {value, add_key_to_metadata(key, was_quoted, metadata)}

      {:ok, [_other], "", _, _, _} ->
        raise DecodeError, message: "Invalid root array format", input: header_line

      _error_or_incomplete ->
        parse_complex_root_array(line_info, rest, opts, metadata)
    end
  end
212

213
  # Decode a root array from its header shape. Three shapes are recognised:
  #   * `[N<delim>]: v1,v2`  - values on the header line
  #   * `[N<delim>]{f1,f2}:` - tabular rows on the following lines
  #   * `[N<delim>]:`        - list items on the following lines
  # Anything else raises `DecodeError`. Metadata is returned unchanged.
  #
  # NOTE(review): the list branch does not compare the number of items with
  # the declared N here, unlike the other two - confirm whether intentional.
  defp parse_complex_root_array(%{content: header}, rest, opts, metadata) do
    value =
      cond do
        header =~ ~r/^\[\d+[^\]]*\]: / ->
          parse_root_inline_array(header, opts)

        header =~ ~r/^\[\d+[^\]]*\]\{[^}]+\}:$/ ->
          parse_tabular_array_data(header, rest, 0, opts)

        header =~ ~r/^\[\d+[^\]]*\]:$/ ->
          parse_list_array_items(rest, 0, opts)

        true ->
          raise DecodeError, message: "Invalid root array header", input: header
      end

    {value, metadata}
  end
231

232
  # Decode a root array whose values sit on the header line, e.g. `[3]: a,b,c`.
  # The part before the first ": " is the array marker (declared length and
  # delimiter); the part after it is the delimited value list.
  # Raises `DecodeError` when the header has no ": " separator or when the
  # number of values differs from the declared length.
  defp parse_root_inline_array(header, _opts) do
    case String.split(header, ": ", parts: 2) do
      [marker, raw_values] ->
        expected =
          case Regex.run(~r/\[(\d+)/, marker) do
            [_, digits] -> String.to_integer(digits)
            _ -> nil
          end

        separator = extract_delimiter(marker)
        values = parse_delimited_values(raw_values, separator)

        # The length is only checked when the marker actually declared one.
        if expected != nil and length(values) != expected do
          raise DecodeError,
            message: "Array length mismatch: declared #{expected}, got #{length(values)}",
            input: header
        end

        values

      _ ->
        raise DecodeError, message: "Invalid root inline array", input: header
    end
  end
260

261
  # Build a map from `{key, value}` pairs, converting each key according to
  # `opts.keys`:
  #   * `:strings` - keys are used as given
  #   * `:atoms`   - keys go through `String.to_atom/1`
  #   * `:atoms!`  - keys go through `String.to_existing_atom/1`
  # Any other `opts.keys` value raises a `CaseClauseError`.
  defp build_map_with_keys(entries, opts) do
    convert =
      case opts.keys do
        :strings -> nil
        :atoms -> &String.to_atom/1
        :atoms! -> &String.to_existing_atom/1
      end

    if convert do
      Map.new(entries, fn {k, v} -> {convert.(k), v} end)
    else
      Map.new(entries)
    end
  end
269

270
  # Store `value` under `key` in `map`, converting the key according to
  # `opts.keys` (`:strings`, `:atoms` or `:atoms!`) before insertion.
  defp put_key(map, key, value, opts) do
    stored_key =
      case opts.keys do
        :strings -> key
        :atoms -> String.to_atom(key)
        :atoms! -> String.to_existing_atom(key)
      end

    Map.put(map, stored_key, value)
  end
277

NEW
278
  # Returns an empty map; the options argument is accepted but not consulted.
  defp empty_map(_opts) do
    Map.new()
  end
×
279

280
  # Decode the entries found at `base_indent` into a map whose key type follows
  # `opts.keys`. Lines that `parse_entries/4` leaves unconsumed are discarded.
  # Returns `{map, metadata}`.
  defp parse_object_lines(lines, base_indent, opts, metadata) do
    {entries, _unconsumed, final_metadata} = parse_entries(lines, base_indent, opts, metadata)
    object = build_map_with_keys(entries, opts)
    {object, final_metadata}
  end
285

286
  # Collect the `{key, value}` entries that sit exactly at `base_indent`.
  #
  # Returns `{entries, remaining_lines, metadata}`. Collection stops at the
  # first non-blank line whose indent differs from `base_indent` (shallower or
  # deeper); that line and everything after it are handed back untouched.
  defp parse_entries([], _base_indent, _opts, metadata), do: {[], [], metadata}

  # Blank lines are stepped over without producing an entry.
  defp parse_entries([%{is_blank: true} | rest], base_indent, opts, metadata) do
    parse_entries(rest, base_indent, opts, metadata)
  end

  # A line at another indentation level belongs to a different scope.
  defp parse_entries([%{indent: indent} | _] = lines, base_indent, _opts, metadata)
       when indent != base_indent do
    {[], lines, metadata}
  end

  defp parse_entries([line | rest], base_indent, opts, metadata) do
    case parse_entry_line(line, rest, base_indent, opts, metadata) do
      {:entry, key, value, remaining, entry_metadata} ->
        {later_entries, leftover, final_metadata} =
          parse_entries(remaining, base_indent, opts, entry_metadata)

        {[{key, value} | later_entries], leftover, final_metadata}

      {:skip, remaining, skip_metadata} ->
        parse_entries(remaining, base_indent, opts, skip_metadata)
    end
  end
318

319
  # Parse one line at the current indentation level.
  #
  # Returns `{:entry, key, value, remaining_lines, metadata}` when the line
  # yields an object entry, or `{:skip, remaining_lines, metadata}` when it
  # yields nothing. Raises `DecodeError` when the line parser fails and the
  # line is not a recognised structural header either.
  #
  # Values produced by the line parser are treated as provisional: inline
  # arrays and scalars are re-derived from the raw line text so that the
  # array's own delimiter and `parse_value/1` decide the final value.
  defp parse_entry_line(%{content: content} = line_info, rest, base_indent, opts, metadata) do
    # Determined once up front; every branch records the key the same way.
    quoted_key = key_was_quoted?(content)

    case Parser.parse_line(content) do
      # The line parser consumed the whole line.
      {:ok, [parsed], "", _, _, _} ->
        case parsed do
          {key, items} when is_list(items) ->
            meta = add_key_to_metadata(key, quoted_key, metadata)

            if items == [] and peek_next_indent(rest) > base_indent do
              # An empty array followed by deeper lines is a list/tabular
              # header rather than an inline array. If the header handling
              # declines, the entry stays an empty array.
              case handle_special_line(line_info, rest, base_indent, opts, meta) do
                {:skip, _, skipped_meta} -> {:entry, key, [], rest, skipped_meta}
                handled -> handled
              end
            else
              {:entry, key, entry_inline_array_values(content, items), rest, meta}
            end

          {key, object} when is_map(object) ->
            {:entry, key, object, rest, add_key_to_metadata(key, quoted_key, metadata)}

          {key, scalar} ->
            meta = add_key_to_metadata(key, quoted_key, metadata)

            if peek_next_indent(rest) > base_indent do
              # Deeper lines follow: the value is the nested block, and the
              # lines of that block are skipped for the caller.
              {nested, nested_meta} = parse_nested_value(key, rest, base_indent, opts, meta)
              {after_nested, _} = skip_nested_lines(rest, base_indent)
              {:entry, key, nested, after_nested, nested_meta}
            else
              {:entry, key, entry_scalar_value(content, scalar), rest, meta}
            end
        end

      # The line parser stopped early (e.g. `note: a,b` stops at the comma):
      # keep its key, but take the value from the raw text after ": ".
      {:ok, [parsed], leftover, _, _, _} when leftover != "" ->
        with {key, _partial} <- parsed,
             meta = add_key_to_metadata(key, quoted_key, metadata),
             [_, raw_value] <- String.split(content, ": ", parts: 2) do
          {:entry, key, parse_value(String.trim(raw_value)), rest, meta}
        else
          _ -> {:skip, rest, metadata}
        end

      # Any other successful result shape is ignored.
      {:ok, _, _, _, _, _} ->
        {:skip, rest, metadata}

      # Not a plain `key: value` line; it may still be an array or object header.
      {:error, reason, _, _, _, _} ->
        case handle_special_line(line_info, rest, base_indent, opts, metadata) do
          {:skip, _, _} ->
            raise DecodeError,
              message: "Failed to parse line: #{reason}",
              input: content

          handled ->
            handled
        end
    end
  end

  # Re-derive an inline array's values from the raw line, using the delimiter
  # and length declared in its `[N...]` marker. Falls back to the line parser's
  # list when the line has no such marker or no ": " separator.
  # Raises `DecodeError` when the value count differs from the declared length.
  defp entry_inline_array_values(content, parsed_items) do
    with [_, marker, digits] <- Regex.run(~r/^[\w"]+(\[(\d+)[^\]]*\]):/, content),
         declared = String.to_integer(digits),
         delimiter = extract_delimiter(marker),
         [_, raw_values] <- String.split(content, ": ", parts: 2) do
      values = parse_delimited_values(raw_values, delimiter)

      if length(values) != declared do
        raise DecodeError,
          message: "Array length mismatch: declared #{declared}, got #{length(values)}",
          input: content
      end

      values
    else
      _ -> parsed_items
    end
  end

  # Re-derive a scalar from the raw text after the first ": ". Falls back to
  # the line parser's value when the line has no ": " separator.
  defp entry_scalar_value(content, parsed_value) do
    case String.split(content, ": ", parts: 2) do
      [_, raw_value] -> parse_value(String.trim(raw_value))
      _ -> parsed_value
    end
  end
452

453
  # Pattern matching helpers for handle_special_line
454
  # True when `content` is a keyed tabular array header (`key[N]{fields}:`).
  defp tabular_array_header?(content) do
    Regex.match?(@tabular_header_pattern, content)
  end
21✔
455
  # True when `content` is a keyed list array header (`key[N]:`).
  defp list_array_header?(content) do
    Regex.match?(@list_header_pattern, content)
  end
20✔
456

457
  # True when `content` is a bare `key:` line that opens a nested object.
  #
  # Two forms qualify: a line ending in ":" that contains no spaces, or a
  # fully quoted key followed by ":" (e.g. `"my key":`). The quoted form is
  # accepted so that keys containing spaces are not rejected merely because
  # of the space inside the quotes.
  defp nested_object_header?(content) do
    String.ends_with?(content, ":") and
      (not String.contains?(content, " ") or
         String.match?(content, ~r/^"(?:[^"\\]|\\.)*":$/))
  end
460

461
  # Handle lines the plain key/value parser cannot: keyed tabular headers,
  # keyed list headers and nested-object headers, tested in that order.
  # Returns the matching handler's `{:entry, ...}` result, or
  # `{:skip, rest, metadata}` when the line has none of those shapes.
  defp handle_special_line(%{content: content} = line_info, rest, base_indent, opts, metadata) do
    shape =
      cond do
        tabular_array_header?(content) -> :tabular_array
        list_array_header?(content) -> :list_array
        nested_object_header?(content) -> :nested_object
        true -> :unknown
      end

    case shape do
      :tabular_array ->
        parse_tabular_array_entry(line_info, rest, base_indent, opts, metadata)

      :list_array ->
        parse_list_array_entry(line_info, rest, base_indent, opts, metadata)

      :nested_object ->
        parse_nested_object_entry(content, rest, base_indent, opts, metadata)

      :unknown ->
        {:skip, rest, metadata}
    end
  end
477

478
  # Decode a keyed tabular array and package it as an `{:entry, ...}` result.
  # The lines after the array's nested block become the remaining lines.
  defp parse_tabular_array_entry(line_info, rest, base_indent, opts, metadata) do
    {{key, rows}, meta} = parse_tabular_array(line_info, rest, base_indent, opts, metadata)
    {after_block, _} = skip_nested_lines(rest, base_indent)

    {:entry, key, rows, after_block, meta}
  end
485

486
  # Decode a keyed list array and package it as an `{:entry, ...}` result.
  # The lines after the array's nested block become the remaining lines.
  defp parse_list_array_entry(line_info, rest, base_indent, opts, metadata) do
    {{key, items}, meta} = parse_list_array(line_info, rest, base_indent, opts, metadata)
    {after_block, _} = skip_nested_lines(rest, base_indent)

    {:entry, key, items, after_block, meta}
  end
493

494
  # Decode a `key:` header line. When more deeply indented lines follow, they
  # are parsed as the nested object and skipped for the caller; otherwise the
  # value is an empty map. The key is recorded in the metadata either way.
  defp parse_nested_object_entry(content, rest, base_indent, opts, metadata) do
    key = unquote_key(String.trim_trailing(content, ":"))
    meta = add_key_to_metadata(key, key_was_quoted?(content), metadata)

    if peek_next_indent(rest) > base_indent do
      {nested, nested_meta} = parse_nested_object(rest, base_indent, opts, meta)
      {after_block, _} = skip_nested_lines(rest, base_indent)
      {:entry, key, nested, after_block, nested_meta}
    else
      {:entry, key, %{}, rest, meta}
    end
  end
509

510
  # Decode the block nested under a key as an object. The key itself is not
  # needed here; the work is the same as for a nested-object header, so it is
  # delegated. The block's indent is taken from its first content line rather
  # than computed from `opts.indent_size`.
  defp parse_nested_value(_key, lines, base_indent, opts, metadata) do
    parse_nested_object(lines, base_indent, opts, metadata)
  end
518

519
  # Decode the lines nested below `base_indent` as an object. The object's
  # indentation level is whatever the first content line of the block uses,
  # not `base_indent` plus a fixed step. Returns `{map, metadata}`.
  defp parse_nested_object(lines, base_indent, opts, metadata) do
    block = take_nested_lines(lines, base_indent)
    block_indent = get_first_content_indent(block)

    parse_object_lines(block, block_indent, opts, metadata)
  end
526

527
  # Decode a keyed tabular array: a `key[N<delim>]{f1,f2}:` header followed by
  # one delimited row per nested line.
  #
  # Returns `{{key, rows}, metadata}`, where each row is a map built from the
  # header's field names, and the key has been recorded in the metadata.
  #
  # Raises `DecodeError` when the header is malformed or when the number of
  # decoded rows differs from the declared length N. The length check mirrors
  # the one applied to keyed list arrays and root tabular arrays, so a
  # truncated or over-long table is reported rather than silently accepted.
  defp parse_tabular_array(%{content: header}, rest, base_indent, opts, metadata) do
    case Regex.run(~r/^((?:"[^"]*"|[\w.]+))(\[\d+.*\])\{([^}]+)\}:$/, header) do
      [_, raw_key, array_marker, fields_str] ->
        key = unquote_key(raw_key)
        was_quoted = key_was_quoted?(header)
        updated_meta = add_key_to_metadata(key, was_quoted, metadata)

        delimiter = extract_delimiter(array_marker)
        fields = parse_fields(fields_str, delimiter)

        array_data =
          rest
          |> take_nested_lines(base_indent)
          |> parse_tabular_data_rows(fields, delimiter, opts)

        # The header regex guarantees the marker starts with "[" plus digits.
        [_, length_str] = Regex.run(~r/\[(\d+)/, array_marker)
        declared_length = String.to_integer(length_str)

        if length(array_data) != declared_length do
          raise DecodeError,
            message:
              "Tabular array row count mismatch: declared #{declared_length}, got #{length(array_data)}",
            input: header
        end

        {{key, array_data}, updated_meta}

      nil ->
        raise DecodeError, message: "Invalid tabular array header", input: header
    end
  end
549

550
  # Turn tabular row lines into a list of maps keyed by `fields`.
  #
  # Works in two passes. The first pass removes blank lines, raising
  # `DecodeError` for the first blank line when `opts.strict` is truthy. The
  # second pass splits each remaining line on `delimiter` and raises
  # `DecodeError` when a row's value count differs from the field count.
  defp parse_tabular_data_rows(lines, fields, delimiter, opts) do
    data_lines =
      Enum.filter(lines, fn line ->
        cond do
          !line.is_blank ->
            true

          opts.strict ->
            raise DecodeError,
              message: "Blank lines are not allowed inside arrays in strict mode",
              input: line.original

          true ->
            false
        end
      end)

    Enum.map(data_lines, fn %{content: row} ->
      cells = parse_delimited_values(row, delimiter)

      if length(cells) != length(fields) do
        raise DecodeError,
          message: "Row value count mismatch: expected #{length(fields)}, got #{length(cells)}",
          input: row
      end

      # Pair each field name with its cell; key type follows opts.keys.
      fields
      |> Enum.zip(cells)
      |> build_map_with_keys(opts)
    end)
  end
582

583
  # Decode a root tabular array: a `[N<delim>]{f1,f2}:` header followed by one
  # delimited row per nested line. Returns the list of row maps.
  #
  # Raises `DecodeError` when the header is malformed or when the number of
  # data rows differs from the declared length N. Only non-blank lines count
  # as rows: blank lines are judged separately by `parse_tabular_data_rows/4`
  # (rejected in strict mode, ignored otherwise), so they must not be reported
  # here as a row-count mismatch.
  defp parse_tabular_array_data(header, rest, base_indent, opts) do
    case Regex.run(~r/^\[(\d+)([^\]]*)\]\{([^}]+)\}:$/, header) do
      [_, length_str, delimiter_marker, fields_str] ->
        declared_length = String.to_integer(length_str)
        # Only the delimiter part of the marker is passed on, without the digits.
        delimiter = extract_delimiter("[#{delimiter_marker}]")
        fields = parse_fields(fields_str, delimiter)
        data_rows = take_nested_lines(rest, base_indent)

        row_count = Enum.count(data_rows, fn line -> not line.is_blank end)

        if row_count != declared_length do
          raise DecodeError,
            message:
              "Tabular array row count mismatch: declared #{declared_length}, got #{row_count}",
            input: header
        end

        parse_tabular_data_rows(data_rows, fields, delimiter, opts)

      nil ->
        raise DecodeError, message: "Invalid tabular array header", input: header
    end
  end
606

607
  # Decode a keyed list array: a `key[N<delim>]:` header followed by nested
  # list items. The header's delimiter is placed into the options under
  # `:delimiter` before the items are parsed.
  #
  # Returns `{{key, items}, metadata}` with the key recorded in the metadata.
  # Raises `DecodeError` when the header is malformed or when the number of
  # items differs from the declared length N.
  defp parse_list_array(%{content: header}, rest, base_indent, opts, metadata) do
    captures = Regex.run(~r/^((?:"[^"]*"|[\w.]+))(\[\d+[^\]]*\]):$/, header)

    if captures == nil do
      raise DecodeError, message: "Invalid list array header", input: header
    end

    [_, raw_key, marker] = captures

    declared =
      case Regex.run(~r/\[(\d+)/, marker) do
        [_, digits] -> String.to_integer(digits)
        nil -> 0
      end

    key = unquote_key(raw_key)
    meta = add_key_to_metadata(key, key_was_quoted?(header), metadata)

    item_opts = Map.put(opts, :delimiter, extract_delimiter(marker))
    items = parse_list_array_items(rest, base_indent, item_opts)

    if length(items) != declared do
      raise DecodeError,
        message: "Array length mismatch: declared #{declared}, got #{length(items)}",
        input: header
    end

    {{key, items}, meta}
  end
641

642
  # Decode the list items nested below `base_indent`. The items' indentation
  # level is taken from the first content line of the nested block rather than
  # computed from `opts.indent_size`. Returns the items in document order.
  defp parse_list_array_items(lines, base_indent, opts) do
    block = take_nested_lines(lines, base_indent)
    item_indent = get_first_content_indent(block)

    parse_list_items(block, item_indent, opts, [])
  end
649

650
  # Walk the lines of a list block, accumulating decoded items in `acc`
  # (built in reverse and flipped at the end).
  #
  # Each line is classified first and then handled:
  #   * blank line              - `DecodeError` in strict mode, otherwise skipped
  #   * `- [N]: v1,v2`          - inline array item with values on the same line
  #   * starts with `-`         - regular list item
  #   * anything else           - skipped
  defp parse_list_items([], _expected_indent, _opts, acc), do: Enum.reverse(acc)

  defp parse_list_items([line | rest], expected_indent, opts, acc) do
    kind =
      cond do
        line.is_blank -> :blank
        # Needs content after ": "; without it the line is a list-format header.
        String.match?(line.content, ~r/^\s*- \[.*\]: .+/) -> :inline_array
        String.starts_with?(String.trim_leading(line.content), "-") -> :item
        true -> :other
      end

    case kind do
      :blank ->
        if opts.strict do
          raise DecodeError,
            message: "Blank lines are not allowed inside arrays in strict mode",
            input: line.original
        end

        parse_list_items(rest, expected_indent, opts, acc)

      :inline_array ->
        {item, remaining} = parse_inline_array_item(line, rest, expected_indent, opts)
        parse_list_items(remaining, expected_indent, opts, [item | acc])

      :item ->
        {item, remaining} = parse_list_item(line, rest, expected_indent, opts)
        parse_list_items(remaining, expected_indent, opts, [item | acc])

      :other ->
        parse_list_items(rest, expected_indent, opts, acc)
    end
  end
680

681
  # Pattern matching helpers for list item parsing
682
  # Strip the list item marker from the start of `content` and return the
  # item's own text. Leading whitespace is ignored; the marker is `- ` or,
  # when no space follows, a bare `-`.
  #
  # Exactly one marker is removed. Removing `- ` and then also a leading `-`
  # would eat the first character of the item whenever the item itself starts
  # with `-`, turning `- -5` into `5`.
  defp remove_list_marker(content) do
    case String.trim_leading(content) do
      "- " <> item -> item
      "-" <> item -> item
      unmarked -> unmarked
    end
  end
688

689
  # True when `str` is an array marker followed by values on the same line.
  defp inline_array_with_values?(str) do
    Regex.match?(@inline_array_pattern, str)
  end
33✔
690
  # True when `str` is a keyless array header (`[N]:`) with nothing after it.
  defp list_array_header_only?(str) do
    Regex.match?(@list_array_header_pattern, str)
  end
33✔
691

692
  # Decode one list item line: strip its `-` marker, then dispatch on the
  # shape of what remains. Returns `{item, remaining_lines}`.
  defp parse_list_item(%{content: content} = line, rest, expected_indent, opts) do
    content
    |> remove_list_marker()
    |> route_list_item(rest, line, expected_indent, opts)
  end
697

NEW
698
  # Dispatch a marker-stripped list item to the parser for its shape.
  # Returns `{item, remaining_lines}`. An item with no text (or only
  # whitespace) decodes to an empty map. Shapes are tested in this order:
  # inline array with values, keyless list header, keyed tabular header,
  # keyed list header, and finally a normal item.
  defp route_list_item("", rest, _line, _expected_indent, _opts), do: {%{}, rest}

  defp route_list_item(trimmed, rest, line, expected_indent, opts) do
    shape =
      cond do
        String.trim(trimmed) == "" -> :empty
        inline_array_with_values?(trimmed) -> :inline_array
        list_array_header_only?(trimmed) -> :nested_list
        tabular_array_header?(trimmed) -> :keyed_tabular
        list_array_header?(trimmed) -> :keyed_list
        true -> :normal
      end

    case shape do
      :empty ->
        {%{}, rest}

      :inline_array ->
        parse_inline_array_from_line(trimmed, rest)

      :nested_list ->
        parse_nested_list_array(trimmed, rest, line, expected_indent, opts)

      :keyed_tabular ->
        parse_list_item_with_array(trimmed, rest, line, expected_indent, opts, :tabular)

      :keyed_list ->
        parse_list_item_with_array(trimmed, rest, line, expected_indent, opts, :list)

      :normal ->
        parse_list_item_normal(trimmed, rest, line, expected_indent, opts)
    end
  end
723

724
  # Decode a list item that is neither an array header nor an inline array.
  # The item text is run through the line parser and the outcome decides the
  # handler:
  #   * whole text consumed       - `handle_complete_parse/6`
  #   * `{key, value}` with input
  #     left over                 - `handle_partial_parse/8`
  #   * parser error              - `handle_parse_error/4`
  # The active delimiter comes from `opts[:delimiter]`, defaulting to ",".
  defp parse_list_item_normal(trimmed, rest, line, expected_indent, opts) do
    active_delimiter = Map.get(opts, :delimiter, ",")

    case Parser.parse_line(trimmed) do
      {:ok, [parsed], "", _, _, _} ->
        handle_complete_parse(parsed, trimmed, rest, line, expected_indent, opts)

      {:ok, [{key, partial}], leftover, _, _, _} when is_binary(leftover) and leftover != "" ->
        handle_partial_parse(
          key,
          partial,
          leftover,
          active_delimiter,
          rest,
          line,
          expected_indent,
          opts
        )

      {:error, _, _, _, _, _} ->
        handle_parse_error(trimmed, rest, expected_indent, opts)
    end
  end
749

750
  # Handles a list item whose text the line parser consumed completely.
  #
  #   * A `{key, value}` result is the first field of an object item: the lines belonging
  #     to the item are collected with `take_item_lines/2`, the whole group is parsed as
  #     one object, and the collected lines are dropped from `rest`.
  #   * Any other result is returned as the item's value with `rest` untouched.
  #
  # Returns `{value, remaining_lines}`.
  defp handle_complete_parse({_key, _value}, trimmed, rest, line, expected_indent, opts) do
    continuation_lines = take_item_lines(rest, expected_indent)

    # The object is parsed at the shallowest continuation indent, or at the item line's own
    # indent when there are no continuation lines. Matching on the list shape avoids
    # walking the whole list just to test for emptiness.
    item_indent =
      case continuation_lines do
        [] -> line.indent
        [_ | _] -> continuation_lines |> Enum.map(& &1.indent) |> Enum.min()
      end

    # Re-home the first field (list marker already removed) at the same indent as the
    # continuation lines so they are parsed together.
    item_lines = [%{line | content: trimmed, indent: item_indent} | continuation_lines]
    # List items don't need metadata tracking (not top-level)
    empty_metadata = %{quoted_keys: MapSet.new(), key_order: []}
    {object, _} = parse_object_lines(item_lines, item_indent, opts, empty_metadata)

    {object, Enum.drop(rest, length(continuation_lines))}
  end

  # Primitive item: nothing beyond the item line itself is consumed.
  defp handle_complete_parse(value, _trimmed, rest, _line, _expected_indent, _opts) do
    {value, rest}
  end
776

777
  # Handles a list item where the line parser produced a `{key, partial_value}` pair but
  # left `remaining_input` unparsed.
  #
  # When the active delimiter is not a comma and the leftover starts with ",", the comma is
  # treated as part of the value: the value is rebuilt from `partial_value` plus the
  # leftover, the line is rewritten as "key: value", and the item is parsed as an object
  # together with its continuation lines.
  #
  # Returns `{object, remaining_lines}`.
  # Raises `DecodeError` (reason `:parse_error`) for any other leftover input.
  defp handle_partial_parse(
         key,
         partial_value,
         remaining_input,
         delimiter,
         rest,
         line,
         expected_indent,
         opts
       ) do
    if delimiter != "," and String.starts_with?(remaining_input, ",") do
      # Re-parse: the full value is partial_value + remaining_input
      full_value = parse_value(to_string(partial_value) <> remaining_input)

      continuation_lines = take_item_lines(rest, expected_indent)

      # Shallowest continuation indent, or the item line's own indent when there are no
      # continuation lines. Matching on the list shape avoids an O(n) `length/1` call.
      item_indent =
        case continuation_lines do
          [] -> line.indent
          [_ | _] -> continuation_lines |> Enum.map(& &1.indent) |> Enum.min()
        end

      adjusted_content = "#{key}: #{full_value}"
      item_lines = [%{line | content: adjusted_content, indent: item_indent} | continuation_lines]
      # List items don't need metadata tracking (not top-level)
      empty_metadata = %{quoted_keys: MapSet.new(), key_order: []}
      {object, _} = parse_object_lines(item_lines, item_indent, opts, empty_metadata)

      {object, Enum.drop(rest, length(continuation_lines))}
    else
      raise DecodeError,
        message: "Parse failed: unexpected remaining input '#{remaining_input}'",
        reason: :parse_error
    end
  end
815

816
  # Fallback for a list item the line parser rejected.
  #
  # A key-only line (ends with ":" and contains no space, e.g. "data:") followed by more
  # deeply indented content is parsed as a key holding nested content. Everything else,
  # including a key-only line without nested content, is parsed as a standalone primitive.
  #
  # Returns `{value, remaining_lines}`.
  defp handle_parse_error(trimmed, rest, expected_indent, opts) do
    with true <- String.ends_with?(trimmed, ":") and not String.contains?(trimmed, " "),
         next_indent = peek_next_indent(rest),
         true <- next_indent > expected_indent do
      parse_nested_key_with_content(trimmed, rest, next_indent, expected_indent, opts)
    else
      false -> {parse_value(trimmed), rest}
    end
  end
833

834
  # Drops the leading run of lines that `take_lines_at_level/2` takes for the same
  # `min_indent`: blank lines plus lines indented at least `min_indent`.
  #
  # The two helpers are used as a pair (take the nested lines, then skip them), so the
  # predicates must agree. Stopping at a blank line here would hand lines that were
  # already taken and parsed back to the caller as "remaining" input.
  defp drop_lines_at_level(lines, min_indent) do
    Enum.drop_while(lines, fn line -> line.is_blank or line.indent >= min_indent end)
  end
838

839
  # Builds the object for a list item whose first key holds nested content.
  #
  # `more_fields` are the item's remaining field lines. When there are none the object
  # starts from `empty_map(opts)`; otherwise the fields are parsed as an object at their
  # shallowest indent. `key => nested_value` is then added with `put_key/4`.
  defp build_object_with_nested(key, nested_value, more_fields, opts) do
    base_object =
      case more_fields do
        [] ->
          empty_map(opts)

        _ ->
          shallowest = more_fields |> Enum.min_by(& &1.indent) |> Map.fetch!(:indent)
          blank_metadata = %{quoted_keys: MapSet.new(), key_order: []}
          {parsed_fields, _} = parse_object_lines(more_fields, shallowest, opts, blank_metadata)
          parsed_fields
      end

    put_key(base_object, key, nested_value, opts)
  end
850

851
  # Parses a key-only list item line (e.g. "data:") together with the nested block below it.
  #
  # Steps:
  #   1. derive the key by stripping trailing ":" and unquoting
  #   2. parse the lines at `next_indent` as the nested object
  #   3. step past those lines, then collect the item's remaining fields
  #   4. build the item object from the remaining fields plus `key => nested value`
  #
  # Returns `{object, remaining_lines}`.
  defp parse_nested_key_with_content(trimmed, rest, next_indent, expected_indent, opts) do
    key = trimmed |> String.trim_trailing(":") |> unquote_key()
    blank_metadata = %{quoted_keys: MapSet.new(), key_order: []}

    {nested_value, _} =
      rest
      |> take_lines_at_level(next_indent)
      |> parse_object_lines(next_indent, opts, blank_metadata)

    after_nested = drop_lines_at_level(rest, next_indent)
    sibling_fields = take_item_lines(after_nested, expected_indent)
    object = build_object_with_nested(key, nested_value, sibling_fields, opts)

    # Dropping zero lines is a no-op, so the empty-fields case needs no special handling.
    {object, Enum.drop(after_nested, length(sibling_fields))}
  end
875

876
  # Indent at which the items of a nested list array are parsed.
  #
  # With no array lines the indent is derived as `expected_indent` plus the `:indent_size`
  # option (default 2); otherwise it is the smallest indent found among the lines.
  defp get_nested_indent(lines, expected_indent, opts) do
    case lines do
      [] -> expected_indent + Map.get(opts, :indent_size, 2)
      _ -> lines |> Enum.min_by(& &1.indent) |> Map.fetch!(:indent)
    end
  end
×
882

883
  # Parses the field lines that follow an array inside a list item into an object.
  #
  # No field lines -> `empty_map(nil)`; otherwise the lines are parsed as an object at their
  # shallowest indent.
  # NOTE(review): the empty case passes `nil` where `build_object_with_nested/4` passes
  # `opts` to `empty_map/1` - confirm that ignoring the options here is intended.
  defp parse_remaining_fields(fields, opts) do
    case fields do
      [] ->
        empty_map(nil)

      _ ->
        shallowest = fields |> Enum.min_by(& &1.indent) |> Map.fetch!(:indent)
        blank_metadata = %{quoted_keys: MapSet.new(), key_order: []}
        {parsed_fields, _} = parse_object_lines(fields, shallowest, opts, blank_metadata)
        parsed_fields
    end
  end
892

893
  # Parses the array announced by a list item's header line and returns `{key, array}`.
  #
  # `:tabular` - the header supplies key, array marker and field list; the data lines that
  # follow are parsed as rows using the delimiter taken from the marker.
  # Raises `DecodeError` when the header does not match the tabular header pattern.
  defp parse_array_from_header(trimmed, rest, expected_indent, opts, :tabular) do
    case Regex.run(@tabular_header_regex, trimmed) do
      nil ->
        raise DecodeError, message: "Invalid tabular array in list item", input: trimmed

      [_match, raw_key, array_marker, fields_str] ->
        key = unquote_key(raw_key)
        delimiter = extract_delimiter(array_marker)
        fields = parse_fields(fields_str, delimiter)

        rows =
          rest
          |> take_array_data_lines(expected_indent, opts)
          |> parse_tabular_data_rows(fields, delimiter, opts)

        {key, rows}
    end
  end

  # `:list` - the header supplies the key (the captured length is not used here); the data
  # lines that follow are parsed as list items at the indent from `get_nested_indent/3`.
  # Raises `DecodeError` when the header does not match the list array pattern.
  defp parse_array_from_header(trimmed, rest, expected_indent, opts, :list) do
    case Regex.run(@list_array_regex, trimmed) do
      nil ->
        raise DecodeError, message: "Invalid list array in list item", input: trimmed

      [_match, raw_key, _length_str] ->
        key = unquote_key(raw_key)
        data_lines = take_array_data_lines(rest, expected_indent, opts)
        items_indent = get_nested_indent(data_lines, expected_indent, opts)
        {key, parse_list_items(data_lines, items_indent, opts, [])}
    end
  end
921

922
  # Parses a list item whose first field is an array (tabular or list format).
  #
  # The array is parsed from the header line and its data lines; the item's remaining
  # field lines are then parsed into an object, and the array is added to that object
  # under the header's key.
  #
  # Returns `{object, remaining_lines}`.
  defp parse_list_item_with_array(trimmed, rest, _line, expected_indent, opts, array_type) do
    {key, array_value} = parse_array_from_header(trimmed, rest, expected_indent, opts, array_type)
    {rest_after_array, _} = skip_array_data_lines(rest, expected_indent)
    remaining_fields = take_item_lines(rest_after_array, expected_indent)

    object =
      remaining_fields
      |> parse_remaining_fields(opts)
      |> put_key(key, array_value, opts)

    # Skip from where the array data ended, mirroring the `take_item_lines/2` call above.
    # Restarting from `rest` re-applies the item-line rule to the array's own data lines,
    # which can stop early (e.g. at a blank line inside the array data) and return lines
    # that were already consumed.
    {remaining, _} = skip_item_lines(rest_after_array, expected_indent)
    {object, remaining}
  end
934

935
  # Returns the leading lines of `lines` that hold the data of an array whose header sits
  # on a list-item line at `base_indent`.
  #
  # The third argument (options) is accepted for call compatibility and is not consulted.
  defp take_array_data_lines(lines, base_indent, _opts) do
    Enum.take_while(lines, array_data_line_matcher(lines, base_indent))
  end

  # Counterpart of `take_array_data_lines/3`: skips exactly the lines that function takes.
  #
  # Returns `{remaining_lines, skipped_count}`.
  defp skip_array_data_lines(lines, base_indent) do
    remaining = Enum.drop_while(lines, array_data_line_matcher(lines, base_indent))
    {remaining, length(lines) - length(remaining)}
  end

  # Builds the predicate shared by the take and skip functions so they cannot drift apart.
  #
  # The first non-blank line decides the array format. A line starting with "-" that is
  # indented deeper than `base_indent` marks a list array, and its indent becomes the
  # item level. Anything else is treated as tabular data. A "-" line at or above
  # `base_indent` is the next sibling list item, not array data, so it must not switch
  # on list mode.
  defp array_data_line_matcher(lines, base_indent) do
    case Enum.find(lines, fn line -> !line.is_blank end) do
      %{content: content, indent: item_indent} when item_indent > base_indent ->
        if String.starts_with?(String.trim_leading(content), "-") do
          &list_array_data_line?(&1, item_indent)
        else
          &tabular_data_line?(&1, base_indent)
        end

      _ ->
        &tabular_data_line?(&1, base_indent)
    end
  end

  # List array data: blank lines, anything nested below the item level, and lines at the
  # item level that start with a list marker.
  defp list_array_data_line?(line, item_indent) do
    cond do
      line.is_blank -> true
      line.indent > item_indent -> true
      line.indent == item_indent -> String.starts_with?(String.trim_leading(line.content), "-")
      true -> false
    end
  end

  # Tabular array data: blank lines, and lines deeper than the header that don't look like
  # "key: value" fields.
  defp tabular_data_line?(line, base_indent) do
    cond do
      line.is_blank -> true
      line.indent > base_indent -> not String.match?(line.content, @field_pattern)
      true -> false
    end
  end
1047

1048
  # Parses an inline array written on a single line, e.g. "[2]: a,b".
  #
  # The bracketed marker ([N], [N|], [N\t]) selects the delimiter. An empty value part
  # yields `[]`, and text that doesn't have the "[...]: ..." shape is returned unchanged as
  # a string. `rest` is always passed through untouched.
  defp parse_inline_array_from_line(trimmed, rest) do
    case Regex.run(~r/^\[([^\]]+)\]:\s*(.*)$/, trimmed) do
      nil ->
        # Malformed, return as string
        {trimmed, rest}

      [_match, _array_marker, ""] ->
        {[], rest}

      [_match, array_marker, values_str] ->
        {parse_delimited_values(values_str, extract_delimiter(array_marker)), rest}
    end
  end
1069

1070
  # Parses a nested list-format array that is itself a list item (e.g. "- [1]:" followed
  # by more deeply indented items).
  #
  # The nested lines are taken with `take_nested_lines/2` and parsed as list items at the
  # indent of their first non-blank line. With no nested lines the result is an empty
  # list and `rest` is returned untouched.
  #
  # Returns `{items, remaining_lines}`.
  defp parse_nested_list_array(_trimmed, rest, _line, expected_indent, opts) do
    case take_nested_lines(rest, expected_indent) do
      [] ->
        {[], rest}

      array_lines ->
        nested_indent = get_first_content_indent(array_lines)
        array_items = parse_list_items(array_lines, nested_indent, opts, [])

        # `array_lines` is a prefix of `rest`, so dropping that many lines skips exactly
        # what was parsed. An indent-only skip can stop at a blank line that
        # `take_nested_lines/2` kept between items, leaving parsed items in the remainder.
        {array_items, Enum.drop(rest, length(array_lines))}
    end
  end
1084

1085
  # Parses a list item that is an inline array: strips leading whitespace and a leading
  # "- " marker from the line, then delegates to `parse_inline_array_from_line/2`, which
  # understands the "[N]: ..." form.
  defp parse_inline_array_item(%{content: content}, rest, _expected_indent, _opts) do
    content
    |> String.trim_leading()
    |> String.replace_prefix("- ", "")
    |> parse_inline_array_from_line(rest)
  end
1092

1093
  # Splits the field list of a tabular header into field names.
  #
  # Each token is either a complete double-quoted string (the delimiter may appear inside
  # it) or a run of characters that are not the delimiter. Tokens are trimmed and unquoted.
  defp parse_fields(fields_str, delimiter) do
    delimiter_escaped = Regex.escape(delimiter)
    token_regex = ~r/("(?:[^"\\]|\\.)*"|[^#{delimiter_escaped}]+)/

    for [token | _captures] <- Regex.scan(token_regex, fields_str) do
      token |> String.trim() |> unquote_key()
    end
  end
1104

1105
  # Determines the value delimiter from an array marker such as "2", "2|" or "2\t".
  # A pipe wins over a tab; a marker containing neither means comma.
  defp extract_delimiter(array_marker) do
    Enum.find(["|", "\t"], ",", &String.contains?(array_marker, &1))
  end
1113

1114
  # Splits a row of delimited values and parses each one into a TOON value.
  #
  # When the declared delimiter is a comma but the row contains tabs and no commas, the
  # row is split on tabs instead. Splitting respects quoted strings; each token is
  # trimmed before it is parsed, so spaces around delimiters and empty tokens are handled.
  defp parse_delimited_values(row_str, delimiter) do
    tab_separated? =
      delimiter == "," and String.contains?(row_str, "\t") and
        not String.contains?(row_str, ",")

    actual_delimiter = if tab_separated?, do: "\t", else: delimiter

    for token <- split_respecting_quotes(row_str, actual_delimiter) do
      token |> String.trim() |> parse_value()
    end
  end
1131

1132
  # Splits `str` on a single-byte `delimiter`, ignoring delimiters inside double quotes.
  # Quotes and backslash escapes are kept in the returned tokens.
  defp split_respecting_quotes(str, delimiter) do
    do_split_respecting_quotes(str, delimiter, [], false, [])
  end

  # Byte-wise scanner behind `split_respecting_quotes/2`.
  #
  # `pending` is the token being built (reversed iodata), `in_quote?` tells whether the
  # scanner is inside a quoted section, and `done` holds the finished tokens in reverse.

  # End of input: finish the last token and restore token order.
  defp do_split_respecting_quotes("", _delimiter, pending, _in_quote?, done) do
    Enum.reverse([finish_token(pending) | done])
  end

  # Backslash escape: keep the backslash and the escaped byte, so an escaped quote does
  # not toggle the quote state.
  defp do_split_respecting_quotes(<<?\\, escaped, tail::binary>>, delimiter, pending, in_quote?, done) do
    do_split_respecting_quotes(tail, delimiter, [<<escaped>>, "\\" | pending], in_quote?, done)
  end

  # Unescaped quote: keep it and flip the quote state.
  defp do_split_respecting_quotes(<<?", tail::binary>>, delimiter, pending, in_quote?, done) do
    do_split_respecting_quotes(tail, delimiter, ["\"" | pending], not in_quote?, done)
  end

  # Delimiter outside quotes: close the current token and start a new one.
  defp do_split_respecting_quotes(<<byte, tail::binary>>, delimiter, pending, false, done)
       when <<byte>> == delimiter do
    do_split_respecting_quotes(tail, delimiter, [], false, [finish_token(pending) | done])
  end

  # Any other byte belongs to the current token.
  defp do_split_respecting_quotes(<<byte, tail::binary>>, delimiter, pending, in_quote?, done) do
    do_split_respecting_quotes(tail, delimiter, [<<byte>> | pending], in_quote?, done)
  end

  # Turns the reversed iodata of a token into its binary.
  defp finish_token(pending), do: pending |> Enum.reverse() |> IO.iodata_to_binary()
1165

1166
  # Parses one textual value: surrounding whitespace is trimmed, then the token is typed.
  defp parse_value(str) do
    str |> String.trim() |> do_parse_value()
  end

  # Types an already-trimmed token: the literals null/true/false, a double-quoted string
  # (unquoted and unescaped), or otherwise a number / bare string.
  defp do_parse_value(token) do
    case token do
      "null" -> nil
      "true" -> true
      "false" -> false
      "\"" <> _ -> unquote_string(token)
      _ -> parse_number_or_string(token)
    end
  end
117✔
1176

1177
  # Interprets an unquoted token as a number when possible, otherwise keeps it as a string.
  #
  #   * "0" and "-0" are the number 0
  #   * a leading zero followed by another digit ("05", "-007") keeps the token a string
  #     (per the TOON spec, as stated in the original comment here)
  #   * anything `Float.parse/1` consumes completely becomes a number
  #   * everything else is returned unchanged
  defp parse_number_or_string(str) do
    case str do
      zero when zero in ["0", "-0"] ->
        0

      <<?0, digit, _::binary>> when digit in ?0..?9 ->
        str

      <<?-, ?0, digit, _::binary>> when digit in ?0..?9 ->
        str

      _ ->
        with {number, ""} <- Float.parse(str) do
          normalize_parsed_number(number, str)
        else
          _ -> str
        end
    end
  end
1195

1196
  # Picks the final numeric type for a token that parsed as a float.
  #
  # Tokens written with a decimal point or exponent go through
  # `normalize_decimal_number/1`; plain integer notation is converted from the original
  # text instead of the parsed float.
  defp normalize_parsed_number(num, str) do
    case has_decimal_or_exponent?(str) do
      true -> normalize_decimal_number(num)
      false -> String.to_integer(str)
    end
  end
1204

1205
  # True when the numeric text uses a decimal point or an exponent marker ("e" / "E").
  defp has_decimal_or_exponent?(str) do
    String.contains?(str, [".", "e", "E"])
  end
1208

NEW
1209
  # Collapses a number with no fractional part to an integer (2.0 -> 2); any other value is
  # returned unchanged.
  defp normalize_decimal_number(num) do
    case num do
      whole when whole == trunc(whole) -> trunc(whole)
      other -> other
    end
  end
2✔
1211

1212
  # Returns the key without its surrounding quotes.
  #
  # A key starting with a double quote has its first and last characters removed and its
  # escape sequences resolved; any other key is returned unchanged.
  defp unquote_key(key) do
    case key do
      "\"" <> _ -> key |> String.slice(1..-2//1) |> unescape_string()
      _ -> key
    end
  end
24✔
1218

1219
  # True when the source line, ignoring leading whitespace, starts with a double quote,
  # i.e. the line's key was written in quoted form.
  defp key_was_quoted?(original_line) do
    match?("\"" <> _, String.trim_leading(original_line))
  end
1224

1225
  # Records `key` in the parse metadata: it is appended to `key_order`, and additionally
  # added to the `quoted_keys` set when `was_quoted` is truthy.
  defp add_key_to_metadata(key, was_quoted, metadata) do
    with_quoting =
      if was_quoted do
        Map.update!(metadata, :quoted_keys, &MapSet.put(&1, key))
      else
        metadata
      end

    Map.update!(with_quoting, :key_order, &(&1 ++ [key]))
  end
1236

1237
  # Strips the surrounding quotes from a double-quoted string and resolves its escape
  # sequences. Values that do not start with a double quote are returned unchanged.
  #
  # Raises `DecodeError` ("Unterminated string") when the closing quote is missing or
  # escaped.
  defp unquote_string(<<?", _::binary>> = quoted) do
    if not properly_quoted?(quoted) do
      raise DecodeError, message: "Unterminated string", input: quoted
    end

    quoted |> String.slice(1..-2//1) |> unescape_string()
  end

  defp unquote_string(other), do: other
×
1247

1248
  # True when `str` is a complete quoted string: at least two bytes long, starting with a
  # double quote and ending with a double quote that is not itself escaped.
  defp properly_quoted?(str) do
    case str do
      <<?", _, _::binary>> -> String.ends_with?(str, "\"") and not escaped_quote_at_end?(str)
      _ -> false
    end
  end
×
1257

1258
  # True when the final character of `str` is preceded by an odd number of consecutive
  # backslashes, meaning a closing quote in that position is escaped.
  defp escaped_quote_at_end?(str) do
    str
    # Drop the final character (the closing quote), then walk backwards from the end.
    |> String.slice(0..-2//1)
    |> String.reverse()
    |> String.to_charlist()
    # Flip the parity for each backslash and stop at the first other character.
    |> Enum.reduce_while(false, fn
      ?\\, odd? -> {:cont, not odd?}
      _other, odd? -> {:halt, odd?}
    end)
  end
1272

1273
  # Resolves the escape sequences in the body of a quoted string.
  #
  # Only \\, \", \n, \r and \t are treated as valid (per the TOON spec, as stated in the
  # original comment here). Escaped backslashes are parked as a NUL byte first so that the
  # later replacements cannot misread them, then restored as a single backslash after the
  # result has been checked for invalid escapes.
  defp unescape_string(str) do
    replacements = [
      {"\\\\", <<0>>},
      {"\\\"", "\""},
      {"\\n", "\n"},
      {"\\r", "\r"},
      {"\\t", "\t"}
    ]

    replacements
    |> Enum.reduce(str, fn {escaped, literal}, acc -> String.replace(acc, escaped, literal) end)
    |> validate_no_invalid_escapes(str)
    |> String.replace(<<0>>, "\\")
  end
1287

1288
  # Passes `processed` through when it contains no match for the module's invalid-escape
  # pattern; otherwise raises `DecodeError`, reporting the `original` text as the input.
  defp validate_no_invalid_escapes(processed, original) do
    case String.match?(processed, @invalid_escape_pattern) do
      false -> processed
      true -> raise DecodeError, message: "Invalid escape sequence", input: original
    end
  end
1295

1296
  # Indent of the next non-blank line, or 0 when only blank lines (or nothing) remain.
  defp peek_next_indent(lines) do
    case Enum.drop_while(lines, &match?(%{is_blank: true}, &1)) do
      [%{indent: indent} | _] -> indent
      [] -> 0
    end
  end
32✔
1300

1301
  # Indent of the first line that is not blank; 0 when there is no such line.
  defp get_first_content_indent(lines) do
    case Enum.find(lines, &(not match?(%{is_blank: true}, &1))) do
      nil -> 0
      %{indent: indent} -> indent
    end
  end
39✔
1305

1306
  # Takes the leading run of lines that are blank or indented at least `min_indent`,
  # stopping at the first non-blank line that is indented less.
  defp take_lines_at_level(lines, min_indent) do
    {taken, _remaining} =
      Enum.split_while(lines, &(&1.is_blank or &1.indent >= min_indent))

    taken
  end
1312

1313
  # Takes the leading lines that are indented deeper than `base_indent`.
  #
  # Blank lines between nested content are kept; a blank line is dropped, and the run
  # ends, as soon as the next non-blank line is no longer nested.
  defp take_nested_lines(lines, base_indent) do
    take_nested_lines_helper(lines, base_indent, false)
  end

  # Recursive worker for `take_nested_lines/2`. The third argument is set to `true` once a
  # nested content line has been taken; it is threaded through the recursion but not
  # otherwise inspected here.
  defp take_nested_lines_helper([], _base_indent, _seen_content), do: []

  defp take_nested_lines_helper([line | rest], base_indent, seen_content) do
    if line.is_blank do
      # Keep a blank line only when the next non-blank line is still nested.
      if peek_next_indent(rest) > base_indent do
        [line | take_nested_lines_helper(rest, base_indent, seen_content)]
      else
        []
      end
    else
      # Content line: keep it while it is deeper than the base level, otherwise stop.
      if line.indent > base_indent do
        [line | take_nested_lines_helper(rest, base_indent, true)]
      else
        []
      end
    end
  end
1346

1347
  # Skips the leading lines whose indent is greater than `base_indent`.
  #
  # Returns `{remaining_lines, skipped_count}`.
  defp skip_nested_lines(lines, base_indent) do
    {skipped, remaining} =
      Enum.split_while(lines, fn %{indent: indent} -> indent > base_indent end)

    {remaining, length(skipped)}
  end
1352

1353
  # Takes the lines that belong to the current list item.
  #
  # A line belongs to the item when it is indented deeper than `base_indent`, or sits
  # exactly at `base_indent` without starting with the "- " marker of the next item.
  defp take_item_lines(lines, base_indent) do
    Enum.take_while(lines, fn line ->
      line.indent > base_indent or
        (line.indent == base_indent and
           not String.starts_with?(String.trim_leading(line.content), "- "))
    end)
  end
1365

1366
  # Skips the lines that belong to the current list item.
  #
  # A line is skipped when it is indented deeper than `base_indent`, or sits exactly at
  # `base_indent` without starting with the "- " marker of the next item.
  #
  # Returns `{remaining_lines, skipped_count}`.
  defp skip_item_lines(lines, base_indent) do
    {skipped, remaining} =
      Enum.split_while(lines, fn line ->
        line.indent > base_indent or
          (line.indent == base_indent and
             not String.starts_with?(String.trim_leading(line.content), "- "))
      end)

    {remaining, length(skipped)}
  end
1381
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc