• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ocaml / odoc / 2931

17 Mar 2025 07:06PM UTC coverage: 73.263% (-0.1%) from 73.409%
2931

push

github

jonludlam
OCaml 4.02 compat

10388 of 14179 relevant lines covered (73.26%)

9820.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.32
/src/parser/syntax.ml
1
(* This module is a recursive descent parser for the ocamldoc syntax. The parser
2
   consumes a token stream of type [Token.t Stream.t], provided by the lexer,
3
   and produces a comment AST of the type defined in [Parser_.Ast].
4

5
   The AST has two main levels: inline elements, which can appear inside
6
   paragraphs, and are spaced horizontally when presented, and block elements,
7
   such as paragraphs and lists, which are spaced vertically when presented.
8
   Block elements contain inline elements, but not vice versa.
9

10
   Corresponding to this, the parser has three "main" functions:
11

12
   - [delimited_inline_element_list] parses a run of inline elements that is
13
     delimited by curly brace markup ([{...}]).
14
   - [paragraph] parses a run of inline elements that make up a paragraph, and
15
     is not explicitly delimited with curly braces.
16
   - [block_element_list] parses a sequence of block elements. A comment is a
17
     sequence of block elements, so [block_element_list] is the top-level
18
     parser. It is also used for list item and tag content. *)
19

20
open! Compat
21

22
type 'a with_location = 'a Loc.with_location
23

24
(* {2 Input} *)
25

26
type input = {
27
  tokens : Token.t Loc.with_location Stream.t;
28
  warnings : Warning.t list ref;
29
}
30

31
(* {2 Output} *)
32

33
let add_warning input warning = input.warnings := warning :: !(input.warnings)
185✔
34
let junk input = Stream.junk input.tokens
13,930✔
35

36
let peek input =
37
  match Stream.peek input.tokens with
19,150✔
38
  | Some token -> token
19,150✔
39
  | None -> assert false
40

41
module Table = struct
42
  module Light_syntax = struct
43
    let valid_align = function
44
      | [ { Loc.value = `Word w; _ } ] -> (
71✔
45
          match String.length w with
46
          | 0 -> `Valid None
×
47
          | 1 -> (
14✔
48
              match w with
49
              | "-" -> `Valid None
×
50
              | ":" -> `Valid (Some `Center)
×
51
              | _ -> `Invalid)
14✔
52
          | len ->
57✔
53
              if String.for_all (Char.equal '-') (String.sub w 1 (len - 2)) then
57✔
54
                match (String.get w 0, String.get w (len - 1)) with
55✔
55
                | ':', ':' -> `Valid (Some `Center)
5✔
56
                | ':', '-' -> `Valid (Some `Left)
6✔
57
                | '-', ':' -> `Valid (Some `Right)
6✔
58
                | '-', '-' -> `Valid None
34✔
59
                | _ -> `Invalid
4✔
60
              else `Invalid)
2✔
61
      | _ -> `Invalid
5✔
62

63
    let valid_align_row lx =
64
      let rec loop acc = function
44✔
65
        | [] -> Some (List.rev acc)
19✔
66
        | x :: q -> (
76✔
67
            match valid_align x with
68
            | `Invalid -> None
25✔
69
            | `Valid alignment -> loop (alignment :: acc) q)
51✔
70
      in
71
      loop [] lx
72

73
    let create ~grid ~align : Ast.table =
74
      let cell_to_block (x, k) =
26✔
75
        let whole_loc = Loc.span (List.map (fun x -> x.Loc.location) x) in
107✔
76
        match x with
107✔
77
        | [] -> ([], k)
4✔
78
        | _ -> ([ Loc.at whole_loc (`Paragraph x) ], k)
103✔
79
      in
80
      let row_to_block = List.map cell_to_block in
81
      let grid_to_block = List.map row_to_block in
26✔
82
      ((grid_to_block grid, align), `Light)
26✔
83

84
    let with_kind kind : 'a with_location list list -> 'a Ast.row =
85
      List.map (fun c -> (c, kind))
41✔
86

87
    let from_raw_data grid : Ast.table =
88
      match grid with
26✔
89
      | [] -> create ~grid:[] ~align:None
2✔
90
      | row1 :: rows2_N -> (
24✔
91
          match valid_align_row row1 with
92
          (* If the first line is the align row, everything else is data. *)
93
          | Some _ as align ->
2✔
94
              create ~grid:(List.map (with_kind `Data) rows2_N) ~align
2✔
95
          | None -> (
22✔
96
              match rows2_N with
97
              (* Only 1 line, if this is not the align row this is data. *)
98
              | [] -> create ~grid:[ with_kind `Data row1 ] ~align:None
2✔
99
              | row2 :: rows3_N -> (
20✔
100
                  match valid_align_row row2 with
101
                  (* If the second line is the align row, the first one is the
102
                     header and the rest is data. *)
103
                  | Some _ as align ->
17✔
104
                      let header = with_kind `Header row1 in
105
                      let data = List.map (with_kind `Data) rows3_N in
17✔
106
                      create ~grid:(header :: data) ~align
17✔
107
                  (* No align row in the first 2 lines, everything is considered
108
                     data. *)
109
                  | None ->
3✔
110
                      create ~grid:(List.map (with_kind `Data) grid) ~align:None
3✔
111
                  )))
112
  end
113

114
  module Heavy_syntax = struct
115
    let create ~grid : Ast.table = ((grid, None), `Heavy)
12✔
116
    let from_grid grid : Ast.table = create ~grid
12✔
117
  end
118
end
119

120
module Reader = struct
121
  let until_rbrace_or_eof input acc =
122
    let rec consume () =
134✔
123
      let next_token = peek input in
200✔
124
      match next_token.value with
200✔
125
      | `Right_brace ->
29✔
126
          junk input;
127
          `End (acc, next_token.location)
29✔
128
      | `End ->
2✔
129
          Parse_error.end_not_allowed next_token.location ~in_what:"table"
130
          |> add_warning input;
2✔
131
          junk input;
2✔
132
          `End (acc, next_token.location)
2✔
133
      | `Space _ | `Single_newline _ | `Blank_line _ ->
1✔
134
          junk input;
135
          consume ()
66✔
136
      | _ -> `Token next_token
103✔
137
    in
138
    consume ()
139

140
  module Infix = struct
141
    let ( >>> ) consume if_token =
142
      match consume with
134✔
143
      | `End (ret, loc) -> (ret, loc)
31✔
144
      | `Token t -> if_token t
103✔
145
  end
146
end
147

148
open Reader.Infix
149

150
(* The last token in the stream is always [`End], and it is never consumed by
151
   the parser, so the [None] case is impossible. *)
152

153
let npeek n input = Stream.npeek n input.tokens
1,596✔
154

155
(* {2 Non-link inline elements} *)
156
type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ]
157

158
(* Convenient abbreviation for use in patterns. *)
159
type token_that_always_begins_an_inline_element =
160
  [ `Word of string
161
  | `Code_span of string
162
  | `Raw_markup of string option * string
163
  | `Begin_style of style
164
  | `Simple_reference of string
165
  | `Begin_reference_with_replacement_text of string
166
  | `Simple_link of string
167
  | `Begin_link_with_replacement_text of string
168
  | `Math_span of string ]
169

170
(* Check that the token constructors above actually are all in [Token.t]. *)
171
let _check_subset : token_that_always_begins_an_inline_element -> Token.t =
172
 fun t -> (t :> Token.t)
×
173

174
(* Consumes tokens that make up a single non-link inline element:
175

176
   - a horizontal space ([`Space], significant in inline elements),
177
   - a word (see [word]),
178
   - a code span ([...], [`Code_span _]), or
179
   - styled text ({e ...}).
180

181
   The latter requires a recursive call to [delimited_inline_element_list],
182
   defined below.
183

184
   This should be part of [delimited_inline_element_list]; however, it is also
185
   called by function [paragraph]. As a result, it is factored out, and made
186
   mutually-recursive with [delimited_inline_element_list].
187

188
   This is called only when it is known that the first token in the list is the
189
   beginning of an inline element. In the case of [`Minus] and [`Plus], that
190
   means the caller has determined that they are not a list bullet (i.e., not
191
   the first non-whitespace tokens on their line).
192

193
   This function consumes exactly the tokens that make up the element. *)
194
let rec inline_element :
195
    input -> Loc.span -> _ -> Ast.inline_element with_location =
196
 fun input location next_token ->
197
  match next_token with
8,950✔
198
  | `Space _ as token ->
3,179✔
199
      junk input;
200
      Loc.at location token
3,179✔
201
  | `Word _ as token ->
4,669✔
202
      junk input;
203
      Loc.at location token
4,669✔
204
      (* This is actually the same memory representation as the token, complete
205
         with location, and is probably the most common case. Perhaps the token
206
         can be reused somehow. The same is true of [`Space], [`Code_span]. *)
207
  | `Minus ->
6✔
208
      junk input;
209
      Loc.at location (`Word "-")
6✔
210
  | `Plus ->
3✔
211
      junk input;
212
      Loc.at location (`Word "+")
3✔
213
  | `Bar ->
2✔
214
      junk input;
215
      Loc.at location (`Word "|")
2✔
216
  | (`Code_span _ | `Math_span _ | `Raw_markup _) as token ->
8✔
217
      junk input;
218
      Loc.at location token
223✔
219
  | `Begin_style s as parent_markup ->
147✔
220
      junk input;
221

222
      let requires_leading_whitespace =
147✔
223
        match s with
224
        | `Bold | `Italic | `Emphasis -> true
18✔
225
        | `Superscript | `Subscript -> false
13✔
226
      in
227
      let content, brace_location =
228
        delimited_inline_element_list ~parent_markup
229
          ~parent_markup_location:location ~requires_leading_whitespace input
230
      in
231

232
      let location = Loc.span [ location; brace_location ] in
147✔
233

234
      if content = [] then
147✔
235
        Parse_error.should_not_be_empty
236
          ~what:(Token.describe parent_markup)
8✔
237
          location
238
        |> add_warning input;
8✔
239

240
      Loc.at location (`Styled (s, content))
147✔
241
  | `Simple_reference r ->
629✔
242
      junk input;
243

244
      let r_location = Loc.nudge_start (String.length "{!") location in
629✔
245
      let r = Loc.at r_location r in
629✔
246

247
      Loc.at location (`Reference (`Simple, r, []))
629✔
248
  | `Begin_reference_with_replacement_text r as parent_markup ->
51✔
249
      junk input;
250

251
      let r_location = Loc.nudge_start (String.length "{{!") location in
51✔
252
      let r = Loc.at r_location r in
51✔
253

254
      let content, brace_location =
51✔
255
        delimited_inline_element_list ~parent_markup
256
          ~parent_markup_location:location ~requires_leading_whitespace:false
257
          input
258
      in
259

260
      let location = Loc.span [ location; brace_location ] in
51✔
261

262
      if content = [] then
51✔
263
        Parse_error.should_not_be_empty
264
          ~what:(Token.describe parent_markup)
4✔
265
          location
266
        |> add_warning input;
4✔
267

268
      Loc.at location (`Reference (`With_text, r, content))
51✔
269
  | `Simple_link u ->
9✔
270
      junk input;
271

272
      let u = String.trim u in
9✔
273

274
      if u = "" then
9✔
275
        Parse_error.should_not_be_empty
276
          ~what:(Token.describe next_token)
2✔
277
          location
278
        |> add_warning input;
2✔
279

280
      Loc.at location (`Link (u, []))
9✔
281
  | `Begin_link_with_replacement_text u as parent_markup ->
32✔
282
      junk input;
283

284
      let u = String.trim u in
32✔
285

286
      if u = "" then
32✔
287
        Parse_error.should_not_be_empty
288
          ~what:(Token.describe parent_markup)
2✔
289
          location
290
        |> add_warning input;
2✔
291

292
      let content, brace_location =
32✔
293
        delimited_inline_element_list ~parent_markup
294
          ~parent_markup_location:location ~requires_leading_whitespace:false
295
          input
296
      in
297

298
      `Link (u, content) |> Loc.at (Loc.span [ location; brace_location ])
32✔
299

300
(* Consumes tokens that make up a sequence of inline elements that is ended by
301
   a '}', a [`Right_brace] token. The brace token is also consumed.
302

303
   The sequences are also preceded by some markup like '{b'. Some of these
304
   markup tokens require whitespace immediately after the token, and others not.
305
   The caller indicates which way that is through the
306
   [~requires_leading_whitespace] argument.
307

308
   Whitespace is significant in inline element lists. In particular, "foo [bar]"
309
   is represented as [`Word "foo"; `Space; `Code_span "bar"], while "foo[bar]"
310
   is [`Word "foo"; `Code_span "bar"]. It doesn't matter how much whitespace is
311
   there, just whether it is present or not. Single newlines and horizontal
312
   space in any amount are allowed. Blank lines are not, as these are separators
313
   for {e block} elements.
314

315
   In correct input, the first and last elements emitted will not be [`Space],
316
   i.e. [`Space] appears only between other non-link inline elements. In
317
   incorrect input, there might be [`Space] followed immediately by something
318
   like an @author tag.
319

320
   The [~parent_markup] and [~parent_markup_location] arguments are used for
321
   generating error messages. *)
322
and delimited_inline_element_list :
323
    parent_markup:[< Token.t ] ->
324
    parent_markup_location:Loc.span ->
325
    requires_leading_whitespace:bool ->
326
    input ->
327
    Ast.inline_element with_location list * Loc.span =
328
 fun ~parent_markup ~parent_markup_location ~requires_leading_whitespace
329
     input ->
330
  (* [~at_start_of_line] is used to interpret [`Minus] and [`Plus]. These are
331
     word tokens if not the first non-whitespace tokens on their line. Then,
332
     they are allowed in a non-link element list. *)
333
  let rec consume_elements :
553✔
334
      at_start_of_line:bool ->
335
      Ast.inline_element with_location list ->
336
      Ast.inline_element with_location list * Loc.span =
337
   fun ~at_start_of_line acc ->
338
    let next_token = peek input in
1,718✔
339
    match next_token.value with
1,718✔
340
    | `Right_brace ->
529✔
341
        junk input;
342
        (List.rev acc, next_token.location)
529✔
343
    (* The [`Space] token is not space at the beginning or end of line, because
344
       that is combined into [`Single_newline] or [`Blank_line] tokens. It is
345
       also not at the beginning of markup (after e.g. '{b'), because that is
346
       handled separately before calling
347
       [consume_non_link_inline_elements], and not immediately before '}',
348
       because that is combined into the [`Right_brace] token by the lexer. So,
349
       it is an internal space, and we want to add it to the non-link inline
350
       element list. *)
351
    | (`Space _ | #token_that_always_begins_an_inline_element) as token ->
313✔
352
        let acc = inline_element input next_token.location token :: acc in
1,165✔
353
        consume_elements ~at_start_of_line:false acc
354
    | `Single_newline ws ->
6✔
355
        junk input;
356
        let element = Loc.same next_token (`Space ws) in
6✔
357
        consume_elements ~at_start_of_line:true (element :: acc)
6✔
358
    | `Blank_line ws as blank ->
2✔
359
        Parse_error.not_allowed ~what:(Token.describe blank)
2✔
360
          ~in_what:(Token.describe parent_markup)
2✔
361
          next_token.location
362
        |> add_warning input;
2✔
363

364
        junk input;
2✔
365
        let element = Loc.same next_token (`Space ws) in
2✔
366
        consume_elements ~at_start_of_line:true (element :: acc)
2✔
367
    | `Bar as token ->
×
368
        let acc = inline_element input next_token.location token :: acc in
×
369
        consume_elements ~at_start_of_line:false acc
370
    | (`Minus | `Plus) as bullet ->
2✔
371
        (if at_start_of_line then
372
           let suggestion =
4✔
373
             Printf.sprintf "move %s so it isn't the first thing on the line."
374
               (Token.print bullet)
4✔
375
           in
376
           Parse_error.not_allowed ~what:(Token.describe bullet)
4✔
377
             ~in_what:(Token.describe parent_markup)
4✔
378
             ~suggestion next_token.location
379
           |> add_warning input);
4✔
380

381
        let acc = inline_element input next_token.location bullet :: acc in
5✔
382
        consume_elements ~at_start_of_line:false acc
383
    | other_token ->
11✔
384
        Parse_error.not_allowed
385
          ~what:(Token.describe other_token)
11✔
386
          ~in_what:(Token.describe parent_markup)
11✔
387
          next_token.location
388
        |> add_warning input;
11✔
389

390
        let last_location =
11✔
391
          match acc with
392
          | last_token :: _ -> last_token.location
2✔
393
          | [] -> parent_markup_location
9✔
394
        in
395

396
        (List.rev acc, last_location)
11✔
397
  in
398

399
  let first_token = peek input in
400
  match first_token.value with
553✔
401
  | `Space _ ->
478✔
402
      junk input;
403
      consume_elements ~at_start_of_line:false []
478✔
404
      (* [~at_start_of_line] is [false] here because the preceding token was some
405
         some markup like '{b', and we didn't move to the next line, so the next
406
         token will not be the first non-whitespace token on its line. *)
407
  | `Single_newline _ ->
18✔
408
      junk input;
409
      consume_elements ~at_start_of_line:true []
18✔
410
  | `Blank_line _ as blank ->
2✔
411
      (* In case the markup is immediately followed by a blank line, the error
412
         message printed by the catch-all case below can be confusing, as it will
413
         suggest that the markup must be followed by a newline (which it is). It
414
         just must not be followed by two newlines. To explain that clearly,
415
         handle that case specifically. *)
416
      Parse_error.not_allowed ~what:(Token.describe blank)
2✔
417
        ~in_what:(Token.describe parent_markup)
2✔
418
        first_token.location
419
      |> add_warning input;
2✔
420

421
      junk input;
2✔
422
      consume_elements ~at_start_of_line:true []
2✔
423
  | `Right_brace ->
13✔
424
      junk input;
425
      ([], first_token.location)
13✔
426
  | _ ->
42✔
427
      if requires_leading_whitespace then
428
        Parse_error.should_be_followed_by_whitespace
429
          ~what:(Token.print parent_markup)
5✔
430
          parent_markup_location
431
        |> add_warning input;
5✔
432
      consume_elements ~at_start_of_line:false []
42✔
433

434
(* {2 Paragraphs} *)
435

436
(* Consumes tokens that make up a paragraph.
437

438
   A paragraph is a sequence of inline elements that ends on a blank line, or
439
   explicit block markup such as a verbatim block on a new line.
440

441
   Because of the significance of newlines, paragraphs are parsed line-by-line.
442
   The function [paragraph] is called only when the current token is the first
443
   non-whitespace token on its line, and begins an inline element. [paragraph]
444
   then parses a line of inline elements. Afterwards, it looks ahead to the next
445
   line. If that line also begins with an inline element, it parses that line,
446
   and so on. *)
447
let paragraph : input -> Ast.nestable_block_element with_location =
448
 fun input ->
449
  (* Parses a single line of a paragraph, consisting of inline elements. The
450
     only valid ways to end a paragraph line are with [`End], [`Single_newline],
451
     [`Blank_line], and [`Right_brace]. Everything else either belongs in the
452
     paragraph, or signifies an attempt to begin a block element inside a
453
     paragraph line, which is an error. These errors are caught elsewhere; the
454
     paragraph parser just stops. *)
455
  let rec paragraph_line :
1,490✔
456
      Ast.inline_element with_location list ->
457
      Ast.inline_element with_location list =
458
   fun acc ->
459
    let next_token = peek input in
9,082✔
460
    match next_token.value with
9,082✔
461
    | ( `Space _ | `Minus | `Plus | `Bar
1✔
462
      | #token_that_always_begins_an_inline_element ) as token ->
4,614✔
463
        let element = inline_element input next_token.location token in
464
        paragraph_line (element :: acc)
7,486✔
465
    | _ -> acc
1,596✔
466
  in
467

468
  (* After each line is parsed, decides whether to parse more lines. *)
469
  let rec additional_lines :
470
      Ast.inline_element with_location list ->
471
      Ast.inline_element with_location list =
472
   fun acc ->
473
    match npeek 2 input with
1,596✔
474
    | { value = `Single_newline ws; location }
106✔
475
      :: { value = #token_that_always_begins_an_inline_element | `Bar; _ }
×
476
      :: _ ->
477
        junk input;
478
        let acc = Loc.at location (`Space ws) :: acc in
106✔
479
        let acc = paragraph_line acc in
480
        additional_lines acc
106✔
481
    | _ -> List.rev acc
1,490✔
482
  in
483

484
  let elements = paragraph_line [] |> additional_lines in
1,490✔
485
  `Paragraph elements |> Loc.at (Loc.span (List.map Loc.location elements))
1,490✔
486

487
(* {2 Block elements} *)
488

489
(* {3 Helper types} *)
490

491
(* The interpretation of tokens in the block parser depends on where on a line
492
    each token appears. The six possible "locations" are:
493

494
    - [`At_start_of_line], when only whitespace has been read on the current
495
      line.
496
    - [`After_tag], when a valid tag token, such as [@deprecated], has been read,
497
      and only whitespace has been read since.
498
    - [`After_shorthand_bullet], when a valid shorthand list item bullet, such as
499
      [-], has been read, and only whitespace has been read since.
500
    - [`After_explicit_list_bullet], when a valid explicit bullet, such as [{li],
501
      has been read, and only whitespace has been read since.
502
    - [`After_table_cell], when a table cell opening markup ('{th' or '{td') has been read.
503
    - [`After_text], when any other valid non-whitespace token has already been
504
      read on the current line.
505

506
    Here are some examples of how this affects the interpretation of tokens:
507

508
    - A paragraph can start anywhere except [`After_text] (two paragraphs cannot
509
      be on the same line, but paragraphs can be nested in just about anything).
510
    - [`Minus] is interpreted as a list item bullet [`At_start_of_line],
511
      [`After_tag], and [`After_explicit_list_bullet].
512
    - Tags are only allowed [`At_start_of_line].
513

514
   To track the location accurately, the functions that make up the block parser
515
   pass explicit [where_in_line] values around and return them.
516

517
   In a few cases, [where_in_line] can be inferred from what helper was called.
518
   For example, the [paragraph] parser always stops on the same line as the last
519
   significant token that is in the paragraph it consumed, so the location must
520
   be [`After_text]. *)
521
type where_in_line =
522
  [ `At_start_of_line
523
  | `After_tag
524
  | `After_shorthand_bullet
525
  | `After_explicit_list_bullet
526
  | `After_table_cell
527
  | `After_text ]
528

529
(* The block parsing loop, function [block_element_list], stops when it
530
   encounters certain tokens.
531

532
   When it is called for the whole comment, or for in explicit list item
533
   ([{li foo}]), it can only stop on end of input or a right brace.
534

535
   When it is called inside a shorthand list item ([- foo]), it stops on end of
536
   input, right brace, a blank line (indicating end of shorthand list), plus or
537
   minus (indicating the start of the next list item), or a section heading or
538
   tag, which cannot be nested in list markup.
539

540
   The block parser [block_element_list] explicitly returns the token that
541
   stopped it, with a type more precise than [Token.t stream_head]: if it was
542
   called for the whole comment or an explicit list item, the stop token will
543
   have type [stops_at_delimiters stream_head], and if it was called for a
544
   shorthand list item, the stop token will have type
545
   [implicit_stop stream_head]. This allows the calling parsers to write precise
546
   cases for exactly the tokens that might be at the front of the stream after
547
   the block parser returns. *)
548
type stops_at_delimiters = [ `End | `Right_brace ]
549
type code_stop = [ `End | `Right_code_delimiter ]
550

551
type stopped_implicitly =
552
  [ `End
553
  | `Blank_line of string
554
  | `Right_brace
555
  | `Minus
556
  | `Plus
557
  | Token.section_heading
558
  | Token.media_markup
559
  | Token.tag ]
560

561
(* Ensure that the above two types are really subsets of [Token.t]. *)
562
let _check_subset : stops_at_delimiters -> Token.t = fun t -> (t :> Token.t)
×
563
let _check_subset : stopped_implicitly -> Token.t = fun t -> (t :> Token.t)
×
564

565
(* The different contexts in which the block parser [block_element_list] can be
566
   called. The block parser's behavior depends somewhat on the context. For
567
   example, while paragraphs are allowed anywhere, shorthand lists are not
568
   allowed immediately inside other shorthand lists, while tags are not allowed
569
   anywhere except at the comment top level.
570

571
   Besides telling the block parser how to behave, each context also carries two
572
   types, which determine the return type of the block parser:
573

574
   - The type of blocks the parser returns. Note that [nestable_block_element]
575
     is included in [block_element]. However, the extra block kinds in
576
     [block_element] are only allowed at the comment top level.
577
   - The type of token that the block parser stops at. See discussion above. *)
578
type ('block, 'stops_at_which_tokens) context =
579
  | Top_level : (Ast.block_element, stops_at_delimiters) context
580
  | In_implicitly_ended :
581
      [ `Tag | `Shorthand_list ]
582
      -> (Ast.nestable_block_element, stopped_implicitly) context
583
  | In_explicit_list : (Ast.nestable_block_element, stops_at_delimiters) context
584
  | In_table_cell : (Ast.nestable_block_element, stops_at_delimiters) context
585
  | In_code_results : (Ast.nestable_block_element, code_stop) context
586

587
(* This is a no-op. It is needed to prove to the type system that nestable block
588
   elements are acceptable block elements in all contexts. *)
589
let accepted_in_all_contexts : type block stops_at_which_tokens.
590
    (block, stops_at_which_tokens) context ->
591
    Ast.nestable_block_element ->
592
    block =
593
 fun context block ->
594
  match context with
1,953✔
595
  | Top_level -> (block :> Ast.block_element)
1,664✔
596
  | In_implicitly_ended (`Tag | `Shorthand_list) -> block
74✔
597
  | In_explicit_list -> block
53✔
598
  | In_table_cell -> block
14✔
599
  | In_code_results -> block
4✔
600

601
(* Converts a tag to a series of words. This is used in error recovery, when a
602
   tag cannot be generated. *)
603
let tag_to_words = function
604
  | `Author s -> [ `Word "@author"; `Space " "; `Word s ]
5✔
605
  | `Before s -> [ `Word "@before"; `Space " "; `Word s ]
×
606
  | `Canonical s -> [ `Word "@canonical"; `Space " "; `Word s ]
×
607
  | `Deprecated -> [ `Word "@deprecated" ]
2✔
608
  | `Inline -> [ `Word "@inline" ]
×
609
  | `Open -> [ `Word "@open" ]
×
610
  | `Closed -> [ `Word "@closed" ]
×
611
  | `Hidden -> [ `Word "@hidden" ]
×
612
  | `Param s -> [ `Word "@param"; `Space " "; `Word s ]
1✔
613
  | `Raise s -> [ `Word "@raise"; `Space " "; `Word s ]
×
614
  | `Return -> [ `Word "@return" ]
×
615
  | `See (`Document, s) -> [ `Word "@see"; `Space " "; `Word ("\"" ^ s ^ "\"") ]
×
616
  | `See (`File, s) -> [ `Word "@see"; `Space " "; `Word ("'" ^ s ^ "'") ]
×
617
  | `See (`Url, s) -> [ `Word "@see"; `Space " "; `Word ("<" ^ s ^ ">") ]
×
618
  | `Since s -> [ `Word "@since"; `Space " "; `Word s ]
×
619
  | `Version s -> [ `Word "@version"; `Space " "; `Word s ]
×
620
  | `Children_order -> [ `Word "@children_order" ]
×
621
  | `Toc_status -> [ `Word "@toc_status" ]
×
622
  | `Order_category -> [ `Word "@order_category" ]
×
623
  | `Short_title -> [ `Word "@short_title" ]
×
624

625
(* {3 Block element lists} *)
626

627
(* Consumes tokens making up a sequence of block elements. These are:
628

629
   - paragraphs,
630
   - code blocks,
631
   - verbatim text blocks,
632
   - tables,
633
   - lists, and
634
   - section headings. *)
635
let rec block_element_list : type block stops_at_which_tokens.
636
    (block, stops_at_which_tokens) context ->
637
    parent_markup:[< Token.t | `Comment ] ->
638
    input ->
639
    block with_location list
640
    * stops_at_which_tokens with_location
641
    * where_in_line =
642
 fun context ~parent_markup input ->
643
  let rec consume_block_elements :
2,232✔
644
      where_in_line ->
645
      block with_location list ->
646
      block with_location list
647
      * stops_at_which_tokens with_location
648
      * where_in_line =
649
   fun where_in_line acc ->
650
    let describe token =
6,565✔
651
      match token with
17✔
652
      | #token_that_always_begins_an_inline_element -> "paragraph"
10✔
653
      | _ -> Token.describe token
7✔
654
    in
655

656
    let warn_if_after_text { Loc.location; value = token } =
657
      if where_in_line = `After_text then
1,833✔
658
        Parse_error.should_begin_on_its_own_line ~what:(describe token) location
17✔
659
        |> add_warning input
17✔
660
    in
661

662
    let warn_because_not_at_top_level { Loc.location; value = token } =
663
      let suggestion =
10✔
664
        Printf.sprintf "move %s outside of any other markup."
665
          (Token.print token)
10✔
666
      in
667
      Parse_error.not_allowed ~what:(Token.describe token)
10✔
668
        ~in_what:(Token.describe parent_markup)
10✔
669
        ~suggestion location
670
      |> add_warning input
10✔
671
    in
672

673
    match peek input with
674
    (* Terminators: the two tokens that terminate anything. *)
675
    | { value = `End; _ } as next_token -> (
2,010✔
676
        match context with
677
        | Top_level -> (List.rev acc, next_token, where_in_line)
1,889✔
678
        | In_implicitly_ended (`Tag | `Shorthand_list) ->
43✔
679
            (List.rev acc, next_token, where_in_line)
115✔
680
        | In_explicit_list -> (List.rev acc, next_token, where_in_line)
5✔
681
        | In_table_cell -> (List.rev acc, next_token, where_in_line)
×
682
        | In_code_results -> (List.rev acc, next_token, where_in_line))
1✔
683
    | { value = `Right_brace; _ } as next_token -> (
90✔
684
        (* This little absurdity is needed to satisfy the type system. Without it,
685
           OCaml is unable to prove that [stream_head] has the right type for all
686
           possible values of [context]. *)
687
        match context with
688
        | Top_level -> (List.rev acc, next_token, where_in_line)
18✔
689
        | In_implicitly_ended (`Tag | `Shorthand_list) ->
1✔
690
            (List.rev acc, next_token, where_in_line)
5✔
691
        | In_explicit_list -> (List.rev acc, next_token, where_in_line)
45✔
692
        | In_table_cell -> (List.rev acc, next_token, where_in_line)
21✔
693
        | In_code_results ->
1✔
694
            junk input;
695
            consume_block_elements where_in_line acc)
1✔
696
    | { value = `Right_code_delimiter; _ } as next_token -> (
8✔
697
        match context with
698
        | In_code_results -> (List.rev acc, next_token, where_in_line)
3✔
699
        | _ ->
5✔
700
            junk input;
701
            consume_block_elements where_in_line acc)
5✔
702
    (* Whitespace. This can terminate some kinds of block elements. It is also
703
       necessary to track it to interpret [`Minus] and [`Plus] correctly, as
704
       well as to ensure that all block elements begin on their own line. *)
705
    | { value = `Space _; _ } ->
966✔
706
        junk input;
707
        consume_block_elements where_in_line acc
966✔
708
    | { value = `Single_newline _; _ } ->
386✔
709
        junk input;
710
        consume_block_elements `At_start_of_line acc
386✔
711
    | { value = `Blank_line _; _ } as next_token -> (
347✔
712
        match context with
713
        (* Blank lines terminate shorthand lists ([- foo]) and tags. They also
714
           terminate paragraphs, but the paragraph parser is aware of that
715
           internally. *)
716
        | In_implicitly_ended (`Tag | `Shorthand_list) ->
7✔
717
            (List.rev acc, next_token, where_in_line)
24✔
718
        (* Otherwise, blank lines are pretty much like single newlines. *)
719
        | _ ->
323✔
720
            junk input;
721
            consume_block_elements `At_start_of_line acc)
323✔
722
    (* Explicit list items ([{li ...}] and [{- ...}]) can never appear directly
723
       in block content. They can only appear inside [{ul ...}] and [{ol ...}].
724
       So, catch those. *)
725
    | { value = `Begin_list_item _ as token; location } ->
4✔
726
        let suggestion =
727
          Printf.sprintf "move %s into %s, or use %s." (Token.print token)
4✔
728
            (Token.describe (`Begin_list `Unordered))
4✔
729
            (Token.describe `Minus)
4✔
730
        in
731
        Parse_error.not_allowed ~what:(Token.describe token)
4✔
732
          ~in_what:(Token.describe parent_markup)
4✔
733
          ~suggestion location
734
        |> add_warning input;
4✔
735

736
        junk input;
4✔
737
        consume_block_elements where_in_line acc
4✔
738
    (* Table rows ([{tr ...}]) can never appear directly
739
       in block content. They can only appear inside [{table ...}]. *)
740
    | { value = `Begin_table_row as token; location } ->
×
741
        let suggestion =
742
          Printf.sprintf "move %s into %s." (Token.print token)
×
743
            (Token.describe `Begin_table_heavy)
×
744
        in
745
        Parse_error.not_allowed ~what:(Token.describe token)
×
746
          ~in_what:(Token.describe parent_markup)
×
747
          ~suggestion location
748
        |> add_warning input;
×
749
        junk input;
×
750
        consume_block_elements where_in_line acc
×
751
    (* Table cells ([{th ...}] and [{td ...}]) can never appear directly
752
       in block content. They can only appear inside [{tr ...}]. *)
753
    | { value = `Begin_table_cell _ as token; location } ->
×
754
        let suggestion =
755
          Printf.sprintf "move %s into %s." (Token.print token)
×
756
            (Token.describe `Begin_table_row)
×
757
        in
758
        Parse_error.not_allowed ~what:(Token.describe token)
×
759
          ~in_what:(Token.describe parent_markup)
×
760
          ~suggestion location
761
        |> add_warning input;
×
762
        junk input;
×
763
        consume_block_elements where_in_line acc
×
764
    (* Tags. These can appear at the top level only. *)
765
    | { value = `Tag tag as token; location } as next_token -> (
409✔
766
        let recover_when_not_at_top_level context =
767
          warn_because_not_at_top_level next_token;
8✔
768
          junk input;
8✔
769
          let words = List.map (Loc.at location) (tag_to_words tag) in
8✔
770
          let paragraph =
8✔
771
            `Paragraph words
772
            |> accepted_in_all_contexts context
773
            |> Loc.at location
8✔
774
          in
775
          consume_block_elements `At_start_of_line (paragraph :: acc)
8✔
776
        in
777

778
        match context with
779
        (* Tags cannot make sense in an explicit list ([{ul {li ...}}]). *)
780
        | In_explicit_list -> recover_when_not_at_top_level context
3✔
781
        (* If a tag starts at the beginning of a line, it terminates the preceding
782
           tag and/or the current shorthand list. In this case, return to the
783
           caller, and let the caller decide how to interpret the tag token. *)
784
        | In_implicitly_ended (`Tag | `Shorthand_list) ->
4✔
785
            if where_in_line = `At_start_of_line then
786
              (List.rev acc, next_token, where_in_line)
24✔
787
            else recover_when_not_at_top_level context
5✔
788
        | In_table_cell -> recover_when_not_at_top_level context
×
789
        | In_code_results -> recover_when_not_at_top_level context
×
790
        (* If this is the top-level call to [block_element_list], parse the
791
           tag. *)
792
        | Top_level -> (
377✔
793
            if where_in_line <> `At_start_of_line then
794
              Parse_error.should_begin_on_its_own_line
795
                ~what:(Token.describe token) location
10✔
796
              |> add_warning input;
10✔
797

798
            junk input;
377✔
799

800
            match tag with
377✔
801
            | (`Author s | `Since s | `Version s | `Canonical s) as tag ->
9✔
802
                let s = String.trim s in
803
                if s = "" then
223✔
804
                  Parse_error.should_not_be_empty ~what:(Token.describe token)
8✔
805
                    location
806
                  |> add_warning input;
8✔
807
                let tag =
223✔
808
                  match tag with
809
                  | `Author _ -> `Author s
40✔
810
                  | `Since _ -> `Since s
9✔
811
                  | `Version _ -> `Version s
9✔
812
                  | `Canonical _ ->
165✔
813
                      (* TODO The location is only approximate, as we need lexer
814
                         cooperation to get the real location. *)
815
                      let r_location =
816
                        Loc.nudge_start (String.length "@canonical ") location
165✔
817
                      in
818
                      `Canonical (Loc.at r_location s)
165✔
819
                in
820

821
                let tag = Loc.at location (`Tag tag) in
822
                consume_block_elements `After_text (tag :: acc)
223✔
823
            | ( `Deprecated | `Return | `Children_order | `Short_title
5✔
824
              | `Toc_status | `Order_category ) as tag ->
×
825
                let content, _stream_head, where_in_line =
826
                  block_element_list (In_implicitly_ended `Tag)
827
                    ~parent_markup:token input
828
                in
829
                let tag =
53✔
830
                  match tag with
831
                  | `Deprecated -> `Deprecated content
32✔
832
                  | `Toc_status -> `Toc_status content
×
833
                  | `Return -> `Return content
5✔
834
                  | `Children_order -> `Children_order content
8✔
835
                  | `Short_title -> `Short_title content
8✔
836
                  | `Order_category -> `Order_category content
×
837
                in
838
                let location =
839
                  location :: List.map Loc.location content |> Loc.span
53✔
840
                in
841
                let tag = Loc.at location (`Tag tag) in
53✔
842
                consume_block_elements where_in_line (tag :: acc)
53✔
843
            | (`Param _ | `Raise _ | `Before _) as tag ->
10✔
844
                let content, _stream_head, where_in_line =
845
                  block_element_list (In_implicitly_ended `Tag)
846
                    ~parent_markup:token input
847
                in
848
                let tag =
42✔
849
                  match tag with
850
                  | `Param s -> `Param (s, content)
20✔
851
                  | `Raise s -> `Raise (s, content)
12✔
852
                  | `Before s -> `Before (s, content)
10✔
853
                in
854
                let location =
855
                  location :: List.map Loc.location content |> Loc.span
42✔
856
                in
857
                let tag = Loc.at location (`Tag tag) in
42✔
858
                consume_block_elements where_in_line (tag :: acc)
42✔
859
            | `See (kind, target) ->
17✔
860
                let content, _next_token, where_in_line =
861
                  block_element_list (In_implicitly_ended `Tag)
862
                    ~parent_markup:token input
863
                in
864
                let location =
17✔
865
                  location :: List.map Loc.location content |> Loc.span
17✔
866
                in
867
                let tag = `Tag (`See (kind, target, content)) in
17✔
868
                let tag = Loc.at location tag in
869
                consume_block_elements where_in_line (tag :: acc)
17✔
870
            | (`Inline | `Open | `Closed | `Hidden) as tag ->
6✔
871
                let tag = Loc.at location (`Tag tag) in
872
                consume_block_elements `After_text (tag :: acc)))
42✔
873
    | ( { value = #token_that_always_begins_an_inline_element; _ }
1,490✔
874
      | { value = `Bar; _ } ) as next_token ->
×
875
        warn_if_after_text next_token;
876

877
        let block = paragraph input in
1,490✔
878
        let block = Loc.map (accepted_in_all_contexts context) block in
1,490✔
879
        let acc = block :: acc in
1,490✔
880
        consume_block_elements `After_text acc
881
    | { value = `Verbatim s as token; location } as next_token ->
78✔
882
        warn_if_after_text next_token;
883
        if s = "" then
78✔
884
          Parse_error.should_not_be_empty ~what:(Token.describe token) location
5✔
885
          |> add_warning input;
5✔
886

887
        junk input;
78✔
888
        let block = accepted_in_all_contexts context token in
78✔
889
        let block = Loc.at location block in
78✔
890
        let acc = block :: acc in
78✔
891
        consume_block_elements `After_text acc
892
    | { value = `Math_block s as token; location } as next_token ->
3✔
893
        warn_if_after_text next_token;
894
        if s = "" then
3✔
895
          Parse_error.should_not_be_empty ~what:(Token.describe token) location
×
896
          |> add_warning input;
×
897

898
        junk input;
3✔
899
        let block = accepted_in_all_contexts context token in
3✔
900
        let block = Loc.at location block in
3✔
901
        let acc = block :: acc in
3✔
902
        consume_block_elements `After_text acc
903
    | {
158✔
904
        value =
905
          `Code_block (meta, delim, { value = s; location = v_loc }, has_outputs)
906
          as token;
907
        location;
908
      } as next_token ->
909
        warn_if_after_text next_token;
910
        junk input;
158✔
911
        let delimiter = if delim = "" then None else Some delim in
7✔
912
        let output, location =
913
          if not has_outputs then (None, location)
154✔
914
          else
915
            let content, next_token, _where_in_line =
4✔
916
              block_element_list In_code_results ~parent_markup:token input
917
            in
918
            junk input;
4✔
919
            let locations =
4✔
920
              location :: List.map (fun content -> content.Loc.location) content
4✔
921
            in
922
            let location = Loc.span locations in
923
            let location = { location with end_ = next_token.location.end_ } in
4✔
924
            (Some content, location)
925
        in
926

927
        if s = "" then
928
          Parse_error.should_not_be_empty ~what:(Token.describe token) location
3✔
929
          |> add_warning input;
3✔
930

931
        let meta =
158✔
932
          match meta with
933
          | None -> None
105✔
934
          | Some (language, tags) -> Some { Ast.language; tags }
53✔
935
        in
936
        let block =
937
          accepted_in_all_contexts context
938
            (`Code_block
939
               {
940
                 Ast.meta;
941
                 delimiter;
942
                 content = { value = s; location = v_loc };
943
                 output;
944
               })
945
        in
946
        let block = Loc.at location block in
158✔
947
        let acc = block :: acc in
158✔
948
        consume_block_elements `After_text acc
949
    | { value = `Modules s as token; location } as next_token ->
20✔
950
        warn_if_after_text next_token;
951

952
        junk input;
20✔
953

954
        (* TODO Use some library for a splitting function, or move this out into a
955
           Util module. *)
956
        let split_string delimiters s =
20✔
957
          let rec scan_delimiters acc index =
20✔
958
            if index >= String.length s then List.rev acc
4✔
959
            else if String.contains delimiters s.[index] then
57✔
960
              scan_delimiters acc (index + 1)
15✔
961
            else scan_word acc index (index + 1)
42✔
962
          and scan_word acc start_index index =
963
            if index >= String.length s then
305✔
964
              let word = String.sub s start_index (index - start_index) in
16✔
965
              List.rev (word :: acc)
16✔
966
            else if String.contains delimiters s.[index] then
289✔
967
              let word = String.sub s start_index (index - start_index) in
26✔
968
              scan_delimiters (word :: acc) (index + 1)
26✔
969
            else scan_word acc start_index (index + 1)
263✔
970
          in
971

972
          scan_delimiters [] 0
973
        in
974

975
        (* TODO Correct locations await a full implementation of {!modules}
976
           parsing. *)
977
        let modules =
978
          split_string " \t\r\n" s |> List.map (fun r -> Loc.at location r)
20✔
979
        in
980

981
        if modules = [] then
20✔
982
          Parse_error.should_not_be_empty ~what:(Token.describe token) location
3✔
983
          |> add_warning input;
3✔
984

985
        let block = accepted_in_all_contexts context (`Modules modules) in
20✔
986
        let block = Loc.at location block in
20✔
987
        let acc = block :: acc in
20✔
988
        consume_block_elements `After_text acc
989
    | { value = `Begin_list kind as token; location } as next_token ->
46✔
990
        warn_if_after_text next_token;
991

992
        junk input;
46✔
993

994
        let items, brace_location =
46✔
995
          explicit_list_items ~parent_markup:token input
996
        in
997
        if items = [] then
46✔
998
          Parse_error.should_not_be_empty ~what:(Token.describe token) location
7✔
999
          |> add_warning input;
7✔
1000

1001
        let location = Loc.span [ location; brace_location ] in
46✔
1002
        let block = `List (kind, `Heavy, items) in
46✔
1003
        let block = accepted_in_all_contexts context block in
1004
        let block = Loc.at location block in
46✔
1005
        let acc = block :: acc in
46✔
1006
        consume_block_elements `After_text acc
1007
    | { value = (`Begin_table_light | `Begin_table_heavy) as token; location }
12✔
1008
      as next_token ->
1009
        warn_if_after_text next_token;
1010
        junk input;
38✔
1011
        let block, brace_location =
38✔
1012
          let parent_markup = token in
1013
          let parent_markup_location = location in
1014
          match token with
1015
          | `Begin_table_light ->
26✔
1016
              light_table input ~parent_markup ~parent_markup_location
26✔
1017
          | `Begin_table_heavy ->
12✔
1018
              heavy_table input ~parent_markup ~parent_markup_location
12✔
1019
        in
1020
        let location = Loc.span [ location; brace_location ] in
1021
        let block = accepted_in_all_contexts context (`Table block) in
38✔
1022
        let block = Loc.at location block in
38✔
1023
        let acc = block :: acc in
38✔
1024
        consume_block_elements `After_text acc
1025
    | { value = (`Minus | `Plus) as token; location } as next_token -> (
14✔
1026
        (match where_in_line with
1027
        | `After_text | `After_shorthand_bullet ->
1✔
1028
            Parse_error.should_begin_on_its_own_line
1029
              ~what:(Token.describe token) location
6✔
1030
            |> add_warning input
6✔
1031
        | _ -> ());
133✔
1032

1033
        match context with
1034
        | In_implicitly_ended `Shorthand_list ->
70✔
1035
            (List.rev acc, next_token, where_in_line)
70✔
1036
        | _ ->
69✔
1037
            let items, where_in_line =
1038
              shorthand_list_items next_token where_in_line input
1039
            in
1040
            let kind =
69✔
1041
              match token with `Minus -> `Unordered | `Plus -> `Ordered
5✔
1042
            in
1043
            let location =
1044
              location :: List.map Loc.location (List.flatten items) |> Loc.span
69✔
1045
            in
1046
            let block = `List (kind, `Light, items) in
69✔
1047
            let block = accepted_in_all_contexts context block in
1048
            let block = Loc.at location block in
69✔
1049
            let acc = block :: acc in
69✔
1050
            consume_block_elements where_in_line acc)
1051
    | { value = `Begin_section_heading (level, label) as token; location } as
332✔
1052
      next_token -> (
1053
        let recover_when_not_at_top_level context =
1054
          warn_because_not_at_top_level next_token;
2✔
1055
          junk input;
2✔
1056
          let content, brace_location =
2✔
1057
            delimited_inline_element_list ~parent_markup:token
1058
              ~parent_markup_location:location ~requires_leading_whitespace:true
1059
              input
1060
          in
1061
          let location = Loc.span [ location; brace_location ] in
2✔
1062
          let paragraph =
2✔
1063
            `Paragraph content
1064
            |> accepted_in_all_contexts context
1065
            |> Loc.at location
2✔
1066
          in
1067
          consume_block_elements `At_start_of_line (paragraph :: acc)
2✔
1068
        in
1069

1070
        match context with
1071
        | In_implicitly_ended (`Tag | `Shorthand_list) ->
3✔
1072
            if where_in_line = `At_start_of_line then
1073
              (List.rev acc, next_token, where_in_line)
12✔
1074
            else recover_when_not_at_top_level context
1✔
1075
        | In_explicit_list -> recover_when_not_at_top_level context
1✔
1076
        | In_table_cell -> recover_when_not_at_top_level context
×
1077
        | In_code_results -> recover_when_not_at_top_level context
×
1078
        | Top_level ->
318✔
1079
            if where_in_line <> `At_start_of_line then
1080
              Parse_error.should_begin_on_its_own_line
1081
                ~what:(Token.describe token) location
2✔
1082
              |> add_warning input;
2✔
1083

1084
            let label =
318✔
1085
              match label with
1086
              | Some "" ->
1✔
1087
                  Parse_error.should_not_be_empty ~what:"heading label" location
1088
                  |> add_warning input;
1✔
1089
                  None
1✔
1090
              | _ -> label
317✔
1091
            in
1092

1093
            junk input;
1094

1095
            let content, brace_location =
318✔
1096
              delimited_inline_element_list ~parent_markup:token
1097
                ~parent_markup_location:location
1098
                ~requires_leading_whitespace:true input
1099
            in
1100
            if content = [] then
318✔
1101
              Parse_error.should_not_be_empty ~what:(Token.describe token)
5✔
1102
                location
1103
              |> add_warning input;
5✔
1104

1105
            let location = Loc.span [ location; brace_location ] in
318✔
1106
            let heading = `Heading (level, label, content) in
318✔
1107
            let heading = Loc.at location heading in
1108
            let acc = heading :: acc in
318✔
1109
            consume_block_elements `After_text acc)
1110
    | { value = `Begin_paragraph_style _ as token; location } ->
3✔
1111
        junk input;
1112
        let content, brace_location =
3✔
1113
          delimited_inline_element_list ~parent_markup:token
1114
            ~parent_markup_location:location ~requires_leading_whitespace:true
1115
            input
1116
        in
1117
        let location = Loc.span [ location; brace_location ] in
3✔
1118

1119
        Parse_error.markup_should_not_be_used ~what:(Token.describe token)
3✔
1120
          location
1121
        |> add_warning input;
3✔
1122

1123
        let paragraph =
3✔
1124
          `Paragraph content
1125
          |> accepted_in_all_contexts context
1126
          |> Loc.at location
3✔
1127
        in
1128
        consume_block_elements `At_start_of_line (paragraph :: acc)
3✔
1129
    | {
21✔
1130
     location;
1131
     value = `Media_with_replacement_text (href, media, content) as token;
1132
    } ->
1133
        junk input;
1134

1135
        let r_location =
21✔
1136
          Loc.nudge_start
1137
            (String.length @@ Token.s_of_media `Replaced media)
21✔
1138
            location
1139
          |> Loc.nudge_end (String.length content + 1)
21✔
1140
          (* +1 for closing character *)
1141
        in
1142
        let c_location =
21✔
1143
          Loc.nudge_start
1144
            (String.length (Token.s_of_media `Replaced media)
21✔
1145
            + String.length (match href with `Reference s | `Link s -> s))
6✔
1146
            location
1147
          |> Loc.nudge_end 1
21✔
1148
        in
1149
        let content = String.trim content in
21✔
1150
        let href = href |> Loc.at r_location in
21✔
1151

1152
        if content = "" then
21✔
1153
          Parse_error.should_not_be_empty ~what:(Token.describe token)
5✔
1154
            c_location
1155
          |> add_warning input;
5✔
1156

1157
        let block = `Media (`Simple, href, content, media) in
21✔
1158
        let block = accepted_in_all_contexts context block in
1159
        let block = Loc.at location block in
21✔
1160
        let acc = block :: acc in
21✔
1161
        consume_block_elements `After_text acc
1162
    | { location; value = `Simple_media (href, media) } ->
17✔
1163
        junk input;
1164

1165
        let r_location =
17✔
1166
          Loc.nudge_start
1167
            (String.length @@ Token.s_of_media `Simple media)
17✔
1168
            location
1169
          |> Loc.nudge_end 1
17✔
1170
        in
1171
        let href = href |> Loc.at r_location in
17✔
1172
        let block = `Media (`Simple, href, "", media) in
17✔
1173
        let block = accepted_in_all_contexts context block in
1174
        let block = Loc.at location block in
17✔
1175
        let acc = block :: acc in
17✔
1176
        consume_block_elements `After_text acc
1177
  in
1178

1179
  let where_in_line =
1180
    match context with
1181
    | Top_level -> `At_start_of_line
1,907✔
1182
    | In_implicitly_ended `Shorthand_list -> `After_shorthand_bullet
138✔
1183
    | In_explicit_list -> `After_explicit_list_bullet
50✔
1184
    | In_table_cell -> `After_table_cell
21✔
1185
    | In_code_results -> `After_tag
4✔
1186
    | In_implicitly_ended `Tag -> `After_tag
112✔
1187
  in
1188

1189
  consume_block_elements where_in_line []
1190

1191
(* {3 Lists} *)
1192

1193
(* Consumes a sequence of implicit list items. Each one consists of a [`Minus]
1194
   or [`Plus] token, followed by block elements until:
1195

1196
   - a blank line, or
1197
   - a list bullet of the opposite kind (e.g. [`Plus] for a [`Minus] list).
1198

1199
   This function is called when the next token is known to be [`Minus] or
1200
   [`Plus]. It consumes that token, and calls the block element parser (see
1201
   above). That parser returns to [implicit_list_items] only on [`Blank_line],
1202
   [`End], [`Minus] or [`Plus] at the start of a line, or [`Right_brace]. *)
1203
and shorthand_list_items :
1204
    [ `Minus | `Plus ] with_location ->
1205
    where_in_line ->
1206
    input ->
1207
    Ast.nestable_block_element with_location list list * where_in_line =
1208
 fun first_token where_in_line input ->
1209
  let bullet_token = first_token.value in
69✔
1210

1211
  let rec consume_list_items :
1212
      [> ] with_location ->
1213
      where_in_line ->
1214
      Ast.nestable_block_element with_location list list ->
1215
      Ast.nestable_block_element with_location list list * where_in_line =
1216
   fun next_token where_in_line acc ->
1217
    match next_token.value with
207✔
1218
    | `End | `Right_brace | `Blank_line _ | `Tag _ | `Begin_section_heading _
2✔
1219
    | `Simple_media _ | `Media_with_replacement_text _ ->
×
1220
        (List.rev acc, where_in_line)
68✔
1221
    | (`Minus | `Plus) as bullet ->
14✔
1222
        if bullet = bullet_token then (
138✔
1223
          junk input;
1224

1225
          let content, stream_head, where_in_line =
138✔
1226
            block_element_list (In_implicitly_ended `Shorthand_list)
1227
              ~parent_markup:bullet input
1228
          in
1229
          if content = [] then
138✔
1230
            Parse_error.should_not_be_empty ~what:(Token.describe bullet)
3✔
1231
              next_token.location
1232
            |> add_warning input;
3✔
1233

1234
          let acc = content :: acc in
138✔
1235
          consume_list_items stream_head where_in_line acc)
1236
        else (List.rev acc, where_in_line)
1✔
1237
  in
1238

1239
  consume_list_items
1240
    (first_token :> stopped_implicitly with_location)
1241
    where_in_line []
1242

1243
(* Consumes a sequence of explicit list items (starting with '{li ...}' and
1244
   '{-...}', which are represented by [`Begin_list_item _] tokens).
1245

1246
   This function is called immediately after '{ul' or '{ol' ([`Begin_list _]) is
1247
   read. The only "valid" way to exit is by reading a [`Right_brace] token,
1248
   which is consumed.
1249

1250
   Whitespace inside the list, but outside list items, is not significant – this
1251
   parsing function consumes all of it. Otherwise, only list item start tokens
1252
   are accepted. Everything else is an error. *)
1253
and explicit_list_items :
1254
    parent_markup:[< Token.t ] ->
1255
    input ->
1256
    Ast.nestable_block_element with_location list list * Loc.span =
1257
 fun ~parent_markup input ->
1258
  let rec consume_list_items :
46✔
1259
      Ast.nestable_block_element with_location list list ->
1260
      Ast.nestable_block_element with_location list list * Loc.span =
1261
   fun acc ->
1262
    let next_token = peek input in
156✔
1263
    match next_token.value with
156✔
1264
    | `End ->
9✔
1265
        Parse_error.end_not_allowed next_token.location
1266
          ~in_what:(Token.describe parent_markup)
9✔
1267
        |> add_warning input;
9✔
1268
        (List.rev acc, next_token.location)
9✔
1269
    | `Right_brace ->
37✔
1270
        junk input;
1271
        (List.rev acc, next_token.location)
37✔
1272
    | `Space _ | `Single_newline _ | `Blank_line _ ->
1✔
1273
        junk input;
1274
        consume_list_items acc
54✔
1275
    | `Begin_list_item kind as token ->
50✔
1276
        junk input;
1277

1278
        (* '{li', represented by [`Begin_list_item `Li], must be followed by
1279
           whitespace. *)
1280
        (if kind = `Li then
50✔
1281
           match (peek input).value with
42✔
1282
           | `Space _ | `Single_newline _ | `Blank_line _ | `Right_brace ->
1✔
1283
               ()
1284
               (* The presence of [`Right_brace] above requires some explanation:
1285

1286
                  - It is better to be silent about missing whitespace if the next
1287
                    token is [`Right_brace], because the error about an empty list
1288
                    item will be generated below, and that error is more important to
1289
                    the user.
1290
                  - The [`Right_brace] token also happens to include all whitespace
1291
                    before it, as a convenience for the rest of the parser. As a
1292
                    result, not ignoring it could be wrong: there could in fact be
1293
                    whitespace in the concrete syntax immediately after '{li', just
1294
                    it is not represented as [`Space], [`Single_newline], or
1295
                    [`Blank_line]. *)
1296
           | _ ->
3✔
1297
               Parse_error.should_be_followed_by_whitespace next_token.location
1298
                 ~what:(Token.print token)
3✔
1299
               |> add_warning input);
3✔
1300

1301
        let content, token_after_list_item, _where_in_line =
50✔
1302
          block_element_list In_explicit_list ~parent_markup:token input
1303
        in
1304

1305
        if content = [] then
50✔
1306
          Parse_error.should_not_be_empty next_token.location
1307
            ~what:(Token.describe token)
3✔
1308
          |> add_warning input;
3✔
1309

1310
        (match token_after_list_item.value with
50✔
1311
        | `Right_brace -> junk input
45✔
1312
        | `End ->
5✔
1313
            Parse_error.end_not_allowed token_after_list_item.location
1314
              ~in_what:(Token.describe token)
5✔
1315
            |> add_warning input);
5✔
1316

1317
        let acc = content :: acc in
1318
        consume_list_items acc
1319
    | token ->
6✔
1320
        let suggestion =
1321
          match token with
1322
          | `Begin_section_heading _ | `Tag _ ->
1✔
1323
              Printf.sprintf "move %s outside the list." (Token.describe token)
2✔
1324
          | _ ->
4✔
1325
              Printf.sprintf "move %s into a list item, %s or %s."
4✔
1326
                (Token.describe token)
4✔
1327
                (Token.print (`Begin_list_item `Li))
4✔
1328
                (Token.print (`Begin_list_item `Dash))
4✔
1329
        in
1330
        Parse_error.not_allowed next_token.location ~what:(Token.describe token)
6✔
1331
          ~in_what:(Token.describe parent_markup)
6✔
1332
          ~suggestion
1333
        |> add_warning input;
6✔
1334

1335
        junk input;
6✔
1336
        consume_list_items acc
6✔
1337
  in
1338

1339
  consume_list_items []
1340

1341
(* Consumes a sequence of table rows that might start with [`Bar].
1342

1343
   This function is called immediately after '{t' ([`Begin_table `Light]) is
1344
   read. The only "valid" way to exit is by reading a [`Right_brace] token,
1345
   which is consumed. *)
1346
and light_table ~parent_markup ~parent_markup_location input =
1347
  let rec consume_rows acc ~last_loc =
26✔
1348
    Reader.until_rbrace_or_eof input acc >>> fun next_token ->
65✔
1349
    match next_token.Loc.value with
62✔
1350
    | `Bar | #token_that_always_begins_an_inline_element -> (
16✔
1351
        let next, row, last_loc =
1352
          light_table_row ~parent_markup ~last_loc input
1353
        in
1354
        match next with
61✔
1355
        | `Continue -> consume_rows (row :: acc) ~last_loc
38✔
1356
        | `Stop -> (row :: acc, last_loc))
23✔
1357
    | other_token ->
1✔
1358
        Parse_error.not_allowed next_token.location
1359
          ~what:(Token.describe other_token)
1✔
1360
          ~in_what:(Token.describe parent_markup)
1✔
1361
        |> add_warning input;
1✔
1362
        junk input;
1✔
1363
        consume_rows acc ~last_loc
1✔
1364
  in
1365
  let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in
1366
  let grid = List.rev rows in
26✔
1367
  (Table.Light_syntax.from_raw_data grid, brace_location)
26✔
1368

1369
(* Consumes a table row that might start with [`Bar]. *)
1370
and light_table_row ~parent_markup ~last_loc input =
1371
  let rec consume_row acc_row acc_cell acc_space ~new_line ~last_loc =
61✔
1372
    let push_cells row cell =
834✔
1373
      match cell with [] -> row | _ -> List.rev cell :: row
17✔
1374
    in
1375
    let return row cell = List.rev (push_cells row cell) in
61✔
1376
    let next_token = peek input in
1377
    match next_token.value with
834✔
1378
    | `End ->
×
1379
        Parse_error.end_not_allowed next_token.location ~in_what:"table"
1380
        |> add_warning input;
×
1381
        junk input;
×
1382
        (`Stop, return acc_row acc_cell, next_token.location)
×
1383
    | `Right_brace ->
23✔
1384
        junk input;
1385
        (`Stop, return acc_row acc_cell, next_token.location)
23✔
1386
    | `Space _ as token ->
292✔
1387
        junk input;
1388
        let i = Loc.at next_token.location token in
292✔
1389
        consume_row acc_row acc_cell (i :: acc_space) ~new_line ~last_loc
292✔
1390
    | `Single_newline _ | `Blank_line _ ->
3✔
1391
        junk input;
1392
        (`Continue, return acc_row acc_cell, last_loc)
38✔
1393
    | `Bar ->
186✔
1394
        junk input;
1395
        let acc_row = if new_line then [] else List.rev acc_cell :: acc_row in
45✔
1396
        consume_row acc_row [] [] ~new_line:false ~last_loc
1397
    | #token_that_always_begins_an_inline_element as token ->
294✔
1398
        let i = inline_element input next_token.location token in
1399
        if Loc.spans_multiple_lines i then
294✔
1400
          Parse_error.not_allowed
1401
            ~what:(Token.describe (`Single_newline ""))
1✔
1402
            ~in_what:(Token.describe `Begin_table_light)
1✔
1403
            i.location
1404
          |> add_warning input;
1✔
1405
        let acc_cell =
294✔
1406
          if acc_cell = [] then [ i ] else (i :: acc_space) @ acc_cell
140✔
1407
        in
1408
        consume_row acc_row acc_cell [] ~new_line:false
1409
          ~last_loc:next_token.location
1410
    | other_token ->
1✔
1411
        Parse_error.not_allowed next_token.location
1412
          ~what:(Token.describe other_token)
1✔
1413
          ~in_what:(Token.describe parent_markup)
1✔
1414
        |> add_warning input;
1✔
1415
        junk input;
1✔
1416
        consume_row acc_row acc_cell acc_space ~new_line ~last_loc
1✔
1417
  in
1418
  consume_row [] [] [] ~new_line:true ~last_loc
1419

1420
(* Consumes a sequence of table rows (starting with '{tr ...}', which are
1421
   represented by [`Begin_table_row] tokens).
1422

1423
   This function is called immediately after '{table' ([`Begin_table `Heavy]) is
1424
   read. The only "valid" way to exit is by reading a [`Right_brace] token,
1425
   which is consumed. *)
1426
and heavy_table ~parent_markup ~parent_markup_location input =
1427
  let rec consume_rows acc ~last_loc =
12✔
1428
    Reader.until_rbrace_or_eof input acc >>> fun next_token ->
30✔
1429
    match next_token.Loc.value with
18✔
1430
    | `Begin_table_row as token ->
16✔
1431
        junk input;
1432
        let items, last_loc = heavy_table_row ~parent_markup:token input in
16✔
1433
        consume_rows (List.rev items :: acc) ~last_loc
16✔
1434
    | token ->
2✔
1435
        Parse_error.not_allowed next_token.location ~what:(Token.describe token)
2✔
1436
          ~in_what:(Token.describe parent_markup)
2✔
1437
          ~suggestion:"Move outside of {table ...}, or inside {tr ...}"
1438
        |> add_warning input;
2✔
1439
        junk input;
2✔
1440
        consume_rows acc ~last_loc
2✔
1441
  in
1442
  let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in
1443
  let grid = List.rev rows in
12✔
1444
  (Table.Heavy_syntax.from_grid grid, brace_location)
12✔
1445

1446
(* Consumes a sequence of table cells (starting with '{th ...}' or '{td ... }',
1447
   which are represented by [`Begin_table_cell] tokens).
1448

1449
   This function is called immediately after '{tr' ([`Begin_table_row]) is
1450
   read. The only "valid" way to exit is by reading a [`Right_brace] token,
1451
   which is consumed. *)
1452
and heavy_table_row ~parent_markup input =
1453
  let rec consume_cell_items acc =
16✔
1454
    Reader.until_rbrace_or_eof input acc >>> fun next_token ->
39✔
1455
    match next_token.Loc.value with
23✔
1456
    | `Begin_table_cell kind as token ->
21✔
1457
        junk input;
1458
        let content, token_after_list_item, _where_in_line =
21✔
1459
          block_element_list In_table_cell ~parent_markup:token input
1460
        in
1461
        (match token_after_list_item.value with
21✔
1462
        | `Right_brace -> junk input
21✔
1463
        | `End ->
×
1464
            Parse_error.not_allowed token_after_list_item.location
1465
              ~what:(Token.describe `End) ~in_what:(Token.describe token)
×
1466
            |> add_warning input);
×
1467
        consume_cell_items ((content, kind) :: acc)
1468
    | token ->
2✔
1469
        Parse_error.not_allowed next_token.location ~what:(Token.describe token)
2✔
1470
          ~in_what:(Token.describe parent_markup)
2✔
1471
          ~suggestion:
1472
            "Move outside of {table ...}, or inside {td ...} or {th ...}"
1473
        |> add_warning input;
2✔
1474
        junk input;
2✔
1475
        consume_cell_items acc
2✔
1476
  in
1477
  consume_cell_items []
1478

1479
(* {2 Entry point} *)
1480

1481
let parse warnings tokens =
1482
  let input : input = { tokens; warnings } in
1,889✔
1483

1484
  let rec parse_block_elements () =
1485
    let elements, last_token, _where_in_line =
1,907✔
1486
      block_element_list Top_level ~parent_markup:`Comment input
1487
    in
1488

1489
    match last_token.value with
1,907✔
1490
    | `End -> elements
1,889✔
1491
    | `Right_brace ->
18✔
1492
        Parse_error.unpaired_right_brace last_token.location
1493
        |> add_warning input;
18✔
1494

1495
        let block =
18✔
1496
          Loc.same last_token (`Paragraph [ Loc.same last_token (`Word "}") ])
18✔
1497
        in
1498

1499
        junk input;
18✔
1500
        elements @ (block :: parse_block_elements ())
18✔
1501
  in
1502
  let ast = parse_block_elements () in
1503
  (ast, List.rev !(input.warnings))
1,889✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc