• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

aantron / markup.ml / 29

06 Oct 2024 07:37PM UTC coverage: 77.678% (+0.07%) from 77.609%
29

push

github

aantron
CI: don't install Beautiful Soup

3118 of 4014 relevant lines covered (77.68%)

333.57 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.39
/src/html_parser.ml
1
(* This file is part of Markup.ml, released under the MIT license. See
2
   LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *)
3

4
open Common
5
open Token_tag
6
open Kstream
7

8

9

10
(* Namespaces for pattern matching. *)
11
type ns = [ `HTML | `MathML | `SVG | `Other of string ]
12
type qname = ns * string
13

14
module Ns :
15
sig
16
  val to_string : ns -> string
17
end =
18
struct
19
  let to_string = function
20
    | `HTML -> html_ns
491✔
21
    | `MathML -> mathml_ns
×
22
    | `SVG -> svg_ns
11✔
23
    | `Other s -> s
×
24
end
25

26
(* Specialization of List.mem at qname list, to avoid polymorphic
27
   comparison. *)
28
let list_mem_qname ((ns, tag) : qname) l =
29
  let rec loop = function
228✔
30
    | [] -> false
115✔
31
    | (ns', tag')::_ when ns' = ns && tag' = tag -> true
113✔
32
    | _::rest -> loop rest
3,325✔
33
  in
34
  loop l
35

36

37

38
(* Elements. *)
39
type element =
40
  {element_name              : qname;
41
   location                  : location;
42
   is_html_integration_point : bool;
43
   suppress                  : bool;
44
   mutable buffering         : bool;
45
   mutable is_open           : bool;
46
   mutable attributes        : (name * string) list;
47
   mutable end_location      : location;
48
   mutable children          : annotated_node list;
49
   mutable parent            : element}
50

51
and node =
52
  | Element of element
53
  | Text of string list
54
  | PI of string * string
55
  | Comment of string
56

57
and annotated_node = location * node
58

59

60

61
(* Element helpers. *)
62
module Element :
63
sig
64
  val create :
65
    ?is_html_integration_point:bool ->
66
    ?suppress:bool ->
67
    qname -> location ->
68
      element
69
  val dummy : element
70

71
  val is_special : qname -> bool
72
  val is_not_hidden : Token_tag.t -> bool
73
end =
74
struct
75
  let rec dummy =
76
    {element_name              = `HTML, "dummy";
77
     location                  = 1, 1;
78
     is_html_integration_point = false;
79
     suppress                  = true;
80
     buffering                 = false;
81
     is_open                   = false;
82
     attributes                = [];
83
     end_location              = 1, 1;
84
     children                  = [];
85
     parent                    = dummy}
86

87
  let create
88
      ?(is_html_integration_point = false) ?(suppress = false) name location =
85✔
89
    {element_name = name;
631✔
90
     location;
91
     is_html_integration_point;
92
     suppress;
93
     buffering    = false;
94
     is_open      = true;
95
     attributes   = [];
96
     end_location = 1, 1;
97
     children     = [];
98
     parent       = dummy}
99

100
  let is_special name =
101
    list_mem_qname name
19✔
102
      [`HTML, "address"; `HTML, "applet"; `HTML, "area";
103
       `HTML, "article"; `HTML, "aside"; `HTML, "base";
104
       `HTML, "basefont"; `HTML, "bgsound"; `HTML, "blockquote";
105
       `HTML, "body"; `HTML, "br"; `HTML, "button";
106
       `HTML, "caption"; `HTML, "center"; `HTML, "col";
107
       `HTML, "colgroup"; `HTML, "dd"; `HTML, "details";
108
       `HTML, "dir"; `HTML, "div"; `HTML, "dl";
109
       `HTML, "dt"; `HTML, "embed"; `HTML, "fieldset";
110
       `HTML, "figcaption"; `HTML, "figure"; `HTML, "footer";
111
       `HTML, "form"; `HTML, "frame"; `HTML, "frameset";
112
       `HTML, "h1"; `HTML, "h2"; `HTML, "h3";
113
       `HTML, "h4"; `HTML, "h5"; `HTML, "h6";
114
       `HTML, "head"; `HTML, "header"; `HTML, "hgroup";
115
       `HTML, "hr"; `HTML, "html"; `HTML, "iframe";
116
       `HTML, "img"; `HTML, "input"; `HTML, "isindex";
117
       `HTML, "li"; `HTML, "link"; `HTML, "listing";
118
       `HTML, "main"; `HTML, "marquee"; `HTML, "meta";
119
       `HTML, "nav"; `HTML, "noembed"; `HTML, "noframes";
120
       `HTML, "noscript"; `HTML, "object"; `HTML, "ol";
121
       `HTML, "p"; `HTML, "param"; `HTML, "plaintext";
122
       `HTML, "pre"; `HTML, "script"; `HTML, "section";
123
       `HTML, "select"; `HTML, "source"; `HTML, "style";
124
       `HTML, "summary"; `HTML, "table"; `HTML, "tbody";
125
       `HTML, "td"; `HTML, "template"; `HTML, "textarea";
126
       `HTML, "tfoot"; `HTML, "th"; `HTML, "thead";
127
       `HTML, "title"; `HTML, "tr"; `HTML, "track";
128
       `HTML, "ul"; `HTML, "wbr"; `HTML, "xmp";
129
       `MathML, "mi"; `MathML, "mo"; `MathML, "mn";
130
       `MathML, "ms"; `MathML, "mtext"; `MathML, "annotation-xml";
131
       `SVG, "foreignObject"; `SVG, "desc"; `SVG, "title"]
132

133
  let is_not_hidden tag =
134
    tag.Token_tag.attributes |> List.exists (fun (name, value) ->
1✔
135
      name = "type" && value <> "hidden")
1✔
136
end
137

138

139

140
(* Context detection. *)
141
type simple_context = [ `Document | `Fragment of string ]
142
type context = [ `Document | `Fragment of qname ]
143

144
module Context :
145
sig
146
  type t
147

148
  val uninitialized : unit -> t
149
  val initialize :
150
    (location * Html_tokenizer.token) Kstream.t ->
151
    [< simple_context ] option ->
152
    t ->
153
      unit cps
154

155
  val the_context : t -> context
156
  val element : t -> element option
157
  val token : t -> string option
158
end =
159
struct
160
  let detect tokens throw k =
161
    let tokens, restore = checkpoint tokens in
65✔
162

163
    let last_name = ref None in
65✔
164
    let next_token k =
165
      next_expected tokens throw (fun token ->
66✔
166
        begin match token with
66✔
167
        | _, `Start {name} -> last_name := Some name
51✔
168
        | _ -> ()
15✔
169
        end;
170
        k token)
171
    in
172

173
    let k context = restore (); k (context, !last_name) in
65✔
174

175
    let rec scan () =
176
      next_token begin function
66✔
177
        | _, `Doctype _ -> k `Document
4✔
178
        | _, `Char c when not @@ is_whitespace c -> k (`Fragment "body")
10✔
179
        | _, `Char _ -> scan ()
×
180
        | _, `EOF -> k (`Fragment "body")
×
181
        | _, `Start {name = "html"} -> k `Document
1✔
182
        | _, `Start {name = "head" | "body" | "frameset"} ->
2✔
183
          k (`Fragment "html")
184
        | _, `Start {name =
8✔
185
            "base" | "basefont" | "bgsound" | "link" | "meta" | "noframes" |
1✔
186
            "style" | "template" | "title"} ->
×
187
          k (`Fragment "head")
188
        | _, `Start {name = "frame"} -> k (`Fragment "frameset")
1✔
189
        | _, `Start {name = "li"} -> k (`Fragment "ul")
×
190
        | _, `Start {name =
1✔
191
            "caption" | "col" | "colgroup" | "tbody" | "tfoot" | "thead"} ->
×
192
          k (`Fragment "table")
193
        | _, `Start {name = "tr"} -> k (`Fragment "tbody")
×
194
        | _, `Start {name = "td" | "th"} -> k (`Fragment "tr")
×
195
        | _, `Start {name = "optgroup" | "option"} -> k (`Fragment "select")
×
196
        | _, `Start {name =
1✔
197
            "altglyph" | "altglyphdef" | "altglyphitem" | "animate" |
×
198
            "animatecolor" | "animatemotion" | "animatetransform" | "circle" |
×
199
            "clippath" | "color-profile" | "cursor" | "defs" | "desc" |
×
200
            "ellipse" | "feblend" | "fecolormatrix" | "fecomponenttransfer" |
×
201
            "fecomposite" | "fediffuselighting" | "fedisplacementmap" |
×
202
            "fedistantlight" | "feflood" | "fefunca" | "fefuncb" | "fefuncg" |
×
203
            "fefuncr" | "fegaussianblur" | "feimage" | "femerge" |
×
204
            "femergenode" | "femorphology" | "feoffset" | "fepointlight" |
×
205
            "fespecularlighting" | "fespotlight" | "fetile" | "feturbulence" |
×
206
            "filter" | "font-face" | "font-face-format" | "font-face-name" |
×
207
            "font-face-src" | "font-face-uri" | "foreignobject" | "g" |
×
208
            "glyph" | "glyphref" | "hkern" | "image" | "line" |
×
209
            "lineargradient" | "marker" | "mask" | "metadata" |
×
210
            "missing-glyph" | "mpath" | "path" | "pattern" | "polygon" |
×
211
            "polyline" | "radialgradient" | "rect" | "set" | "stop" | "switch" |
×
212
            "symbol" | "text" | "textpath" | "tref" | "tspan" | "use"} ->
×
213
          k (`Fragment "svg")
214
        | _, `Start {name =
×
215
            "maction" | "maligngroup" | "malignmark" | "menclose" | "merror" |
×
216
            "mfenced" | "mfrac" | "mglyph" | "mi" | "mlabeledtr" | "mlongdiv" |
×
217
            "mmultiscripts" | "mn" | "mo" | "mover" | "mpadded" | "mphantom" |
×
218
            "mroot" | "mrow" | "ms" | "mscarries" | "mscarry" | "msgroup" |
×
219
            "msline" | "mspace" | "msqrt" | "msrow" | "mstack" | "mstyle" |
×
220
            "msub" | "msup" | "msubsup" | "mtable" | "mtd" | "mtext" | "mtr" |
×
221
            "munder" | "munderover" | "semantics" | "annotation" |
×
222
            "annotation-xml"} ->
×
223
          k (`Fragment "math")
224
        | _, `Start _ -> k (`Fragment "body")
29✔
225
        | _, (`End _ | `Comment _) -> scan ()
×
226
      end
227
    in
228

229
    scan ()
230

231
  type t = (context * element option * string option) ref
232

233
  let uninitialized () = ref (`Document, None, None)
146✔
234

235
  let initialize tokens requested_context state throw k =
236
    (fun k ->
146✔
237
      match requested_context with
146✔
238
      | Some (`Fragment element) ->
25✔
239
        (* HTML element names are case-insensitive, even in foreign content.
240
           Lowercase the element name given by the user before analysis by the
241
           parser, to match this convention. [String.lowercase] is acceptable
242
           here because the API assumes the string [element] is in UTF-8. *)
243
        k (`Fragment (String.lowercase_ascii element), None)
25✔
244
      | Some (`Document as c) -> k (c, None)
56✔
245
      | None -> detect tokens throw k)
65✔
246
    (fun (detected_context, deciding_token) ->
247

248
      let context =
146✔
249
        match detected_context with
250
        | `Document -> `Document
61✔
251
        | `Fragment "math" -> `Fragment (`MathML, "math")
×
252
        | `Fragment "svg" -> `Fragment (`SVG, "svg")
5✔
253
        | `Fragment name -> `Fragment (`HTML, name)
80✔
254
      in
255

256
      let context_element =
257
        match context with
258
        | `Document -> None
61✔
259
        | `Fragment name ->
85✔
260
          let is_html_integration_point =
261
            match name with
262
            | `SVG, ("foreignObject" | "desc" | "title") -> true
×
263
            | _ -> false
85✔
264
          in
265

266
          Some (Element.create
85✔
267
            ~is_html_integration_point ~suppress:true name (1, 1))
268
      in
269

270
      state := context, context_element, deciding_token;
271

272
      k ())
273

274
  let the_context {contents = (c, _, _)} = c
820✔
275
  let element {contents = (_, e, _)} = e
1,499✔
276
  let token {contents = (_, _, t)} = t
146✔
277
end
278

279

280

281
(* Heplers for foreign content. *)
282
module Foreign :
283
sig
284
  val is_mathml_text_integration_point : qname -> bool
285
  val is_html_integration_point :
286
    ns -> string -> (string * string) list -> bool
287

288
  val adjust_mathml_attributes :
289
    ((string * string) * string) list -> ((string * string) * string) list
290
  val adjust_svg_attributes :
291
    ((string * string) * string) list -> ((string * string) * string) list
292
  val adjust_svg_tag_name : string -> string
293
end =
294
struct
295
  let is_mathml_text_integration_point qname =
296
    list_mem_qname qname
7✔
297
      [`MathML, "mi"; `MathML, "mo"; `MathML, "mn"; `MathML, "ms";
298
       `MathML, "mtext"]
299

300
  let is_html_integration_point namespace tag_name attributes =
301
    match namespace with
461✔
302
    | `HTML | `Other _ -> false
×
303
    | `MathML ->
×
304
      tag_name = "annotation-xml" &&
305
      attributes |> List.exists (function
×
306
        | "encoding", "text/html" -> true
×
307
        | "encoding", "application/xhtml+xml" -> true
×
308
        | _ -> false)
×
309
    | `SVG ->
11✔
310
      list_mem_string tag_name ["foreignObject"; "desc"; "title"]
311

312
  let adjust_mathml_attributes attributes =
313
    attributes |> List.map (fun ((ns, name), value) ->
×
314
      let name =
×
315
        if ns = mathml_ns && name = "definitionurl" then "definitionURL"
×
316
        else name
×
317
      in
318
      (ns, name), value)
319

320
  let adjust_svg_attributes attributes =
321
    attributes |> List.map (fun ((ns, name), value) ->
11✔
322
      let name =
1✔
323
        match name with
324
        | "attributename" -> "attributeName"
×
325
        | "attributetype" -> "attributeType"
×
326
        | "basefrequency" -> "baseFrequency"
×
327
        | "baseprofile" -> "baseProfile"
×
328
        | "calcmode" -> "calcMode"
×
329
        | "clippathunits" -> "clipPathUnits"
×
330
        | "contentscripttype" -> "contentScriptType"
×
331
        | "contentstyletype" -> "contentStyleType"
×
332
        | "diffuseconstant" -> "diffuseConstant"
×
333
        | "edgemode" -> "edgeMode"
×
334
        | "externalresourcesrequired" -> "externalResourcesRequired"
×
335
        | "filterres" -> "filterRes"
×
336
        | "filterunits" -> "filterUnits"
×
337
        | "glyphref" -> "glyphRef"
×
338
        | "gradienttransform" -> "gradientTransform"
×
339
        | "gradientunits" -> "gradientUnits"
×
340
        | "kernelmatrix" -> "kernelMatrix"
×
341
        | "kernelunitlength" -> "kernelUnitLength"
×
342
        | "keypoints" -> "keyPoints"
×
343
        | "keysplines" -> "keySplines"
×
344
        | "keytimes" -> "keyTimes"
×
345
        | "lengthadjust" -> "lengthAdjust"
×
346
        | "limitingconeangle" -> "limitingConeAngle"
×
347
        | "markerheight" -> "markerHeight"
×
348
        | "markerunits" -> "markerUnits"
×
349
        | "markerwidth" -> "markerWidth"
×
350
        | "maskcontentunits" -> "maskContentUnits"
×
351
        | "maskunits" -> "maskUnits"
×
352
        | "numoctaves" -> "numOctaves"
×
353
        | "pathlength" -> "pathLength"
×
354
        | "patterncontentunits" -> "patternContentUnits"
×
355
        | "patterntransform" -> "patternTransform"
×
356
        | "patternunits" -> "patternUnits"
×
357
        | "pointsatx" -> "pointsAtX"
×
358
        | "pointsaty" -> "pointsAtY"
×
359
        | "pointsatz" -> "pointsAtZ"
×
360
        | "preservealpha" -> "preserveAlpha"
×
361
        | "preserveaspectratio" -> "preserveAspectRatio"
×
362
        | "primitiveunits" -> "primitiveUnits"
×
363
        | "refx" -> "refX"
1✔
364
        | "refy" -> "refY"
×
365
        | "repeatcount" -> "repeatCount"
×
366
        | "repeatdur" -> "repeatDur"
×
367
        | "requiredextensions" -> "requiredExtensions"
×
368
        | "requiredfeatures" -> "requiredFeatures"
×
369
        | "specularconstant" -> "specularConstant"
×
370
        | "specularexponent" -> "specularExponent"
×
371
        | "spreadmethod" -> "spreadMethod"
×
372
        | "startoffset" -> "startOffset"
×
373
        | "stddeviation" -> "stdDeviation"
×
374
        | "stitchtiles" -> "stitchTiles"
×
375
        | "surfacescale" -> "surfaceScale"
×
376
        | "systemlanguage" -> "systemLanguage"
×
377
        | "tablevalues" -> "tableValues"
×
378
        | "targetx" -> "targetX"
×
379
        | "targety" -> "targetY"
×
380
        | "textlength" -> "textLength"
×
381
        | "viewbox" -> "viewBox"
×
382
        | "viewtarget" -> "viewTarget"
×
383
        | "xchannelselector" -> "xChannelSelector"
×
384
        | "ychannelselector" -> "yChannelSelector"
×
385
        | "zoomandpan" -> "zoomAndPan"
×
386
        | _ -> name
×
387
      in
388
      (ns, name), value)
389

390
  let adjust_svg_tag_name = function
391
    | "altglyph" -> "altGlyph"
×
392
    | "altglyphdef" -> "altGlyphDef"
×
393
    | "altglyphitem" -> "altGlyphItem"
×
394
    | "animatecolor" -> "animateColor"
×
395
    | "animatemotion" -> "animateMotion"
×
396
    | "animatetransform" -> "animateTransform"
×
397
    | "clippath" -> "clipPath"
×
398
    | "feblend" -> "feBlend"
×
399
    | "fecolormatrix" -> "feColorMatrix"
×
400
    | "fecomponenttransfer" -> "feComponentTransfer"
×
401
    | "fecomposite" -> "feComposite"
×
402
    | "feconvolvematrix" -> "feConvolveMatrix"
×
403
    | "fediffuselighting" -> "feDiffuseLighting"
×
404
    | "fedisplacementmap" -> "feDisplacementMap"
×
405
    | "fedistantlight" -> "feDistantLight"
×
406
    | "fedropshadow" -> "feDropShadow"
×
407
    | "feflood" -> "feFlood"
×
408
    | "fefunca" -> "feFuncA"
×
409
    | "fefuncb" -> "feFuncB"
×
410
    | "fefuncg" -> "feFuncG"
×
411
    | "fefuncr" -> "feFuncR"
×
412
    | "fegaussianblur" -> "feGaussianBlur"
×
413
    | "feimage" -> "feImage"
×
414
    | "femerge" -> "feMerge"
×
415
    | "femergenode" -> "feMergeNode"
×
416
    | "femorphology" -> "feMorphology"
×
417
    | "feoffset" -> "feOffset"
×
418
    | "fepointlight" -> "fePointLight"
×
419
    | "fespecularlighting" -> "feSpecularLighting"
×
420
    | "fespotlight" -> "feSpotLight"
×
421
    | "fetile" -> "feTile"
1✔
422
    | "feturbulence" -> "feTurbulence"
×
423
    | "foreignobject" -> "foreignObject"
×
424
    | "glyphref" -> "glyphRef"
×
425
    | "lineargradient" -> "linearGradient"
×
426
    | "radialgradient" -> "radialGradient"
×
427
    | "textpath" -> "textPath"
×
428
    | s -> s
10✔
429
end
430

431

432

433
(* Stack of open elements. *)
434
module Stack :
435
sig
436
  type t = element list ref
437

438
  val create : unit -> t
439

440
  val current_element : t -> element option
441
  val require_current_element : t -> element
442
  val adjusted_current_element : Context.t -> t -> element option
443
  val current_element_is : t -> string list -> bool
444
  val current_element_is_foreign : Context.t -> t -> bool
445

446
  val has : t -> string -> bool
447

448
  val in_scope : t -> string -> bool
449
  val in_button_scope : t -> string -> bool
450
  val in_list_item_scope : t -> string -> bool
451
  val in_table_scope : t -> string -> bool
452
  val in_select_scope : t -> string -> bool
453
  val one_in_scope : t -> string list -> bool
454
  val one_in_table_scope : t -> string list -> bool
455
  val target_in_scope : t -> element -> bool
456

457
  val remove : t -> element -> unit
458
  val replace : t -> old:element -> new_:element -> unit
459
  val insert_below : t -> anchor:element -> new_:element -> unit
460
end =
461
struct
462
  type t = element list ref
463

464
  let create () = ref []
146✔
465

466
  let current_element open_elements =
467
    match !open_elements with
276✔
468
    | [] -> None
×
469
    | element::_ -> Some element
276✔
470

471
  let require_current_element open_elements =
472
    match current_element open_elements with
125✔
473
    | None -> failwith "require_current_element: None"
×
474
    | Some element -> element
125✔
475

476
  let adjusted_current_element context open_elements =
477
    match !open_elements, Context.element context with
1,499✔
478
    | [_], Some element -> Some element
315✔
479
    | [], _ -> None
140✔
480
    | element::_, _ -> Some element
1,044✔
481

482
  let current_element_is open_elements names =
483
    match !open_elements with
38✔
484
    | {element_name = `HTML, name}::_ -> list_mem_string name names
38✔
485
    | _ -> false
×
486

487
  let current_element_is_foreign context open_elements =
488
    match adjusted_current_element context open_elements with
1✔
489
    | Some {element_name = ns, _} when ns <> `HTML -> true
1✔
490
    | _ -> false
×
491

492
  let has open_elements name =
493
    List.exists
7✔
494
      (fun {element_name = ns, name'} ->
495
        ns = `HTML && name' = name) !open_elements
10✔
496

497
  let in_scope_general scope_delimiters open_elements name' =
498
    let rec scan = function
234✔
499
      | [] -> false
×
500
      | {element_name = ns, name'' as name}::more ->
325✔
501
        if ns = `HTML && name'' = name' then true
134✔
502
        else
503
          if list_mem_qname name scope_delimiters then false
100✔
504
          else scan more
91✔
505
    in
506
    scan !open_elements
507

508
  let scope_delimiters =
509
    [`HTML, "applet"; `HTML, "caption"; `HTML, "html";
510
     `HTML, "table"; `HTML, "td"; `HTML, "th";
511
     `HTML, "marquee"; `HTML, "object"; `HTML, "template";
512
     `MathML, "mi"; `MathML, "mo"; `MathML, "mn";
513
     `MathML, "ms"; `MathML, "mtext"; `MathML, "annotation-xml";
514
     `SVG, "foreignObject"; `SVG, "desc"; `SVG, "title"]
515

516
  let in_scope = in_scope_general scope_delimiters
2✔
517

518
  let in_button_scope = in_scope_general ((`HTML, "button")::scope_delimiters)
2✔
519

520
  let in_list_item_scope =
521
    in_scope_general ((`HTML, "ol")::(`HTML, "ul")::scope_delimiters)
2✔
522

523
  let in_table_scope =
524
    in_scope_general [`HTML, "html"; `HTML, "table"; `HTML, "template"]
2✔
525

526
  let in_select_scope open_elements name =
527
    let rec scan = function
2✔
528
      | [] -> false
×
529
      | {element_name = ns, name'}::more ->
3✔
530
        if ns <> `HTML then false
×
531
        else
532
          if name' = name then true
2✔
533
          else
534
            if name' = "optgroup" || name' = "option" then scan more
×
535
            else false
×
536
    in
537
    scan !open_elements
538

539
  let one_in_scope open_elements names =
540
    let rec scan = function
7✔
541
      | [] -> false
×
542
      | {element_name = ns, name' as name}::more ->
7✔
543
        if ns = `HTML && list_mem_string name' names then true
7✔
544
        else
545
          if list_mem_qname name scope_delimiters then false
×
546
          else scan more
×
547
    in
548
    scan !open_elements
549

550
  let one_in_table_scope open_elements names =
551
    let rec scan = function
3✔
552
      | [] -> false
×
553
      | {element_name = ns, name' as name}::more ->
3✔
554
        if ns = `HTML && list_mem_string name' names then true
3✔
555
        else
556
          if list_mem_qname name
×
557
              [`HTML, "html"; `HTML, "table"; `HTML, "template"] then
558
            false
×
559
          else scan more
×
560
    in
561
    scan !open_elements
562

563
  let target_in_scope open_elements node =
564
    let rec scan = function
8✔
565
      | [] -> false
×
566
      | e::more ->
14✔
567
        if e == node then true
8✔
568
        else
569
          if list_mem_qname node.element_name scope_delimiters then false
×
570
          else scan more
6✔
571
    in
572
    scan !open_elements
573

574
  let remove open_elements element =
575
    open_elements := List.filter ((!=) element) !open_elements;
4✔
576
    element.is_open <- false
577

578
  let replace open_elements ~old ~new_ =
579
    open_elements :=
2✔
580
      List.map (fun e ->
2✔
581
        if e == old then (e.is_open <- false; new_) else e) !open_elements
2✔
582

583
  let insert_below open_elements ~anchor ~new_ =
584
    let rec insert prefix = function
2✔
585
      | [] -> List.rev prefix
×
586
      | e::more when e == anchor -> (List.rev prefix) @ (new_::e::more)
2✔
587
      | e::more -> insert (e::prefix) more
×
588
    in
589
    open_elements := insert [] !open_elements
2✔
590
end
591

592

593

594
(* List of active formatting elements. *)
595
module Active :
596
sig
597
  type entry =
598
    | Marker
599
    | Element_ of element * location * Token_tag.t
600

601
  type t = entry list ref
602

603
  val create : unit -> t
604

605
  val add_marker : t -> unit
606
  val clear_until_marker : t -> unit
607

608
  val has : t -> element -> bool
609
  val remove : t -> element -> unit
610
  val replace : t -> old:element -> new_:element -> unit
611
  val insert_after : t -> anchor:element -> new_:element -> unit
612

613
  val has_before_marker : t -> string -> element option
614
end =
615
struct
616
  type entry =
617
    | Marker
618
    | Element_ of element * location * Token_tag.t
619

620
  type t = entry list ref
621

622
  let create () = ref []
146✔
623

624
  let add_marker active_formatting_elements =
625
    active_formatting_elements := Marker::!active_formatting_elements
7✔
626

627
  let clear_until_marker active_formatting_elements =
628
    let rec iterate = function
7✔
629
      | Marker::rest -> rest
7✔
630
      | (Element_ _)::rest -> iterate rest
×
631
      | [] -> []
×
632
    in
633
    active_formatting_elements := iterate !active_formatting_elements
7✔
634

635
  let has active_formatting_elements element =
636
    !active_formatting_elements |> List.exists (function
2✔
637
      | Element_ (e, _, _) when e == element -> true
2✔
638
      | _ -> false)
1✔
639

640
  let remove active_formatting_elements element =
641
    active_formatting_elements :=
22✔
642
      !active_formatting_elements |> List.filter (function
22✔
643
        | Element_ (e, _, _) when e == element -> false
20✔
644
        | _ -> true)
7✔
645

646
  let replace active_formatting_elements ~old ~new_ =
647
    active_formatting_elements :=
3✔
648
      !active_formatting_elements |> List.map (function
3✔
649
        | Element_ (e, l, t) when e == old -> Element_ (new_, l, t)
3✔
650
        | e -> e)
4✔
651

652
  let insert_after active_formatting_elements ~anchor ~new_ =
653
    let rec insert prefix = function
1✔
654
      | [] -> List.rev prefix
×
655
      | (Element_ (e, l, t) as v)::more when e == anchor ->
1✔
656
        let new_entry = Element_ (new_, l, t) in
1✔
657
        (List.rev prefix) @ (v::new_entry::more)
1✔
658
      | v::more -> insert (v::prefix) more
×
659
    in
660
    active_formatting_elements := insert [] !active_formatting_elements
1✔
661

662
  let has_before_marker active_formatting_elements name =
663
    let rec scan = function
8✔
664
      | [] | Marker::_ -> None
×
665
      | Element_ (n, _, _)::_ when n.element_name = (`HTML, name) -> Some n
2✔
666
      | _::more -> scan more
×
667
    in
668
    scan !active_formatting_elements
669
end
670

671

672

673
type mode = unit -> unit
674

675
(* Stack of template insertion modes. *)
676
module Template :
677
sig
678
  type t = mode list ref
679

680
  val create : unit -> t
681

682
  val push : t -> mode -> unit
683
  val pop : t -> unit
684
end =
685
struct
686
  type t = (unit -> unit) list ref
687

688
  let create () = ref []
146✔
689

690
  let push template_insertion_modes mode =
691
    template_insertion_modes := mode::!template_insertion_modes
×
692

693
  let pop template_insertion_modes =
694
    match !template_insertion_modes with
×
695
    | [] -> ()
×
696
    | _::rest -> template_insertion_modes := rest
×
697
end
698

699

700

701
(* Subtree buffers. HTML specifies the "adoption agency algorithm" for
702
   recovering from certain kinds of errors. This algorithm is (apparently)
703
   incompatible with streaming parsers that do not maintain a DOM - such as
704
   Markup.ml. So, when the Markup.ml parser encounters a situation in which it
705
   may be necessary to later run the adoption agency algorithm, it buffers its
706
   signal output. Instead of being emitted, the signals are used to construct a
707
   DOM subtree. If the algorithm is run, it is run on this subtree. Whenever the
708
   parser can "prove" that the subtree can no longer be involved in the adoption
709
   agency algorithm, it serializes the subtree into the signal stream. In
710
   practice, this means that buffering begins when a formatting element is
711
   encountered, and ends when the parent of the formatting element is popped off
712
   the open element stack. *)
713
module Subtree :
714
sig
715
  type t
716

717
  val create : Stack.t -> t
718

719
  val accumulate : t -> location -> signal -> bool
720

721
  val enable : t -> unit
722
  val disable : t -> (location * signal) list
723

724
  val adoption_agency_algorithm :
725
    t -> Active.t -> location -> string -> bool * (location * Error.t) list
726
end =
727
struct
728
  type t =
729
    {open_elements    : Stack.t;
730
     mutable enabled  : bool;
731
     mutable position : element}
732

733
  let create open_elements =
734
    {open_elements;
146✔
735
     enabled  = false;
736
     position = Element.dummy}
737

738
  let accumulate subtree_buffer l s =
739
    if not subtree_buffer.enabled then true
1,078✔
740
    else begin
104✔
741
      begin match s with
742
      | `Start_element (_, attributes) ->
37✔
743
        let parent = subtree_buffer.position in
744
        let child =
745
          Stack.require_current_element subtree_buffer.open_elements in
746

747
        child.attributes <- attributes;
37✔
748
        child.parent <- parent;
749
        parent.children <- (l, Element child)::parent.children;
750

751
        subtree_buffer.position <- child
752

753
      | `End_element ->
16✔
754
        subtree_buffer.position.end_location <- l;
755
        subtree_buffer.position <-
756
          Stack.require_current_element subtree_buffer.open_elements
16✔
757

758
      | `Text ss ->
51✔
759
        subtree_buffer.position.children <-
760
          (l, Text ss)::subtree_buffer.position.children
761

762
      | `PI (t, s) ->
×
763
        subtree_buffer.position.children <-
764
          (l, PI (t, s))::subtree_buffer.position.children
765

766
      | `Comment s ->
×
767
        subtree_buffer.position.children <-
768
          (l, Comment s)::subtree_buffer.position.children
769

770
      | `Xml _ | `Doctype _ -> ()
×
771
      end;
772

773
      false
774
    end
775

776
  let enable subtree_buffer =
777
    if subtree_buffer.enabled then ()
33✔
778
    else
779
      match Stack.current_element subtree_buffer.open_elements with
24✔
780
      | None -> ()
×
781
      | Some element ->
24✔
782
        element.buffering <- true;
783
        subtree_buffer.position <- element;
784
        subtree_buffer.enabled <- true
785

786
  let disable subtree_buffer =
787
    let rec traverse acc = function
24✔
788
      | l, Element {element_name; attributes; end_location; children} ->
41✔
789
        let name = Ns.to_string (fst element_name), snd element_name in
41✔
790
        let start_signal = l, `Start_element (name, attributes) in
791
        let end_signal = end_location, `End_element in
792
        start_signal::(List.fold_left traverse (end_signal::acc) children)
41✔
793

794
      | l, Text ss ->
51✔
795
        begin match acc with
796
        | (_, `Text ss')::rest -> (l, `Text (ss @ ss'))::rest
3✔
797
        | _ -> (l, `Text ss)::acc
48✔
798
        end
799

800
      | l, PI (t, s) -> (l, `PI (t, s))::acc
×
801
      | l, Comment s -> (l, `Comment s)::acc
×
802
    in
803

804
    let result =
805
      List.fold_left traverse []
806
        (Stack.require_current_element subtree_buffer.open_elements).children
24✔
807
    in
808

809
    subtree_buffer.enabled <- false;
24✔
810

811
    result
812

813
  (* Part of 8.2.5.4.7. *)
814
  let adoption_agency_algorithm
815
      subtree_buffer active_formatting_elements l subject =
816

817
    let open_elements = subtree_buffer.open_elements in
23✔
818

819
    let above_removed_nodes = ref [] in
820

821
    let rec above_in_stack node = function
822
      | e::e'::_ when e == node -> e'
6✔
823
      | _::more -> above_in_stack node more
7✔
824
      | [] -> failwith "above_in_stack: not found"
×
825
    in
826

827
    let above_node node =
828
      if node.is_open then above_in_stack node !open_elements
4✔
829
      else
830
        try List.find (fun (e, _) -> e == node) !above_removed_nodes |> snd
×
831
        with Not_found -> failwith "above_node: not found"
×
832
    in
833

834
    let remove_node node =
835
      above_removed_nodes :=
×
836
        (node, above_in_stack node !open_elements)::!above_removed_nodes;
×
837
      Stack.remove open_elements node
838
    in
839

840
    let reparent node new_parent =
841
      let old_parent = node.parent in
6✔
842

843
      let entry, filtered_children =
844
        let rec remove prefix = function
845
          | (_, Element e as entry)::rest when e == node ->
2✔
846
            entry, (List.rev prefix) @ rest
2✔
847
          | e::rest -> remove (e::prefix) rest
×
848
          | [] -> (node.location, Element node), old_parent.children
4✔
849
        in
850
        remove [] old_parent.children
6✔
851
      in
852

853
      old_parent.children <- filtered_children;
854
      new_parent.children <- entry::new_parent.children;
855
      node.parent <- new_parent
856
    in
857

858
    let inner_loop formatting_element furthest_block =
859
      let rec repeat inner_loop_counter node last_node bookmark =
2✔
860
        let node = above_node node in
4✔
861

862
        if node == formatting_element then last_node, bookmark
2✔
863
        else begin
2✔
864
          if inner_loop_counter > 3 then
865
            Active.remove active_formatting_elements node;
×
866

867
          if not @@ Active.has active_formatting_elements node then begin
×
868
            remove_node node;
869
            repeat (inner_loop_counter + 1) node last_node bookmark
×
870
          end
871
          else begin
2✔
872
            let new_node =
873
              {node with is_open = true; children = []; parent = Element.dummy}
874
            in
875

876
            node.end_location <- l;
877

878
            Stack.replace open_elements ~old:node ~new_:new_node;
879
            Active.replace active_formatting_elements ~old:node ~new_:new_node;
2✔
880

881
            reparent last_node new_node;
2✔
882

883
            repeat (inner_loop_counter + 1) new_node new_node
2✔
884
              (if last_node == furthest_block then Some new_node else bookmark)
1✔
885
          end
886
        end
887

888
      in
889
      repeat 1 furthest_block furthest_block None
890
    in
891

892
    let find_formatting_element () =
893
      let rec scan = function
10✔
894
        | [] -> None
4✔
895
        | Active.Marker::_ -> None
×
896
        | (Active.Element_ ({element_name = `HTML, n} as e, _, _))::_
12✔
897
            when n = subject -> Some e
6✔
898
        | _::rest -> scan rest
6✔
899
      in
900
      scan !active_formatting_elements
901
    in
902

903
    let find_furthest_block formatting_element =
904
      let rec scan furthest = function
6✔
905
        | [] -> furthest
×
906
        | e::_ when e == formatting_element -> furthest
6✔
907
        | e::more when Element.is_special e.element_name -> scan (Some e) more
2✔
908
        | _::more -> scan furthest more
4✔
909
      in
910
      scan None !open_elements
911
    in
912

913
    let pop_to_formatting_element formatting_element =
914
      let rec pop () =
4✔
915
        match !open_elements with
6✔
916
        | [] -> ()
×
917
        | e::more ->
6✔
918
          open_elements := more;
919
          e.is_open <- false;
920
          e.end_location <- l;
921
          if e != formatting_element then pop ()
2✔
922
      in
923
      pop ();
924
      subtree_buffer.position <- Stack.require_current_element open_elements
4✔
925
    in
926

927
    let rec outer_loop outer_loop_counter errors =
928
      let outer_loop_counter = outer_loop_counter + 1 in
10✔
929

930
      if outer_loop_counter >= 8 then true, List.rev errors
×
931
      else begin
10✔
932
        match find_formatting_element () with
933
        | None -> false, List.rev errors
4✔
934
        | Some formatting_element ->
6✔
935
          if not formatting_element.is_open then begin
×
936
            Active.remove active_formatting_elements formatting_element;
937
            true, List.rev ((l, `Unmatched_end_tag subject)::errors)
×
938
          end
939
          else begin
6✔
940
            if not @@ Stack.target_in_scope open_elements
6✔
941
                        formatting_element then begin
×
942
              true, List.rev ((l, `Unmatched_end_tag subject)::errors)
×
943
            end
944
            else begin
6✔
945
              let errors =
946
                if Stack.require_current_element open_elements ==
6✔
947
                   formatting_element then
948
                  errors
2✔
949
                else (l, `Unmatched_end_tag subject)::errors
4✔
950
              in
951

952
              match find_furthest_block formatting_element with
953
              | None ->
4✔
954
                pop_to_formatting_element formatting_element;
955
                Active.remove active_formatting_elements formatting_element;
4✔
956
                true, List.rev errors
4✔
957

958
              | Some furthest_block ->
2✔
959
                formatting_element.end_location <- l;
960

961
                let common_ancestor =
962
                  above_in_stack formatting_element !open_elements in
963

964
                let last_node, bookmark =
2✔
965
                  inner_loop formatting_element furthest_block in
966

967
                reparent last_node common_ancestor;
2✔
968

969
                let new_node =
2✔
970
                  {formatting_element with
971
                    is_open = true; children = []; parent = Element.dummy}
972
                in
973

974
                new_node.children <- furthest_block.children;
975
                furthest_block.children <- [];
976
                new_node.children |> List.iter (function
977
                  | _, Element child -> child.parent <- new_node
×
978
                  | _ -> ());
2✔
979

980
                reparent new_node furthest_block;
2✔
981

982
                begin match bookmark with
2✔
983
                | None ->
1✔
984
                  Active.replace active_formatting_elements
1✔
985
                    ~old:formatting_element ~new_:new_node
986
                | Some node ->
1✔
987
                  Active.remove active_formatting_elements formatting_element;
988
                  Active.insert_after
1✔
989
                    active_formatting_elements ~anchor:node ~new_:new_node
990
                end;
991

992
                Stack.remove open_elements formatting_element;
993
                Stack.insert_below
2✔
994
                  open_elements ~anchor:furthest_block ~new_:new_node;
995

996
                outer_loop outer_loop_counter errors
2✔
997
            end
998
          end
999
      end
1000
    in
1001

1002
    let current_node = Stack.require_current_element open_elements in
1003
    if current_node.element_name = (`HTML, subject) then begin
15✔
1004
      open_elements := List.tl !open_elements;
15✔
1005
      current_node.is_open <- false;
1006
      current_node.end_location <- l;
1007
      subtree_buffer.position <- Stack.require_current_element open_elements;
15✔
1008
      Active.remove active_formatting_elements current_node;
1009
      true, []
15✔
1010
    end
1011
    else outer_loop 0 []
8✔
1012
end
1013

1014

1015

1016
let parse requested_context report (tokens, set_tokenizer_state, set_foreign) =
1017
  let context = Context.uninitialized () in
146✔
1018

1019
  let throw = ref (fun _ -> ()) in
×
1020
  let ended = ref (fun _ -> ()) in
×
1021
  let output = ref (fun _ -> ()) in
×
1022

1023
  let report_if = Error.report_if report in
1024
  let unmatched_end_tag l name k =
146✔
1025
    report l (`Unmatched_end_tag name) !throw k in
1✔
1026
  let misnested_tag l t context_name k =
1027
    report l (`Misnested_tag (t.name, context_name, t.Token_tag.attributes)) !throw k in
44✔
1028

1029
  let open_elements = Stack.create () in
1030
  let active_formatting_elements = Active.create () in
146✔
1031
  let subtree_buffer = Subtree.create open_elements in
146✔
1032
  let text = Text.prepare () in
146✔
1033
  let template_insertion_modes = Template.create () in
146✔
1034
  let frameset_ok = ref true in
146✔
1035
  let head_seen = ref false in
1036
  let form_element_pointer = ref None in
1037

1038
  let add_character = Text.add text in
1039

1040
  set_foreign (fun () ->
146✔
1041
    Stack.current_element_is_foreign context open_elements);
1✔
1042

1043
  let report_if_stack_has_other_than names k =
146✔
1044
    let rec iterate = function
130✔
1045
      | [] -> k ()
130✔
1046
      | {element_name = ns, name; location}::more ->
220✔
1047
        report_if (not (ns = `HTML && list_mem_string name names))
220✔
1048
          location (fun () -> `Unmatched_start_tag name) !throw (fun () ->
12✔
1049
        iterate more)
220✔
1050
    in
1051
    iterate !open_elements
1052
  in
1053

1054
  let rec current_mode = ref initial_mode
1055

1056
  and constructor throw_ k =
1057
    Context.initialize tokens requested_context context throw_ (fun () ->
146✔
1058

1059
    let initial_tokenizer_state =
146✔
1060
      match Context.the_context context with
1061
      | `Fragment (`HTML, ("title" | "textarea")) -> `RCDATA
1✔
1062
      | `Fragment
1✔
1063
          (`HTML, ("style" | "xmp" | "iframe" | "noembed" | "noframes")) ->
×
1064
        `RAWTEXT
1065
      | `Fragment (`HTML, "script") -> `Script_data
1✔
1066
      | `Fragment (`HTML, "plaintext") -> `PLAINTEXT
1✔
1067
      | _ -> `Data
141✔
1068
    in
1069

1070
    set_tokenizer_state initial_tokenizer_state;
1071

1072
    begin match Context.the_context context with
146✔
1073
    | `Document -> ()
61✔
1074
    | `Fragment _ ->
85✔
1075
      let notional_root =
1076
        Element.create ~suppress:true (`HTML, "html") (1, 1) in
1077
      open_elements := [notional_root]
85✔
1078
    end;
1079

1080
    begin match Context.the_context context with
1081
    | `Fragment (`HTML, "template") ->
×
1082
      Template.push template_insertion_modes in_template_mode
×
1083
    | _ -> ()
146✔
1084
    end;
1085

1086
    (* The following is a deviation from conformance. The goal is to avoid
1087
       insertion of a <head> element into a fragment beginning with a <body> or
1088
       <frameset> element. *)
1089
    begin match Context.token context with
1090
    | Some ("body" | "frameset") -> head_seen := true
2✔
1091
    | _ -> ()
139✔
1092
    end;
1093

1094
    current_mode :=
1095
      begin match Context.the_context context with
1096
      | `Fragment _ -> reset_mode ()
85✔
1097
      | `Document -> initial_mode
61✔
1098
      end;
1099

1100
    (fun throw_ e k ->
1101
      throw := throw_;
1,225✔
1102
      ended := e;
1103
      output := k;
1104
      !current_mode ())
1105
    |> make
1106
    |> k)
146✔
1107

1108
  (* 8.2.3.1. *)
1109
  and reset_mode () =
1110
    let rec iterate last = function
100✔
1111
      | [e] when not last && Context.the_context context <> `Document ->
98✔
1112
        begin match Context.the_context context with
98✔
1113
        | `Document -> assert false
1114
        | `Fragment name -> iterate true [{e with element_name = name}]
98✔
1115
        end
1116
      | {element_name = _, "select"}::ancestors ->
2✔
1117
        let rec iterate' = function
1118
          | [] -> in_select_mode
2✔
1119
          | {element_name = _, "template"}::_ -> in_select_mode
×
1120
          | {element_name = _, "table"}::_ -> in_select_in_table_mode
×
1121
          | _::ancestors -> iterate' ancestors
×
1122
        in
1123
        iterate' ancestors
1124
      | {element_name = _, ("tr" | "th")}::_::_ -> in_cell_mode
×
1125
      | {element_name = _, "tr"}::_ -> in_row_mode
×
1126
      | {element_name = _, ("tbody" | "thead" | "tfoot")}::_ ->
×
1127
        in_table_body_mode
1128
      | {element_name = _, "caption"}::_ -> in_caption_mode
×
1129
      | {element_name = _, "colgroup"}::_ -> in_column_group_mode
×
1130
      | {element_name = _, "table"}::_ -> in_table_mode
1✔
1131
      | {element_name = _, "template"}::_ ->
×
1132
        begin match !template_insertion_modes with
1133
        | [] -> initial_mode (* This is an internal error, actually. *)
×
1134
        | mode::_ -> mode
×
1135
        end
1136
      (* The next case corresponds to item 12 of "Resetting the insertion mode
1137
         appropriately." It is commented out as deliberate deviation from the
1138
         specification, because that makes parsing of fragments intended for
1139
         <head> elements more intuitive. For conformance, the pattern in the
1140
         following case would have to end with ::_::_, not ::_. *)
1141
      (* | [{element_name = _, "head"}] -> in_body_mode *)
1142
      | {element_name = _, "head"}::_ -> in_head_mode
9✔
1143
      | {element_name = _, "body"}::_ -> in_body_mode
68✔
1144
      | {element_name = _, "frameset"}::_ -> in_frameset_mode
1✔
1145
      | {element_name = _, "html"}::_ ->
9✔
1146
        if !head_seen then after_head_mode else before_head_mode
2✔
1147
      | _::rest -> iterate last rest
10✔
1148
      | [] -> in_body_mode
10✔
1149
    in
1150
    iterate false !open_elements
1151

1152
  and emit' l s m =
1153
    if Subtree.accumulate subtree_buffer l s then begin
1,078✔
1154
      current_mode := m;
1155
      !output (l, s)
1156
    end
1157
    else m ()
104✔
1158

1159
  and emit_list ss m =
1160
    match ss with
154✔
1161
    | [] -> m ()
24✔
1162
    | (l, s)::more -> emit' l s (fun () -> emit_list more m)
130✔
1163

1164
  and emit_text m =
1165
    match Text.emit text with
1,180✔
1166
    | None -> m ()
1,053✔
1167
    | Some (l', strings) ->
127✔
1168
      emit' l' (`Text strings) m
1169

1170
  and emit l s m = emit_text (fun () -> emit' l s m)
485✔
1171

1172
  and push_and_emit
1173
      ?(formatting = false) ?(acknowledge = false) ?(namespace = `HTML)
426✔
1174
      ?(set_form_element_pointer = false) location
459✔
1175
      ({Token_tag.name; attributes; self_closing} as tag) mode =
1176

1177
    report_if (self_closing && not acknowledge) location (fun () ->
7✔
1178
      `Bad_token ("/>", "tag", "should not be self-closing"))
1✔
1179
      !throw (fun () ->
1180

1181
    let namespace_string = Ns.to_string namespace in
461✔
1182

1183
    let tag_name =
461✔
1184
      match namespace with
1185
      | `SVG -> Foreign.adjust_svg_tag_name name
11✔
1186
      | _ -> name
450✔
1187
    in
1188

1189
    let is_html_integration_point =
1190
      Foreign.is_html_integration_point namespace tag_name attributes in
1191

1192
    let attributes =
461✔
1193
      List.map (fun (n, v) -> Namespace.Parsing.parse n, v) attributes in
9✔
1194
    let attributes =
461✔
1195
      match namespace with
1196
      | `HTML | `Other _ -> attributes
×
1197
      | `MathML -> Foreign.adjust_mathml_attributes attributes
×
1198
      | `SVG -> Foreign.adjust_svg_attributes attributes
11✔
1199
    in
1200

1201
    let element_entry =
1202
      Element.create ~is_html_integration_point (namespace, name) location
1203
    in
1204
    open_elements := element_entry::!open_elements;
461✔
1205

1206
    if set_form_element_pointer then
1207
      form_element_pointer := Some element_entry;
2✔
1208

1209
    if formatting then
461✔
1210
      active_formatting_elements :=
35✔
1211
        Active.Element_ (element_entry, location, tag)::
1212
          !active_formatting_elements;
1213

1214
    emit location
461✔
1215
      (`Start_element ((namespace_string, tag_name), attributes)) mode)
1216

1217
  and push_implicit location name mode =
1218
    push_and_emit location
150✔
1219
      {Token_tag.name = name; attributes = []; self_closing = false} mode
1220

1221
  and pop location mode =
1222
    match !open_elements with
525✔
1223
    | [] -> mode ()
×
1224
    | element::more ->
525✔
1225
      emit_text (fun () ->
1226
      (fun k ->
525✔
1227
        if not element.buffering then k ()
501✔
1228
        else emit_list (Subtree.disable subtree_buffer) k)
24✔
1229
      (fun () ->
1230
        open_elements := more;
525✔
1231
        element.is_open <- false;
1232
        if element.suppress then mode ()
85✔
1233
        else emit' location `End_element mode))
440✔
1234

1235
  and pop_until condition location mode =
1236
    let rec iterate () =
405✔
1237
      match !open_elements with
643✔
1238
      | [] -> mode ()
147✔
1239
      | element::_ ->
496✔
1240
        if condition element then mode ()
259✔
1241
        else pop location iterate
237✔
1242
    in
1243
    iterate ()
1244

1245
  and close_element ?(ns = `HTML) l name mode =
115✔
1246
    pop_until
123✔
1247
      (fun {element_name = ns', name'} -> ns' = ns && name' = name) l
130✔
1248
      (fun () ->
1249
    pop l mode)
123✔
1250

1251
  and pop_until_and_raise_errors names location mode =
1252
    let rec iterate () =
7✔
1253
      match !open_elements with
7✔
1254
      | [] -> mode ()
×
1255
      | {element_name = ns, name}::_ ->
7✔
1256
        if ns = `HTML && list_mem_string name names then pop location mode
7✔
1257
        else
1258
          report location (`Unmatched_start_tag name) !throw (fun () ->
×
1259
          pop location iterate)
×
1260
    in
1261
    iterate ()
1262

1263
  and pop_implied ?(except = "") location mode =
9✔
1264
    pop_until (fun {element_name = _, name} ->
112✔
1265
      name = except ||
98✔
1266
        not @@ list_mem_string name
18✔
1267
          ["dd"; "dt"; "li"; "option"; "optgroup"; "p"; "rb"; "rp"; "rt";
1268
           "rtc"]) location mode
1269

1270
  and pop_to_table_context location mode =
1271
    pop_until (function
11✔
1272
      | {element_name = `HTML, ("table" | "template" | "html")} -> true
×
1273
      | _ -> false) location mode
×
1274

1275
  and pop_to_table_body_context location mode =
1276
    pop_until (function
7✔
1277
      | {element_name =
7✔
1278
          `HTML, ("tbody" | "thead" | "tfoot" | "template" | "html")} -> true
×
1279
      | _ -> false) location mode
×
1280

1281
  and pop_to_table_row_context location mode =
1282
    pop_until (function
5✔
1283
      | {element_name = `HTML, ("tr" | "template" | "html")} -> true
×
1284
      | _ -> false) location mode
×
1285

1286
  and close_element_with_implied name location mode =
1287
    pop_implied ~except:name location (fun () ->
97✔
1288
    let check_element k =
97✔
1289
      match Stack.current_element open_elements with
97✔
1290
      | Some {element_name = `HTML, name'} when name' = name -> k ()
92✔
1291
      | Some {element_name = _, name; location} ->
5✔
1292
        report location (`Unmatched_start_tag name) !throw k
1293
      | None ->
×
1294
        unmatched_end_tag location name k
1295
    in
1296
    check_element (fun () ->
1297
    close_element location name mode))
97✔
1298

1299
  and close_cell location mode =
1300
    pop_implied location (fun () ->
×
1301
    (fun mode ->
×
1302
      match Stack.current_element open_elements with
×
1303
      | Some {element_name = `HTML, ("td" | "th")} -> mode ()
×
1304
      | Some {element_name = _, name} ->
×
1305
        unmatched_end_tag location name mode
1306
      | None ->
×
1307
        unmatched_end_tag location "" mode)
1308
    @@ (fun () ->
1309
    pop_until (function
×
1310
      | {element_name = `HTML, ("td" | "th")} -> true
×
1311
      | _ -> false) location (fun () ->
×
1312
    pop location mode)))
×
1313

1314
  and close_current_p_element l mode =
1315
    if Stack.in_button_scope open_elements "p" then
136✔
1316
      close_element_with_implied "p" l mode
46✔
1317
    else mode ()
90✔
1318

1319
  and close_preceding_tag names l mode =
1320
    let rec scan = function
6✔
1321
      | [] -> mode ()
×
1322
      | {element_name = (ns, name) as name'}::more ->
7✔
1323
        if ns = `HTML && list_mem_string name names then
7✔
1324
          close_element_with_implied name l mode
2✔
1325
        else
1326
          if Element.is_special name' &&
5✔
1327
            not @@ list_mem_qname name'
5✔
1328
              [`HTML, "address"; `HTML, "div"; `HTML, "p"] then
1329
            mode ()
4✔
1330
          else
1331
            scan more
1✔
1332
    in
1333
    scan !open_elements
1334

1335
  and emit_end l =
1336
    pop_until (fun _ -> false) l (fun () ->
146✔
1337
    emit_text (fun () ->
147✔
1338
    !ended ()))
147✔
1339

1340
  and reconstruct_active_formatting_elements mode =
1341
    let rec get_prefix prefix = function
411✔
1342
      | [] -> prefix, []
287✔
1343
      | Active.Marker::_ as l -> prefix, l
12✔
1344
      | Active.Element_ ({is_open = true}, _, _)::_ as l -> prefix, l
112✔
1345
      | Active.Element_ ({is_open = false}, l, tag)::more ->
8✔
1346
        get_prefix ((l, tag)::prefix) more
1347
    in
1348
    let to_reopen, remainder = get_prefix [] !active_formatting_elements in
1349
    active_formatting_elements := remainder;
411✔
1350

1351
    begin match to_reopen with
1352
    | [] -> ()
404✔
1353
    | _::_ -> Subtree.enable subtree_buffer
7✔
1354
    end;
1355

1356
    let rec reopen = function
1357
      | [] -> mode ()
411✔
1358
      | (l, tag)::more ->
8✔
1359
        push_and_emit ~formatting:true l tag (fun () -> reopen more)
8✔
1360
    in
1361
    reopen to_reopen
1362

1363
  (* 8.2.5. *)
1364
  and dispatch tokens rules =
1365
    next tokens !throw (fun () -> !ended ()) begin fun ((_, t) as v) ->
×
1366
      let foreign =
1,491✔
1367
        match Stack.adjusted_current_element context open_elements, t with
1,491✔
1368
        | None, _ -> false
140✔
1369
        | Some {element_name = `HTML, _}, _ -> false
1,324✔
1370
        | Some {element_name}, `Start {name}
7✔
1371
            when Foreign.is_mathml_text_integration_point element_name
7✔
1372
            && name <> "mglyph" && name <> "malignmark" -> false
×
1373
        | Some {element_name = `MathML, "annotation-xml"},
×
1374
            `Start {name = "svg"} -> false
1375
        | Some {is_html_integration_point = true}, `Start _ -> false
×
1376
        | Some {is_html_integration_point = true}, `Char _ -> false
×
1377
        | _, `EOF -> false
5✔
1378
        | _ -> true
22✔
1379
      in
1380

1381
      if not foreign then rules v
1,469✔
1382
      else foreign_content !current_mode (fun () -> rules v) v
×
1383
    end
1384

1385
  (* 8.2.5.4.1. *)
1386
  and initial_mode () =
1387
    dispatch tokens begin function
64✔
1388
      | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) ->
×
1389
        initial_mode ()
1390

1391
      | l, `Comment s ->
1✔
1392
        emit l (`Comment s) initial_mode
1393

1394
      | l, `Doctype d ->
14✔
1395
        emit l (`Doctype d) before_html_mode
1396

1397
      | v ->
47✔
1398
        push tokens v;
1399
        before_html_mode ()
47✔
1400
    end
1401

1402
  (* 8.2.5.4.2. *)
1403
  and before_html_mode () =
1404
    dispatch tokens begin function
66✔
1405
      | l, `Doctype _ ->
1✔
1406
        report l (`Bad_document "doctype should be first") !throw
1407
          before_html_mode
1408

1409
      | l, `Comment s ->
1✔
1410
        emit l (`Comment s) before_html_mode
1411

1412
      | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) ->
×
1413
        before_html_mode ()
1414

1415
      | l, `Start ({name = "html"} as t) ->
13✔
1416
        push_and_emit l t before_head_mode
1417

1418
      | l, `End {name}
1✔
1419
          when not @@ list_mem_string name ["head"; "body"; "html"; "br"] ->
1✔
1420
        unmatched_end_tag l name before_html_mode
1✔
1421

1422
      | l, _ as v ->
48✔
1423
        push tokens v;
1424
        push_implicit l "html" before_head_mode
48✔
1425
    end
1426

1427
  (* 8.2.5.4.3. *)
1428
  and before_head_mode () =
1429
    dispatch tokens begin function
69✔
1430
      | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) ->
×
1431
        before_head_mode ()
1432

1433
      | l, `Comment s ->
1✔
1434
        emit l (`Comment s) before_head_mode
1435

1436
      | l, `Doctype _ ->
1✔
1437
        report l (`Bad_document "doctype should be first") !throw
1438
          before_head_mode
1439

1440
      | _, `Start {name = "html"} as v ->
1✔
1441
        in_body_mode_rules "html" before_head_mode v
1442

1443
      | l, `Start ({name = "head"} as t) ->
15✔
1444
        head_seen := true;
1445
        push_and_emit l t in_head_mode
1446

1447
      | l, `End {name}
6✔
1448
          when not @@ list_mem_string name ["head"; "body"; "html"; "br"] ->
6✔
1449
        report l (`Unmatched_end_tag name) !throw before_head_mode
1✔
1450

1451
      | l, _ as v ->
48✔
1452
        head_seen := true;
1453
        push tokens v;
1454
        push_implicit l "head" in_head_mode
48✔
1455
    end
1456

1457
  (* 8.2.5.4.4. *)
1458
  and in_head_mode () =
1459
    dispatch tokens (fun v -> in_head_mode_rules in_head_mode v)
100✔
1460

1461
  (* 8.2.5.4.4. *)
1462
  and in_head_mode_rules mode = function
1463
    | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->
×
1464
      add_character l c;
1465
      mode ()
5✔
1466

1467
    | l, `Comment s ->
2✔
1468
      emit l (`Comment s) mode
1469

1470
    | l, `Doctype _ ->
1✔
1471
      report l (`Bad_document "doctype should be first") !throw mode
1472

1473
    | _, `Start {name = "html"} as v ->
1✔
1474
      in_body_mode_rules "head" in_head_mode v
1475

1476
    | l, `Start ({name =
13✔
1477
        "base" | "basefont" | "bgsound" | "link" | "meta"} as t) ->
1✔
1478
      push_and_emit ~acknowledge:true l t (fun () ->
1479
      pop l mode)
13✔
1480

1481
    | l, `Start ({name = "title"} as t) ->
4✔
1482
      push_and_emit l t (fun () ->
1483
      parse_rcdata mode)
4✔
1484

1485
    | l, `Start ({name = "noframes" | "style"} as t) ->
2✔
1486
      push_and_emit l t (fun () ->
1487
      parse_rawtext mode)
6✔
1488

1489
    | l, `Start ({name = "noscript"} as t) ->
3✔
1490
      push_and_emit l t in_head_noscript_mode
1491

1492
    | l, `Start ({name = "script"} as t) ->
2✔
1493
      push_and_emit l t (fun () ->
1494
      set_tokenizer_state `Script_data;
2✔
1495
      text_mode mode)
2✔
1496

1497
    | l, `End {name = "head"} ->
14✔
1498
      pop l after_head_mode
1499

1500
    | l, `Start ({name = "template"} as t) ->
×
1501
      Active.add_marker active_formatting_elements;
1502
      frameset_ok := false;
×
1503
      Template.push template_insertion_modes in_template_mode;
1504
      push_and_emit l t in_template_mode
×
1505

1506
    | l, `End {name = "template"} ->
×
1507
      if not @@ Stack.has open_elements "template" then
×
1508
        report l (`Unmatched_end_tag "template") !throw mode
×
1509
      else begin
×
1510
        Active.clear_until_marker active_formatting_elements;
1511
        Template.pop template_insertion_modes;
×
1512
        close_element_with_implied "template" l (fun () -> reset_mode () ())
×
1513
      end
1514

1515
    | l, `Start ({name = "head"} as t) ->
1✔
1516
      misnested_tag l t "head" mode
1517

1518
    | l, `End {name} when not @@ list_mem_string name ["body"; "html"; "br"] ->
6✔
1519
      report l (`Unmatched_end_tag name) !throw mode
1✔
1520

1521
    | l, _ as v ->
58✔
1522
      push tokens v;
1523
      pop l after_head_mode
58✔
1524

1525
  (* 8.2.5.4.5. *)
1526
  and in_head_noscript_mode () =
1527
    dispatch tokens begin function
14✔
1528
      | l, `Doctype _ ->
1✔
1529
        report l (`Bad_document "doctype should be first") !throw
1530
          in_head_noscript_mode
1531

1532
      | _, `Start {name = "html"} as v ->
1✔
1533
        in_body_mode_rules "noscript" in_head_noscript_mode v
1534

1535
      | l, `End {name = "noscript"} ->
2✔
1536
        pop l in_head_mode
1537

1538
      | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020)
×
1539
      | _, `Comment _
1✔
1540
      | _, `Start {name =
1✔
1541
          "basefont" | "bgsound" | "link" | "meta" | "noframes" |
×
1542
          "style"} as v ->
×
1543
        in_head_mode_rules in_head_noscript_mode v
1544

1545
      | l, `Start ({name = "head" | "noscript"} as t) ->
1✔
1546
        misnested_tag l t "noscript" in_head_noscript_mode
1547

1548
      | l, `End {name} when name <> "br" ->
1✔
1549
        report l (`Unmatched_end_tag name) !throw in_head_noscript_mode
1✔
1550

1551
      | l, _ as v ->
1✔
1552
        report l (`Bad_content "noscript") !throw (fun () ->
1553
        push tokens v;
1✔
1554
        pop l in_head_mode)
1✔
1555
    end
1556

1557
  (* 8.2.5.4.6. *)
1558
  and after_head_mode () =
1559
    dispatch tokens begin function
88✔
1560
      | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->
×
1561
        add_character l c;
1562
        after_head_mode ()
3✔
1563

1564
      | l, `Comment s ->
1✔
1565
        emit l (`Comment s) after_head_mode
1566

1567
      | l, `Doctype _ ->
1✔
1568
        report l (`Bad_document "doctype should be first") !throw
1569
          after_head_mode
1570

1571
      | _, `Start {name = "html"} as v ->
1✔
1572
        in_body_mode_rules "html" after_head_mode v
1573

1574
      | l, `Start ({name = "body"} as t) ->
14✔
1575
        frameset_ok := false;
1576
        push_and_emit l t in_body_mode
1577

1578
      | l, `Start ({name = "frameset"} as t) ->
6✔
1579
        push_and_emit l t in_frameset_mode
1580

1581
      | l, `Start ({name =
1✔
1582
          "base" | "basefont" | "bgsound" | "link" | "meta" | "noframes" |
×
1583
          "script" | "style" | "template" | "title"} as t) as v ->
×
1584
        misnested_tag l t "html" (fun () ->
1585
        in_head_mode_rules after_head_mode v)
1✔
1586

1587
      | _, `End {name = "template"} as v ->
×
1588
        in_head_mode_rules after_head_mode v
1589

1590
      | l, `Start {name = "head"} ->
1✔
1591
        report l (`Bad_document "duplicate head element") !throw
1592
          after_head_mode
1593

1594
      | l, `End {name}
10✔
1595
          when not @@ list_mem_string name ["body"; "html"; "br"] ->
10✔
1596
        report l (`Unmatched_end_tag name) !throw after_head_mode
1✔
1597

1598
      (* This case is not found in the specification. It is a deliberate
1599
         deviation from conformance, so that fragments "<head>...</head>" don't
1600
         get an implicit <body> element generated after the <head> element. *)
1601
      | l, `EOF
21✔
1602
          when (Context.the_context context = `Fragment (`HTML, "html")
2✔
1603
             || Context.the_context context = `Fragment (`HTML, "head")) ->
8✔
1604
        emit_end l
10✔
1605

1606
      | l, _ as t ->
49✔
1607
        push tokens t;
1608
        push_implicit l "body" in_body_mode
49✔
1609
    end
1610

1611
  (* 8.2.5.4.7. *)
1612
  and in_body_mode () =
1613
    dispatch tokens (fun v -> in_body_mode_rules "body" in_body_mode v)
775✔
1614

1615
  (* 8.2.5.4.7. *)
1616
  and in_body_mode_rules context_name mode = function
1617
    | l, `Char 0 ->
1✔
1618
      report l (`Bad_token ("U+0000", "body", "null")) !throw mode
1619

1620
    | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->
1✔
1621
      reconstruct_active_formatting_elements (fun () ->
1622
      add_character l c;
26✔
1623
      mode ())
26✔
1624

1625
    | l, `Char c ->
333✔
1626
      frameset_ok := false;
1627
      reconstruct_active_formatting_elements (fun () ->
1628
      add_character l c;
333✔
1629
      mode ())
333✔
1630

1631
    | l, `Comment s ->
2✔
1632
      emit l (`Comment s) mode
1633

1634
    | l, `Doctype _ ->
2✔
1635
      report l (`Bad_document "doctype should be first") !throw mode
1636

1637
    | l, `Start ({name = "html"} as t) ->
9✔
1638
      misnested_tag l t context_name mode
1639

1640
    | _, `Start {name =
2✔
1641
        "base" | "basefont" | "bgsound" | "link" | "meta" | "noframes" |
×
1642
        "script" | "style" | "template" | "title"}
×
1643
    | _, `End {name = "template"} as v ->
×
1644
      in_head_mode_rules mode v
1645

1646
    | l, `Start ({name = "body"} as t) ->
2✔
1647
      misnested_tag l t context_name mode
1648

1649
    | l, `Start ({name = "frameset"} as t) ->
4✔
1650
      misnested_tag l t context_name (fun () ->
1651
      match !open_elements with
4✔
1652
      | [_] -> mode ()
1✔
1653
      | _ ->
3✔
1654
        let rec second_is_body = function
1655
          | [{element_name = `HTML, "body"}; _] -> true
2✔
1656
          | [] -> false
1✔
1657
          | _::more -> second_is_body more
4✔
1658
        in
1659
        if not @@ second_is_body !open_elements then mode ()
1✔
1660
        else
1661
          if not !frameset_ok then mode ()
1✔
1662
          else
1663
            (* There is a deviation here due to the nature of the parser: if a
1664
               body element has been emitted, it can't be suppressed. *)
1665
            pop_until
1✔
1666
              (fun _ -> match !open_elements with [_] -> true | _ -> false)
1✔
1667
              l (fun () ->
1668
            push_and_emit l t in_frameset_mode))
1✔
1669

1670
    | l, `EOF as v ->
109✔
1671
      report_if_stack_has_other_than
1672
        ["dd"; "dt"; "li"; "p"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr";
1673
         "body"; "html"] (fun () ->
1674
      match !template_insertion_modes with
109✔
1675
      | [] -> emit_end l
109✔
1676
      | _ -> in_template_mode_rules mode v)
×
1677

1678
    | l, `End {name = "body"} ->
11✔
1679
      if not @@ Stack.in_scope open_elements "body" then
11✔
1680
        report l (`Unmatched_end_tag "body") !throw mode
1✔
1681
      else
1682
        report_if_stack_has_other_than
10✔
1683
          ["dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt";
1684
           "rtc"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"; "body";
1685
           "html"] (fun () ->
1686
        after_body_mode ())
10✔
1687

1688
    | l, `End {name = "html"} as v ->
12✔
1689
      if not @@ Stack.in_scope open_elements "body" then
12✔
1690
        report l (`Unmatched_end_tag "html") !throw mode
1✔
1691
      else
1692
        report_if_stack_has_other_than
11✔
1693
          ["dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt";
1694
           "rtc"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"; "body";
1695
           "html"] (fun () ->
1696
        push tokens v;
11✔
1697
        after_body_mode ())
11✔
1698

1699
    | l, `Start ({name =
97✔
1700
        "address" | "article" | "aside" | "blockquote" | "center" |
1✔
1701
        "details" | "dialog" | "dir" | "div" | "dl" | "fieldset" |
1✔
1702
        "figcaption" | "figure" | "footer" | "header" | "hgroup" | "main" |
1✔
1703
        "nav" | "ol" | "p" | "section" | "summary" | "ul"} as t) ->
1✔
1704
      close_current_p_element l (fun () ->
1705
      push_and_emit l t mode)
97✔
1706

1707
    | l, `Start ({name =
13✔
1708
        "h1" | "h2" | "h3" | "h4" | "h5" | "h6"} as t) ->
2✔
1709
      close_current_p_element l (fun () ->
1710
      (fun mode' ->
13✔
1711
        match Stack.current_element open_elements with
13✔
1712
        | Some {element_name = `HTML,
6✔
1713
            ("h1" | "h2" | "h3" | "h4" | "h5" | "h6" as name')} ->
1✔
1714
          misnested_tag l t name' (fun () ->
1715
          pop l mode')
6✔
1716
        | _ -> mode' ()) (fun () ->
7✔
1717
      push_and_emit l t mode))
13✔
1718

1719
    | l, `Start ({name = "pre" | "listing"} as t) ->
1✔
1720
      frameset_ok := false;
1721
      close_current_p_element l (fun () ->
1722
      push_and_emit l t (fun () ->
3✔
1723
      next_expected tokens !throw (function
3✔
1724
        | _, `Char 0x000A -> mode ()
2✔
1725
        | v ->
1✔
1726
          push tokens v;
1727
          mode ())))
1✔
1728

1729
    | l, `Start ({name = "form"} as t) ->
3✔
1730
      if !form_element_pointer <> None &&
1731
         not @@ Stack.has open_elements "template" then
1✔
1732
        misnested_tag l t "form" mode
1✔
1733
      else begin
2✔
1734
        close_current_p_element l (fun () ->
1735
        let in_template = Stack.has open_elements "template" in
2✔
1736
        push_and_emit ~set_form_element_pointer:(not in_template) l t mode)
2✔
1737
      end
1738

1739
    | l, `Start ({name = "li"} as t) ->
3✔
1740
      frameset_ok := false;
1741
      close_preceding_tag ["li"] l (fun () ->
1742
      close_current_p_element l (fun () ->
3✔
1743
      push_and_emit l t mode))
3✔
1744

1745
    | l, `Start ({name = "dd" | "dt"} as t) ->
1✔
1746
      frameset_ok := false;
1747
      close_preceding_tag ["dd"; "dt"] l (fun () ->
1748
      close_current_p_element l (fun () ->
3✔
1749
      push_and_emit l t mode))
3✔
1750

1751
    | l, `Start ({name = "plaintext"} as t) ->
1✔
1752
      close_current_p_element l (fun () ->
1753
      set_tokenizer_state `PLAINTEXT;
1✔
1754
      push_and_emit l t mode)
1✔
1755

1756
    | l, `Start ({name = "button"} as t) ->
2✔
1757
      (fun mode' ->
1758
        if Stack.in_scope open_elements "button" then
2✔
1759
          misnested_tag l t "button" (fun () ->
1✔
1760
          close_element_with_implied "button" l mode')
1✔
1761
        else mode' ())
1✔
1762
      (fun () ->
1763
        frameset_ok := false;
2✔
1764
        reconstruct_active_formatting_elements (fun () ->
1765
        push_and_emit l t mode))
2✔
1766

1767
    | l, `End {name =
35✔
1768
        "address" | "article" | "aside" | "blockquote" | "button" |
1✔
1769
        "center" | "details" | "dialog" | "dir" | "div" | "dl" | "fieldset" |
1✔
1770
        "figcaption" | "figure" | "footer" | "header" | "hgroup" | "listing" |
1✔
1771
        "main" | "nav" | "ol" | "pre" | "section" | "summary" | "ul"
1✔
1772
        as name} ->
1773
      if not @@ Stack.in_scope open_elements name then
35✔
1774
        report l (`Unmatched_end_tag name) !throw mode
1✔
1775
      else
1776
        close_element_with_implied name l mode
34✔
1777

1778
    | l, `End {name = "form"} ->
3✔
1779
      if not @@ Stack.has open_elements "template" then begin
3✔
1780
        let form_element = !form_element_pointer in
1781
        form_element_pointer := None;
1782
        match form_element with
1783
        | Some element when Stack.target_in_scope open_elements element ->
2✔
1784
          pop_implied l (fun () ->
2✔
1785
          match Stack.current_element open_elements with
2✔
1786
          | Some element' when element' == element ->
2✔
1787
            pop l mode
2✔
1788
          | _ ->
×
1789
            report element.location (`Unmatched_start_tag "form") !throw
1790
              (fun () ->
1791
            pop_until (fun element' -> element' == element) l (fun () ->
×
1792
            pop l mode)))
×
1793
        | _ ->
1✔
1794
          report l (`Unmatched_end_tag "form") !throw mode
1795
      end
1796
      else
1797
        if not @@ Stack.in_scope open_elements "form" then
×
1798
          report l (`Unmatched_end_tag "form") !throw mode
×
1799
        else
1800
          close_element_with_implied "form" l mode
×
1801

1802
    | l, `End {name = "p"} ->
7✔
1803
      (fun mode' ->
1804
        if not @@ Stack.in_button_scope open_elements "p" then
7✔
1805
          report l (`Unmatched_end_tag "p") !throw (fun () ->
1✔
1806
          push_implicit l "p" mode')
1✔
1807
        else mode' ())
6✔
1808
      (fun () -> close_element_with_implied "p" l mode)
7✔
1809

1810
    | l, `End {name = "li"} ->
2✔
1811
      if not @@ Stack.in_list_item_scope open_elements "li" then
2✔
1812
        report l (`Unmatched_end_tag "li") !throw mode
1✔
1813
      else
1814
        close_element_with_implied "li" l mode
1✔
1815

1816
    | l, `End {name = "dd" | "dt" as name} ->
×
1817
      if not @@ Stack.in_scope open_elements name then
2✔
1818
        report l (`Unmatched_end_tag name) !throw mode
1✔
1819
      else
1820
        close_element_with_implied name l mode
1✔
1821

1822
    | l, `End {name = "h1" | "h2" | "h3" | "h4" | "h5" | "h6" as name} ->
1✔
1823
      if not @@ Stack.one_in_scope open_elements
7✔
1824
          ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"] then
1825
        report l (`Unmatched_end_tag name) !throw mode
×
1826
      else
1827
        pop_implied l (fun () ->
7✔
1828
          (fun next ->
7✔
1829
            match Stack.current_element open_elements with
7✔
1830
            | Some {element_name = `HTML, name'}
7✔
1831
                when list_mem_string
7✔
1832
                  name' ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"] ->
1833
              next ()
7✔
1834
            | _ ->
×
1835
              report l (`Unmatched_end_tag name) !throw next)
1836
          @@ (fun () ->
1837
            pop_until_and_raise_errors
7✔
1838
              ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"] l mode))
1839

1840
    | l, `Start ({name = "a"} as t) ->
8✔
1841
      (fun k ->
1842
        match Active.has_before_marker active_formatting_elements "a" with
8✔
1843
        | None -> k ()
6✔
1844
        | Some existing ->
2✔
1845
          misnested_tag l t "a" (fun () ->
1846
          adoption_agency_algorithm l "a" (fun () ->
2✔
1847
          Stack.remove open_elements existing;
2✔
1848
          Active.remove active_formatting_elements existing;
2✔
1849
          k ())))
2✔
1850
      (fun () ->
1851
        Subtree.enable subtree_buffer;
8✔
1852
        reconstruct_active_formatting_elements (fun () ->
8✔
1853
        push_and_emit ~formatting:true l t mode))
8✔
1854

1855
    | l, `Start ({name =
17✔
1856
        "b" | "big" | "code" | "em" | "font" | "i" | "s" | "small" |
×
1857
        "strike" | "strong" | "tt" | "u"} as t) ->
×
1858
      Subtree.enable subtree_buffer;
1859
      reconstruct_active_formatting_elements (fun () ->
17✔
1860
      push_and_emit ~formatting:true l t mode)
17✔
1861

1862
    | l, `Start ({name = "nobr"} as t) ->
2✔
1863
      Subtree.enable subtree_buffer;
1864
      reconstruct_active_formatting_elements (fun () ->
2✔
1865
      (fun k ->
2✔
1866
        if not @@ Stack.in_scope open_elements "nobr" then k ()
1✔
1867
        else
1868
          misnested_tag l t "nobr" (fun () ->
1✔
1869
          adoption_agency_algorithm l "nobr" (fun () ->
1✔
1870
          reconstruct_active_formatting_elements k)))
1✔
1871
      (fun () -> push_and_emit ~formatting:true l t mode))
2✔
1872

1873
    | l, `End {name =
20✔
1874
        "a" | "b" | "big" | "code" | "em" | "font" | "i" | "nobr" | "s" |
×
1875
        "small" | "strike" | "strong" | "tt" | "u" as name} ->
×
1876
      adoption_agency_algorithm l name mode
1877

1878
    | l, `Start ({name = "applet" | "marquee" | "object"} as t) ->
×
1879
      frameset_ok := false;
1880
      reconstruct_active_formatting_elements (fun () ->
1881
      Active.add_marker active_formatting_elements;
×
1882
      push_and_emit l t mode)
×
1883

1884
    | l, `End {name = "applet" | "marquee" | "object" as name} ->
×
1885
      if not @@ Stack.in_scope open_elements name then
×
1886
        report l (`Unmatched_end_tag name) !throw mode
×
1887
      else begin
×
1888
        Active.clear_until_marker active_formatting_elements;
1889
        close_element_with_implied name l mode
×
1890
      end
1891

1892
    | l, `Start ({name = "table"} as t) ->
13✔
1893
      frameset_ok := false;
1894
      close_current_p_element l (fun () ->
1895
      push_and_emit l t in_table_mode)
13✔
1896

1897
    | l, `End {name = "br"} ->
1✔
1898
      report l (`Unmatched_end_tag "br") !throw (fun () ->
1899
      in_body_mode_rules context_name mode
1✔
1900
        (l, `Start
1901
          {Token_tag.name = "br"; attributes = []; self_closing = false}))
1902

1903
    | l, `Start ({name =
4✔
1904
        "area" | "br" | "embed" | "img" | "keygen" | "wbr"} as t) ->
×
1905
      frameset_ok := false;
1906
      reconstruct_active_formatting_elements (fun () ->
1907
      push_and_emit ~acknowledge:true l t (fun () ->
4✔
1908
      pop l mode))
4✔
1909

1910
    | l, `Start ({name = "input"} as t) ->
1✔
1911
      if Element.is_not_hidden t then frameset_ok := false;
1✔
1912
      reconstruct_active_formatting_elements (fun () ->
1✔
1913
      push_and_emit ~acknowledge:true l t (fun () ->
1✔
1914
      pop l mode))
1✔
1915

1916
    | l, `Start ({name = "param" | "source" | "track"} as t) ->
×
1917
      push_and_emit ~acknowledge:true l t (fun () ->
1918
      pop l mode)
×
1919

1920
    | l, `Start ({name = "hr"} as t) ->
1✔
1921
      frameset_ok := false;
1922
      close_current_p_element l (fun () ->
1923
      push_and_emit ~acknowledge:true l t (fun () ->
1✔
1924
      pop l mode))
1✔
1925

1926
    | l, `Start ({name = "image"} as t) ->
1✔
1927
      report l (`Bad_token ("image", "tag", "should be 'img'")) !throw
1928
        (fun () ->
1929
      push tokens (l, `Start {t with name = "img"});
1✔
1930
      mode ())
1✔
1931

1932
    | l, `Start ({name = "textarea"} as t) ->
3✔
1933
      frameset_ok := false;
1934
      push_and_emit l t (fun () ->
1935
      set_tokenizer_state `RCDATA;
3✔
1936
      next_expected tokens !throw (function
3✔
1937
        | _, `Char 0x000A -> text_mode mode
1✔
1938
        | v ->
2✔
1939
          push tokens v;
1940
          text_mode mode))
2✔
1941

1942
    | l, `Start {name = "xmp"} ->
×
1943
      frameset_ok := false;
1944
      close_current_p_element l (fun () ->
1945
      reconstruct_active_formatting_elements (fun () ->
×
1946
      parse_rawtext mode))
×
1947

1948
    | l, `Start ({name = "iframe"} as t) ->
1✔
1949
      frameset_ok := false;
1950
      push_and_emit l t (fun () ->
1951
      parse_rawtext mode)
1✔
1952

1953
    | l, `Start ({name = "noembed"} as t) ->
1✔
1954
      push_and_emit l t (fun () ->
1955
      parse_rawtext mode)
1✔
1956

1957
    | l, `Start ({name = "select"} as t) ->
2✔
1958
      frameset_ok := false;
1959
      select_in_body l t in_select_mode
1960

1961
    | l, `Start ({name = "optgroup" | "option"} as t) ->
1✔
1962
      (fun mode' ->
1963
        if Stack.current_element_is open_elements ["option"] then
6✔
1964
          pop l mode'
3✔
1965
        else mode' ())
3✔
1966
      (fun () ->
1967
        reconstruct_active_formatting_elements (fun () ->
6✔
1968
        push_and_emit l t mode))
6✔
1969

1970
    | l, `Start ({name = "rb" | "rtc"} as t) ->
×
1971
      (fun mode' ->
1972
        let finish () =
1✔
1973
          if Stack.current_element_is open_elements ["ruby"] then
1✔
1974
            mode' ()
×
1975
          else
1976
            misnested_tag l t context_name mode'
1✔
1977
        in
1978
        if Stack.in_scope open_elements "ruby" then
1979
          pop_implied l finish
×
1980
        else
1981
          finish ())
1✔
1982
      (fun () ->
1983
        push_and_emit l t mode)
1✔
1984

1985
    | l, `Start ({name = "rp" | "rt"} as t) ->
×
1986
      (fun mode' ->
1987
        let finish () =
1✔
1988
          if Stack.current_element_is open_elements ["ruby"; "rtc"] then
1✔
1989
            mode' ()
×
1990
          else
1991
            misnested_tag l t context_name mode'
1✔
1992
        in
1993
        if Stack.in_scope open_elements "ruby" then
1994
          pop_implied ~except:"rtc" l finish
×
1995
        else
1996
          finish ())
1✔
1997
      (fun () ->
1998
        push_and_emit l t mode)
1✔
1999

2000
    | l, `Start ({name = "math"} as t) ->
×
2001
      reconstruct_active_formatting_elements (fun () ->
2002
      push_and_emit ~acknowledge:true ~namespace:`MathML l t (fun () ->
×
2003
      if t.self_closing then pop l mode
×
2004
      else mode ()))
×
2005

2006
    | l, `Start ({name = "svg"} as t) ->
4✔
2007
      reconstruct_active_formatting_elements (fun () ->
2008
      push_and_emit ~acknowledge:true ~namespace:`SVG l t (fun () ->
4✔
2009
      if t.self_closing then pop l mode
×
2010
      else mode ()))
15✔
2011

2012
    | l, `Start ({name =
9✔
2013
        "caption" | "col" | "colgroup" | "frame" | "head" | "tbody" | "td" |
×
2014
        "tfoot" | "th" | "thead" | "tr"} as t) ->
1✔
2015
      misnested_tag l t context_name mode
2016

2017
    | l, `Start t ->
5✔
2018
      reconstruct_active_formatting_elements (fun () ->
2019
      push_and_emit l t mode)
5✔
2020

2021
    | l, `End {name} ->
7✔
2022
      any_other_end_tag_in_body l name mode
2023

2024
  (* Part of 8.2.5.4.7. *)
2025
  and any_other_end_tag_in_body l name mode =
2026
    let rec close = function
11✔
2027
      | [] -> mode ()
×
2028
      | {element_name = (ns, name') as name''}::rest ->
14✔
2029
        if ns = `HTML && name' = name then
14✔
2030
          pop_implied ~except:name l (fun () ->
6✔
2031
          pop l mode)
6✔
2032
        else
2033
          if Element.is_special name'' then
8✔
2034
            report l (`Unmatched_end_tag name) !throw mode
5✔
2035
          else close rest
3✔
2036
    in
2037
    close !open_elements
2038

2039
  (* Part of 8.2.5.4.7. *)
2040
  and adoption_agency_algorithm l name mode =
2041
    Subtree.enable subtree_buffer;
23✔
2042
    emit_text (fun () ->
23✔
2043
    let handled, errors =
23✔
2044
      Subtree.adoption_agency_algorithm
2045
        subtree_buffer active_formatting_elements l name
2046
    in
2047
    let rec report_all errors k =
23✔
2048
      match errors with
27✔
2049
      | [] -> k ()
23✔
2050
      | (l, error)::more ->
4✔
2051
        report l error !throw (fun () -> report_all more k)
4✔
2052
    in
2053
    report_all errors (fun () ->
2054
    if not handled then any_other_end_tag_in_body l name mode
4✔
2055
    else mode ()))
19✔
2056

2057
  (* Part of 8.2.5.4.7. *)
2058
  and select_in_body l t next_mode =
2059
    frameset_ok := false;
2✔
2060
    reconstruct_active_formatting_elements (fun () ->
2061
    push_and_emit l t next_mode)
2✔
2062

2063
  (* 8.2.5.4.8. *)
2064
  and text_mode original_mode =
2065
    dispatch tokens begin function
132✔
2066
      | l, `Char c ->
115✔
2067
        add_character l c;
2068
        text_mode original_mode
115✔
2069

2070
      | l, `EOF as v ->
×
2071
        report l (`Unexpected_eoi "content") !throw (fun () ->
2072
        push tokens v;
×
2073
        pop l original_mode)
×
2074

2075
      | l, `End _ ->
17✔
2076
        pop l original_mode
2077

2078
      | _ ->
×
2079
        text_mode original_mode
2080
    end
2081

2082
  (* 8.2.5.2. *)
2083
  and parse_rcdata original_mode =
2084
    set_tokenizer_state `RCDATA;
4✔
2085
    text_mode original_mode
4✔
2086

2087
  (* 8.2.5.2. *)
2088
  and parse_rawtext original_mode =
2089
    set_tokenizer_state `RAWTEXT;
8✔
2090
    text_mode original_mode
8✔
2091

2092
  and anything_else_in_table mode (l, _ as v) =
2093
    report l (`Bad_content "table") !throw (fun () ->
3✔
2094
    in_body_mode_rules "table" mode v)
3✔
2095

2096
  (* 8.2.5.4.9. *)
2097
  and in_table_mode () =
2098
    dispatch tokens (fun v -> in_table_mode_rules in_table_mode v)
26✔
2099

2100
  and in_table_mode_rules mode = function
2101
    | _, `Char _ as v
1✔
2102
        when Stack.current_element_is open_elements
1✔
2103
               ["table"; "tbody"; "tfoot"; "thead"; "tr"] ->
2104
      push tokens v;
1✔
2105
      in_table_text_mode true [] mode
1✔
2106

2107
    | l, `Comment s ->
×
2108
      emit l (`Comment s) mode
2109

2110
    | l, `Doctype _ ->
×
2111
      report l (`Bad_document "doctype should be first") !throw mode
2112

2113
    | l, `Start ({name = "caption"} as t) ->
4✔
2114
      pop_to_table_context l (fun () ->
2115
      Active.add_marker active_formatting_elements;
4✔
2116
      push_and_emit l t in_caption_mode)
4✔
2117

2118
    | l, `Start ({name = "colgroup"} as t) ->
1✔
2119
      pop_to_table_context l (fun () ->
2120
      push_and_emit l t in_column_group_mode)
1✔
2121

2122
    | l, `Start {name = "col"} as v ->
1✔
2123
      pop_to_table_context l (fun () ->
2124
      push tokens v;
1✔
2125
      push_implicit l "colgroup" in_column_group_mode)
1✔
2126

2127
    | l, `Start ({name = "tbody" | "tfoot" | "thead"} as t) ->
×
2128
      pop_to_table_context l (fun () ->
2129
      push_and_emit l t in_table_body_mode)
3✔
2130

2131
    | l, `Start {name = "td" | "th" | "tr"} as v ->
×
2132
      pop_to_table_context l (fun () ->
2133
      push tokens v;
2✔
2134
      push_implicit l "tbody" in_table_body_mode)
2✔
2135

2136
    | l, `Start ({name = "table"} as t) as v ->
1✔
2137
      misnested_tag l t "table" (fun () ->
2138
      if not @@ Stack.has open_elements "table" then mode ()
×
2139
      else begin
1✔
2140
        push tokens v;
2141
        close_element l "table" (fun () -> reset_mode () ())
1✔
2142
      end)
2143

2144
    | l, `End {name = "table"} ->
12✔
2145
      if not @@ Stack.in_table_scope open_elements "table" then
12✔
2146
        report l (`Unmatched_end_tag "table") !throw mode
×
2147
      else
2148
        close_element l "table" (fun () -> reset_mode () ())
12✔
2149

2150
    | l, `End {name =
×
2151
      "body" | "caption" | "col" | "colgroup" | "html" | "tbody" | "td" |
×
2152
      "tfoot" | "th" | "thead" | "tr" as name} ->
×
2153
      report l (`Unmatched_end_tag name) !throw mode
2154

2155
    | _, `Start {name = "style" | "script" | "template"}
×
2156
    | _, `End {name = "template"} as v ->
×
2157
      in_head_mode_rules mode v
2158

2159
    | l, `Start ({name = "input"} as t) when Element.is_not_hidden t ->
×
2160
      misnested_tag l t "table" (fun () ->
×
2161
      push_and_emit ~acknowledge:true l t (fun () ->
×
2162
      pop l mode))
×
2163

2164
    | l, `Start ({name = "form"} as t) ->
×
2165
      misnested_tag l t "table" (fun () ->
2166
      push_and_emit l t (fun () ->
×
2167
      pop l mode))
×
2168

2169
    | _, `EOF as v ->
1✔
2170
      in_body_mode_rules "table" mode v
2171

2172
    | v ->
×
2173
      anything_else_in_table mode v
2174

2175
  (* 8.2.5.4.10. *)
2176
  and in_table_text_mode only_space cs mode =
2177
    dispatch tokens begin function
5✔
2178
      | l, `Char 0 ->
1✔
2179
        report l (`Bad_token ("U+0000", "table", "null")) !throw (fun () ->
2180
        in_table_text_mode only_space cs mode)
1✔
2181

2182
      | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) as v ->
×
2183
        in_table_text_mode only_space (v::cs) mode
2184

2185
      | _, `Char _ as v ->
3✔
2186
        in_table_text_mode false (v::cs) mode
2187

2188
      | v ->
1✔
2189
        push tokens v;
2190
        if not only_space then
1✔
2191
          let rec reprocess = function
1✔
2192
            | [] -> mode ()
1✔
2193
            | v::more -> anything_else_in_table (fun () -> reprocess more) v
3✔
2194
          in
2195
          reprocess (List.rev cs)
1✔
2196
        else begin
×
2197
          List.rev cs |> List.iter (function
×
2198
            | l, `Char c -> add_character l c
×
2199
            | _ -> ());
×
2200
          mode ()
×
2201
        end
2202
    end
2203

2204
  (* 8.2.5.4.11. *)
2205
  and in_caption_mode () =
2206
    dispatch tokens begin function
11✔
2207
      | l, `End {name = "caption"} ->
2✔
2208
        if not @@ Stack.in_table_scope open_elements "caption" then
2✔
2209
          report l (`Unmatched_end_tag "caption") !throw in_caption_mode
×
2210
        else begin
2✔
2211
          Active.clear_until_marker active_formatting_elements;
2212
          close_element_with_implied "caption" l in_table_mode
2✔
2213
        end
2214

2215
      | l, `Start ({name =
1✔
2216
          "caption" | "col" | "colgroup" | "tbody" | "td" | "tfoot" | "th" |
×
2217
          "thead" | "tr"} as t) as v ->
×
2218
        misnested_tag l t "caption" (fun () ->
2219
        if not @@ Stack.in_table_scope open_elements "caption" then
1✔
2220
          in_caption_mode ()
×
2221
        else begin
1✔
2222
          Active.clear_until_marker active_formatting_elements;
2223
          push tokens v;
1✔
2224
          close_element l "caption" in_table_mode
1✔
2225
        end)
2226

2227
      | l, `End {name = "table"} as v ->
1✔
2228
        report l (`Unmatched_end_tag "table") !throw (fun () ->
2229
        if not @@ Stack.in_table_scope open_elements "caption" then
1✔
2230
          in_caption_mode ()
×
2231
        else begin
1✔
2232
          Active.clear_until_marker active_formatting_elements;
2233
          push tokens v;
1✔
2234
          close_element l "caption" in_table_mode
1✔
2235
        end)
2236

2237
      | l, `End {name =
×
2238
          ("body" | "col" | "colgroup" | "html" | "tbody" | "td" | "tfoot" |
×
2239
           "th" | "thead" | "tr") as name} ->
×
2240
        report l (`Unmatched_end_tag name) !throw in_caption_mode
2241

2242
      | l, `Start ({name = "select"} as t) ->
×
2243
        select_in_body l t in_select_in_table_mode
2244

2245
      | v ->
7✔
2246
        in_body_mode_rules "caption" in_caption_mode v
2247
    end
2248

2249
  (* 8.2.5.4.12. *)
2250
  and in_column_group_mode () =
2251
    dispatch tokens begin function
4✔
2252
      | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->
×
2253
        add_character l c;
2254
        in_column_group_mode ()
×
2255

2256
      | l, `Comment s ->
×
2257
        emit l (`Comment s) in_column_group_mode
2258

2259
      | l, `Doctype _ ->
×
2260
        report l (`Bad_document "doctype should be first") !throw
2261
          in_column_group_mode
2262

2263
      | _, `Start {name = "html"} as v ->
×
2264
        in_body_mode_rules "colgroup" in_column_group_mode v
2265

2266
      | l, `Start ({name = "col"} as t) ->
2✔
2267
        push_and_emit ~acknowledge:true l t (fun () ->
2268
        pop l in_column_group_mode)
2✔
2269

2270
      | l, `End {name = "colgroup"} ->
1✔
2271
        if not @@ Stack.current_element_is open_elements ["colgroup"] then
1✔
2272
          report l (`Unmatched_end_tag "colgroup") !throw in_column_group_mode
×
2273
        else
2274
          pop l in_table_mode
1✔
2275

2276
      | l, `End {name = "col"} ->
×
2277
        report l (`Unmatched_end_tag "col") !throw in_column_group_mode
2278

2279
      | _, `Start {name = "template"}
×
2280
      | _, `End {name = "template"} as v ->
×
2281
        in_head_mode_rules in_column_group_mode v
2282

2283
      | _, `EOF as v ->
×
2284
        in_body_mode_rules "colgroup" in_column_group_mode v
2285

2286
      | l, _ as v ->
1✔
2287
        if not @@ Stack.current_element_is open_elements ["colgroup"] then
1✔
2288
          report l (`Bad_content "colgroup") !throw in_table_mode
×
2289
        else begin
1✔
2290
          push tokens v;
2291
          pop l in_table_mode
1✔
2292
        end
2293
    end
2294

2295
  (* 8.2.5.4.13. *)
2296
  and in_table_body_mode () =
2297
    dispatch tokens begin function
7✔
2298
      | l, `Start ({name = "tr"} as t) ->
1✔
2299
        pop_to_table_body_context l (fun () ->
2300
        push_and_emit l t in_row_mode)
1✔
2301

2302
      | l, `Start ({name = "th" | "td"} as t) as v ->
×
2303
        misnested_tag l t "table" (fun () ->
2304
        pop_to_table_body_context l (fun () ->
1✔
2305
        push tokens v;
1✔
2306
        push_implicit l "tr" in_row_mode))
1✔
2307

2308
      | l, `End {name = "tbody" | "tfoot" | "thead" as name} ->
×
2309
        if not @@ Stack.in_table_scope open_elements name then
2✔
2310
          report l (`Unmatched_end_tag name) !throw in_table_body_mode
×
2311
        else
2312
          pop_to_table_body_context l (fun () ->
2✔
2313
          pop l in_table_mode)
2✔
2314

2315
      | l, `Start ({name =
1✔
2316
          "caption" | "col" | "colgroup" | "tbody" | "tfoot" | "thead"} as t)
×
2317
          as v ->
2318
        if not @@ Stack.one_in_table_scope open_elements
1✔
2319
            ["tbody"; "thead"; "tfoot"] then
2320
          misnested_tag l t "table" in_table_body_mode
×
2321
        else begin
1✔
2322
          push tokens v;
2323
          pop_to_table_body_context l (fun () ->
1✔
2324
          pop l in_table_mode)
1✔
2325
        end
2326

2327
      | l, `End {name = "table" as name} as v ->
2✔
2328
        if not @@ Stack.one_in_table_scope open_elements
2✔
2329
            ["tbody"; "thead"; "tfoot"] then
2330
          report l (`Unmatched_end_tag name) !throw in_table_body_mode
×
2331
        else begin
2✔
2332
          push tokens v;
2333
          pop_to_table_body_context l (fun () ->
2✔
2334
          pop l in_table_mode)
2✔
2335
        end
2336

2337
      | l, `End {name =
×
2338
          "body" | "caption" | "col" | "colgroup" | "html" | "td" | "th" |
×
2339
          "tr" as name} ->
×
2340
        report l (`Unmatched_end_tag name) !throw in_table_body_mode
2341

2342
      | v ->
×
2343
        in_table_mode_rules in_table_body_mode v
2344
    end
2345

2346
  (* 8.2.5.4.14. *)
2347
  and in_row_mode () =
2348
    dispatch tokens begin function
5✔
2349
      | l, `Start ({name = "th" | "td"} as t) ->
×
2350
        Active.add_marker active_formatting_elements;
2351
        pop_to_table_row_context l (fun () ->
3✔
2352
        push_and_emit l t in_cell_mode)
3✔
2353

2354
      | l, `End {name = "tr"} ->
1✔
2355
        if not @@ Stack.in_table_scope open_elements "tr" then
1✔
2356
          report l (`Unmatched_end_tag "tr") !throw in_row_mode
×
2357
        else
2358
          pop_to_table_row_context l (fun () ->
1✔
2359
          pop l in_table_body_mode)
1✔
2360

2361
      | l, `Start {name =
×
2362
          ("caption" | "col" | "colgroup" | "tbody" | "tfoot" | "thead" |
×
2363
           "tr")}
×
2364
      | l, `End {name = "table"} as v ->
1✔
2365
        if not @@ Stack.in_table_scope open_elements "tr" then
1✔
2366
          match snd v with
×
2367
          | `Start t ->
×
2368
            misnested_tag l t "tr" in_row_mode
2369
          | `End {name} ->
×
2370
            report l (`Unmatched_end_tag name) !throw in_row_mode
2371
        else
2372
          pop_to_table_row_context l (fun () ->
1✔
2373
          push tokens v;
1✔
2374
          pop l in_table_body_mode)
1✔
2375

2376
      | l, `End {name = "tbody" | "tfoot" | "thead" as name} as v ->
×
2377
        if not @@ Stack.in_table_scope open_elements name then
×
2378
          report l (`Unmatched_end_tag name) !throw in_row_mode
×
2379
        else
2380
          if not @@ Stack.in_table_scope open_elements "tr" then in_row_mode ()
×
2381
          else
2382
            pop_to_table_row_context l (fun () ->
×
2383
            push tokens v;
×
2384
            pop l in_table_body_mode)
×
2385

2386
      | l, `End {name =
×
2387
          "body" | "caption" | "col" | "colgroup" | "html" | "td" | "th"
×
2388
          as name} ->
2389
        report l (`Unmatched_end_tag name) !throw in_row_mode
2390

2391
      | v ->
×
2392
        in_table_mode_rules in_row_mode v
2393
    end
2394

2395
  (* 8.2.5.4.15. *)
2396
  and in_cell_mode () =
2397
    dispatch tokens begin function
9✔
2398
      | l, `End {name = "td" | "th" as name} ->
×
2399
        if not @@ Stack.in_table_scope open_elements name then
3✔
2400
          report l (`Unmatched_end_tag name) !throw in_cell_mode
×
2401
        else
2402
          close_element_with_implied name l (fun () ->
3✔
2403
          Active.clear_until_marker active_formatting_elements;
3✔
2404
          in_row_mode ())
3✔
2405

2406
      | l, `Start ({name =
×
2407
          "caption" | "col" | "colgroup" | "tbody" | "td" | "tfoot" | "th" |
×
2408
          "thead" | "tr"} as t) as v ->
×
2409
        if not @@ Stack.one_in_table_scope open_elements ["td"; "th"] then
×
2410
          misnested_tag l t "td/th" in_cell_mode
×
2411
        else
2412
          close_cell l (fun () ->
×
2413
          Active.clear_until_marker active_formatting_elements;
×
2414
          push tokens v;
×
2415
          in_row_mode ())
×
2416

2417
      | l, `End {name =
×
2418
          "body" | "caption" | "col" | "colgroup" | "html" as name} ->
×
2419
        report l (`Unmatched_end_tag name) !throw in_cell_mode
2420

2421
      | l, `End {name =
×
2422
          "table" | "tbody" | "tfoot" | "thead" | "tr" as name} as v ->
×
2423
        if not @@ Stack.in_table_scope open_elements name then
×
2424
          report l (`Unmatched_end_tag name) !throw in_cell_mode
×
2425
        else
2426
          close_cell l (fun () ->
×
2427
          Active.clear_until_marker active_formatting_elements;
×
2428
          push tokens v;
×
2429
          in_row_mode ())
×
2430

2431
      | l, `Start ({name = "select"} as t) ->
×
2432
        select_in_body l t in_select_in_table_mode
2433

2434
      | v ->
6✔
2435
        in_body_mode_rules "td" in_cell_mode v
2436
    end
2437

2438
  (* 8.2.5.4.16. *)
2439
  and in_select_mode () =
2440
    dispatch tokens (fun v -> in_select_mode_rules in_select_mode v)
23✔
2441

2442
  and in_select_mode_rules mode = function
2443
    | l, `Char 0 ->
1✔
2444
      report l (`Bad_token ("U+0000", "select", "null")) !throw mode
2445

2446
    | l, `Char c ->
9✔
2447
      add_character l c;
2448
      mode ()
9✔
2449

2450
    | l, `Comment s ->
×
2451
      emit l (`Comment s) mode
2452

2453
    | l, `Doctype _ ->
×
2454
      report l (`Bad_document "doctype should be first") !throw mode
2455

2456
    | _, `Start {name = "html"} as v ->
×
2457
      in_body_mode_rules "select" mode v
2458

2459
    | l, `Start ({name = "option"} as t) ->
5✔
2460
      (fun mode' ->
2461
        if Stack.current_element_is open_elements ["option"] then pop l mode'
1✔
2462
        else mode' ())
4✔
2463
      (fun () -> push_and_emit l t mode)
5✔
2464

2465
    | l, `Start ({name = "optgroup"} as t) ->
2✔
2466
      (fun mode' ->
2467
        if Stack.current_element_is open_elements ["option"] then pop l mode'
1✔
2468
        else mode' ())
1✔
2469
      @@ (fun mode' () ->
2470
        if Stack.current_element_is open_elements ["optgroup"] then pop l mode'
1✔
2471
        else mode' ())
1✔
2472
      @@ (fun () -> push_and_emit l t mode)
2✔
2473

2474
    | l, `End {name = "optgroup"} ->
1✔
2475
      (fun mode' ->
2476
        match !open_elements with
1✔
2477
        | {element_name = `HTML, "option"}::
1✔
2478
            {element_name = `HTML, "optgroup"}::_ ->
2479
          pop l mode'
2480
        | _ -> mode' ())
×
2481
      (fun () ->
2482
        if Stack.current_element_is open_elements ["optgroup"] then
1✔
2483
          pop l mode
1✔
2484
        else
2485
          report l (`Unmatched_end_tag "optgroup") !throw mode)
×
2486

2487
    | l, `End {name = "option"} ->
1✔
2488
      if Stack.current_element_is open_elements ["option"] then
2489
        pop l mode
1✔
2490
      else
2491
        report l (`Unmatched_end_tag "option") !throw mode
×
2492

2493
    | l, `End {name = "select"} ->
2✔
2494
      if not @@ Stack.in_select_scope open_elements "select" then
2✔
2495
        report l (`Unmatched_end_tag "select") !throw mode
×
2496
      else
2497
        close_element l "select" (fun () -> reset_mode () ())
2✔
2498

2499
    | l, `Start ({name = "select"} as t) ->
×
2500
      misnested_tag l t "select" (fun () ->
2501
      close_element l "select" (fun () -> reset_mode () ()))
×
2502

2503
    | l, `Start ({name = "input" | "keygen" | "textarea"} as t) as v ->
×
2504
      misnested_tag l t "select" (fun () ->
2505
      if not @@ Stack.in_select_scope open_elements "select" then
×
2506
        mode ()
×
2507
      else begin
×
2508
        push tokens v;
2509
        close_element l "select" (fun () -> reset_mode () ())
×
2510
      end)
2511

2512
    | _, (`Start {name = "script" | "template"} |
×
2513
          `End {name = "template"}) as v ->
×
2514
      in_head_mode_rules mode v
2515

2516
    | _, `EOF as v ->
2✔
2517
      in_body_mode_rules "select" mode v
2518

2519
    | l, _ ->
×
2520
      report l (`Bad_content "select") !throw mode
2521

2522
  (* 8.2.5.4.17. *)
2523
  and in_select_in_table_mode () =
2524
    dispatch tokens begin function
×
2525
      | l, `Start ({name =
×
2526
          "caption" | "table" | "tbody" | "tfoot" | "thead" | "tr" | "td" |
×
2527
          "th"} as t) as v ->
×
2528
        misnested_tag l t "table" (fun () ->
2529
        push tokens v;
×
2530
        close_element l "select" (fun () -> reset_mode () ()))
×
2531

2532
      | l, `End {name =
×
2533
          "caption" | "table" | "tbody" | "tfoot" | "thead" | "tr" | "td" |
×
2534
          "th" as name} as v ->
×
2535
        report l (`Unmatched_end_tag "name") !throw (fun () ->
2536
        if not @@ Stack.in_table_scope open_elements name then
×
2537
          in_select_in_table_mode ()
×
2538
        else begin
×
2539
          push tokens v;
2540
          close_element l "select" (fun () -> reset_mode () ())
×
2541
        end)
2542

2543
      | v ->
×
2544
        in_select_mode_rules in_select_in_table_mode v
2545
    end
2546

2547
  (* 8.2.5.4.18. *)
2548
  and in_template_mode () =
2549
    dispatch tokens (fun v -> in_table_mode_rules in_template_mode v)
×
2550

2551
  (* 8.2.5.4.18. *)
2552
  and in_template_mode_rules mode = function
2553
    | _, (`Char _ | `Comment _ | `Doctype _) as v ->
×
2554
      in_body_mode_rules "template" mode v
2555

2556
    | _, `Start {name =
×
2557
        "base" | "basefont" | "bgsound" | "link" | "meta" | "noframes" |
×
2558
        "script" | "style" | "template" | "title"}
×
2559
    | _, `End {name = "template"} as v ->
×
2560
      in_head_mode_rules mode v
2561

2562
    | _, `Start {name =
×
2563
        "caption" | "colgroup" | "tbody" | "tfoot" | "thead"} as v ->
×
2564
      Template.pop template_insertion_modes;
2565
      Template.push template_insertion_modes in_table_mode;
×
2566
      push tokens v;
×
2567
      in_table_mode ()
×
2568

2569
    | _, `Start {name = "col"} as v ->
×
2570
      Template.pop template_insertion_modes;
2571
      Template.push template_insertion_modes in_column_group_mode;
×
2572
      push tokens v;
×
2573
      in_column_group_mode ()
×
2574

2575
    | _, `Start {name = "tr"} as v ->
×
2576
      Template.pop template_insertion_modes;
2577
      Template.push template_insertion_modes in_table_body_mode;
×
2578
      push tokens v;
×
2579
      in_table_body_mode ()
×
2580

2581
    | _, `Start {name = "td" | "th"} as v ->
×
2582
      Template.pop template_insertion_modes;
2583
      Template.push template_insertion_modes in_row_mode;
×
2584
      push tokens v;
×
2585
      in_row_mode ()
×
2586

2587
    | _, `Start _ as v ->
×
2588
      Template.pop template_insertion_modes;
2589
      Template.push template_insertion_modes in_body_mode;
×
2590
      push tokens v;
×
2591
      in_body_mode ()
×
2592

2593
    | l, `End {name} ->
×
2594
      report l (`Unmatched_end_tag name) !throw mode
2595

2596
    | l, `EOF as v ->
×
2597
      if not @@ Stack.has open_elements "template" then emit_end l
×
2598
      else begin
×
2599
        report l (`Unmatched_end_tag "template") !throw (fun () ->
2600
        Active.clear_until_marker active_formatting_elements;
×
2601
        Template.pop template_insertion_modes;
×
2602
        push tokens v;
×
2603
        close_element l "template" (fun () -> reset_mode () ()))
×
2604
      end
2605

2606
  (* 8.2.5.4.19. *)
2607
  and after_body_mode () =
2608
    dispatch tokens begin function
21✔
2609
      | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020) as v ->
×
2610
        in_body_mode_rules "html" after_body_mode v
2611

2612
      | l, `Comment s ->
×
2613
        emit l (`Comment s) after_body_mode
2614

2615
      | l, `Doctype _ ->
×
2616
        report l (`Bad_document "doctype should be first") !throw
2617
          after_body_mode
2618

2619
      | _, `Start {name = "html"} as v ->
×
2620
        in_body_mode_rules "html" after_body_mode v
2621

2622
      | _, `End {name = "html"} ->
13✔
2623
        after_after_body_mode ()
2624

2625
      | l, `EOF ->
7✔
2626
        emit_end l
2627

2628
      | l, _ as v ->
1✔
2629
        report l (`Bad_document "content after body") !throw (fun () ->
2630
        push tokens v;
1✔
2631
        in_body_mode ())
1✔
2632
    end
2633

2634
  (* 8.2.5.4.20. *)
2635
  and in_frameset_mode () =
2636
    dispatch tokens begin function
21✔
2637
      | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->
×
2638
        add_character l c;
2639
        in_frameset_mode ()
3✔
2640

2641
      | l, `Comment s ->
1✔
2642
        emit l (`Comment s) in_frameset_mode
2643

2644
      | l, `Doctype _ ->
1✔
2645
        report l (`Bad_document "doctype should be first") !throw
2646
          in_frameset_mode
2647

2648
      | _, `Start {name = "html"} as v ->
1✔
2649
        in_body_mode_rules "frameset" in_frameset_mode v
2650

2651
      | l, `Start ({name = "frameset"} as t) ->
1✔
2652
        push_and_emit l t in_frameset_mode
2653

2654
      | l, `End {name = "frameset"} ->
7✔
2655
        (fun mode' ->
2656
          if Stack.current_element_is open_elements ["html"] then
7✔
2657
            report l (`Unmatched_end_tag "frameset") !throw mode'
×
2658
          else
2659
            pop l mode')
7✔
2660
        (fun () ->
2661
          if Stack.current_element_is open_elements ["frameset"] then
7✔
2662
            in_frameset_mode ()
1✔
2663
          else after_frameset_mode ())
6✔
2664

2665
      | l, `Start ({name = "frame"} as t) ->
3✔
2666
        push_and_emit ~acknowledge:true l t (fun () ->
2667
        pop l in_frameset_mode)
3✔
2668

2669
      | _, `Start {name = "noframes"} as v ->
1✔
2670
        in_head_mode_rules in_frameset_mode v
2671

2672
      | l, `EOF ->
2✔
2673
        (fun mode' ->
2674
          if not @@ Stack.current_element_is open_elements ["html"] then
2✔
2675
            report l (`Unexpected_eoi "frameset") !throw mode'
1✔
2676
          else mode' ())
1✔
2677
        (fun () -> emit_end l)
2✔
2678

2679
      | l, _ ->
1✔
2680
        report l (`Bad_content "frameset") !throw in_frameset_mode
2681
    end
2682

2683
  (* 8.2.5.4.21. *)
2684
  and after_frameset_mode () =
2685
    dispatch tokens begin function
14✔
2686
      | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->
×
2687
        add_character l c;
2688
        after_frameset_mode ()
3✔
2689

2690
      | l, `Comment s ->
1✔
2691
        emit l (`Comment s) after_frameset_mode
2692

2693
      | l, `Doctype _ ->
1✔
2694
        report l (`Bad_document "doctype should be first") !throw
2695
          after_frameset_mode
2696

2697
      | _, `Start {name = "html"} as v ->
1✔
2698
        in_body_mode_rules "html" after_frameset_mode v
2699

2700
      | l, `End {name = "html"} ->
1✔
2701
        close_element l "html" after_after_frameset_mode
2702

2703
      | _, `Start {name = "noframes"} as v ->
1✔
2704
        in_head_mode_rules after_frameset_mode v
2705

2706
      | l, `EOF ->
5✔
2707
        emit_end l
2708

2709
      | l, _ ->
1✔
2710
        report l (`Bad_content "html") !throw after_frameset_mode
2711
    end
2712

2713
  (* 8.2.5.4.22. *)
2714
  and after_after_body_mode () =
2715
    dispatch tokens begin function
14✔
2716
      | l, `Comment s ->
×
2717
        emit l (`Comment s) after_after_body_mode
2718

2719
      | _, `Doctype _
×
2720
      | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020)
×
2721
      | _, `Start {name = "html"} as v ->
×
2722
        in_body_mode_rules "html" after_after_body_mode v
2723

2724
      | l, `EOF ->
12✔
2725
        emit_end l
2726

2727
      | l, _ as v ->
1✔
2728
        push tokens v;
2729
        report l (`Bad_content "html") !throw in_body_mode
1✔
2730
    end
2731

2732
  (* 8.2.5.4.23. *)
2733
  and after_after_frameset_mode () =
2734
    dispatch tokens begin function
1✔
2735
      | l, `Comment s ->
×
2736
        emit l (`Comment s) after_after_frameset_mode
2737

2738
      | _, `Doctype _
×
2739
      | _, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020)
×
2740
      | _, `Start {name = "html"} as v ->
×
2741
        in_body_mode_rules "html" after_after_frameset_mode v
2742

2743
      | l, `EOF ->
1✔
2744
        emit_end l
2745

2746
      | _, `Start {name = "noframes"} as v ->
×
2747
        in_head_mode_rules after_after_frameset_mode v
2748

2749
      | l, _ ->
×
2750
        report l (`Bad_content "html") !throw after_after_frameset_mode
2751
    end
2752

2753
  (* 8.2.5.5. *)
2754
  and foreign_start_tag mode l tag =
2755
    let namespace =
7✔
2756
      match Stack.adjusted_current_element context open_elements with
2757
      | None -> `HTML
×
2758
      | Some {element_name = ns, _} -> ns
7✔
2759
    in
2760

2761
    push_and_emit ~acknowledge:true ~namespace l tag (fun () ->
2762
    if tag.self_closing then pop l mode
3✔
2763
    else mode ())
9✔
2764

2765
  and is_html_font_tag tag =
2766
    tag.Token_tag.attributes |> List.exists (function
×
2767
      | ("color" | "face" | "size"), _ -> true
×
2768
      | _ -> false)
×
2769

2770
  and foreign_content mode force_html v =
2771
    match v with
22✔
2772
    | l, `Char 0 ->
1✔
2773
      report l (`Bad_token ("U+0000", "foreign content", "null")) !throw
2774
        (fun () ->
2775
      add_character l u_rep;
1✔
2776
      mode ())
1✔
2777

2778
    | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->
×
2779
      add_character l c;
2780
      mode ()
×
2781

2782
    | l, `Char c ->
6✔
2783
      frameset_ok := false;
2784
      add_character l c;
2785
      mode ()
6✔
2786

2787
    | l, `Comment s ->
×
2788
      emit l (`Comment s) mode
2789

2790
    | l, `Doctype _ ->
×
2791
      report l (`Bad_document "doctype should be first") !throw mode
2792

2793
    | l, `Start ({name =
×
2794
        "b" | "big" | "blockquote" | "body" | "br" | "center" | "code" |
×
2795
        "dd" | "div" | "dl" | "dt" | "em" | "embed" | "font" | "h1" | "h2" |
×
2796
        "h3" | "h4" | "h5" | "h6" | "head" | "hr" | "i" | "img" | "li" |
×
2797
        "listing" | "main" | "meta" | "nobr" | "ol" | "p" | "pre" | "ruby" |
×
2798
        "s" | "small" | "span" | "strong" | "strike" | "sub" | "sup" |
×
2799
        "table" | "tt" | "u" | "ul" | "var" as name} as t) as v ->
×
2800
      if name = "font" && not @@ is_html_font_tag t then
×
2801
        foreign_start_tag mode l t
×
2802
      else
2803
        misnested_tag l t "xml tag" (fun () ->
×
2804
        push tokens v;
×
2805
        pop l (fun () ->
×
2806
        pop_until (function
×
2807
          | {element_name = `HTML, _} -> true
×
2808
          | {is_html_integration_point = true} -> true
×
2809
          | {element_name} ->
×
2810
            Foreign.is_mathml_text_integration_point element_name)
2811
          l mode))
2812

2813
    | l, `Start t ->
7✔
2814
      foreign_start_tag mode l t
2815

2816
    | l, `End {name = "script"}
×
2817
        when
2818
          match Stack.current_element open_elements with
2819
          | Some {element_name = `SVG, "script"} -> true
×
2820
          | _ -> false ->
×
2821
      pop l mode
×
2822

2823
    | l, `End {name} ->
8✔
2824
      (fun mode' ->
2825
        match Stack.current_element open_elements with
8✔
2826
        | Some {element_name = _, name'} when String.lowercase_ascii name' = name ->
8✔
2827
          mode' ()
8✔
2828
        | _ ->
×
2829
          report l (`Unmatched_end_tag name) !throw (fun () ->
2830
          mode' ()))
×
2831
      (fun () ->
2832
        let rec scan = function
8✔
2833
          | [] -> mode ()
×
2834
          | {element_name = ns, name'}::_
8✔
2835
              when String.lowercase_ascii name' = name ->
8✔
2836
            close_element ~ns l name mode
8✔
2837
          | {element_name = `HTML, _}::_ -> force_html ()
×
2838
          | _::rest -> scan rest
×
2839
        in
2840
        scan !open_elements)
2841

2842
    | _, `EOF -> force_html ()
×
2843

2844
  in
2845

2846
  construct constructor
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc