• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tbranyen / diffhtml / 12189631411

05 Dec 2024 11:09PM CUT coverage: 98.581%. Remained the same
12189631411

Pull #349

github

web-flow
Merge 3ee60590a into f25d1c4ac
Pull Request #349: Bump path-to-regexp and express in /packages/diffhtml-static-sync

840 of 896 branches covered (93.75%)

4795 of 4864 relevant lines covered (98.58%)

427.67 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.94
/packages/diffhtml/lib/util/parse.js
1
/**
2✔
2
 * @typedef {import('./types').TransactionConfig} TransactionConfig
2✔
3
 * @typedef {import('./types').ParserConfig} ParserConfig
2✔
4
 * @typedef {import('./types').VTree} VTree
2✔
5
 */
2✔
6
import createTree from '../tree/create';
2✔
7
import getConfig from './config';
2✔
8
import { NODE_TYPE, EMPTY } from './types';
2✔
9

2✔
10
// Default tag names whose contents are kept as raw text instead of being
// parsed as markup. Used by `parse` as the fallback for the `rawElements`
// parser option.
const rawElementsDefaults = [
  'script',
  'noscript',
  'style',
  'template',
];
2✔
16

2✔
17
/**
 * Default tag names treated as void elements: they are closed automatically
 * and never contain children. Used by `parse` as the fallback for the
 * `voidElements` parser option.
 *
 * @see https://developer.mozilla.org/en-US/docs/Glossary/Void_element
 */
const voidElementsDefaults = [
  'area',
  'base',
  'br',
  'col',
  'embed',
  'hr',
  'img',
  'input',
  'link',
  'meta',
  'param',
  'source',
  'track',
  'wbr',
];
2✔
37

2✔
38
/**
 * These are elements that support omitting a closing tag when certain criteria
 * are met.
 *
 * The outer key is the tag name that is being opened. The inner keys are the
 * node names of the currently open element that get closed implicitly when
 * that tag is encountered, e.g. opening an `li` while another `li` is open
 * closes the previous one. The inner values are only tested for truthiness.
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
 * @type {any}
 */
const endTagOmissionRules = {
  li: { li: EMPTY.NUM },
  dt: { dt: EMPTY.NUM, dd: EMPTY.NUM },
  dd: { dt: EMPTY.NUM, dd: EMPTY.NUM },
  td: { td: EMPTY.NUM, th: EMPTY.NUM },
  th: { td: EMPTY.NUM, th: EMPTY.NUM },
  tbody: { tbody: EMPTY.NUM, tfoot: EMPTY.NUM },
  tfoot: { tbody: EMPTY.NUM, tfoot: EMPTY.NUM },
};
2✔
54

2✔
55
// List of regular expressions to match various HTML features. All of them are
// global (`g`) and therefore stateful: callers must set `lastIndex` before
// calling `exec` to control where matching starts.
export const openComment = /<!--/g;
export const closeComment = /-->/g;

// Extract the tagName from an opening tag. There must not be any whitespace
// between the opening angle bracket and the word/namespace.
export const openTag = /<([^\s\\/>]*)\s?/g;

// Matches either a self-closing terminator (`/>`, optionally preceded by one
// whitespace character) or a full closing tag (`</name>`). The closing tag
// name is captured in group 3.
export const closeTag = /\s?(\/>)|(<\/(.*?)>)/g;

// Find all values within quotes and up to `>`. Support interpolated values
// with __DIFFHTML_TOKEN__ format.
export const attribute = /([^>\/\n ])*?(["'])(?:(?=(\\?))\3.)*?\2|.*?(?=\/>|>|\n| )/gsm;

// Splits a single attribute into its key (group 1) and value (group 2) around
// the first `=`.
export const parseAttr = /([^=]*)=(.*)|([^>])/gsm;

// Text nodes are anything that isn't <.
export const text = /([^<]*)/g;
2✔
71

2✔
72
/**
 * Removes a matching pair of wrapping double ("") or single ('') quotes from
 * an attribute value when it is parsed directly from the markup.
 *
 * The value is returned unchanged when it is not a string, when it is not
 * wrapped in quotes, or when the first and last characters are different
 * quote characters. A value made of a single quote character is not a
 * wrapped pair and is also returned unchanged.
 *
 * @param {string} value
 * @return {string}
 */
function removeQuotes(value) {
  // A wrapped value needs at least an opening and a closing character;
  // without this guard a lone `"` would match itself and collapse to ''.
  if (typeof value !== 'string' || value.length < 2) {
    return value;
  }

  const first = value[0];
  const startsWithQuote = first === '"' || first === '\'';

  // Only strip when the closing character is the same kind of quote.
  if (startsWithQuote && value[value.length - 1] === first) {
    return value.slice(1, -1);
  }

  return value;
}
192✔
95

2✔
96
/**
 * Parses HTML and returns a root element.
 *
 * The markup is scanned with a single cursor. On each step the parser tries,
 * in order: comments, opening tags, attributes of the currently open tag, raw
 * text inside block text elements, the `>` that ends an opening tag, closing
 * tags, and finally plain text. The result is always wrapped in a
 * `#document-fragment` tree. When the first parsed child is an `html` element,
 * its children are rearranged so that it contains a `head` followed by a
 * `body`.
 *
 * The passed `options` object is only read, never modified.
 *
 * @param {String} html - String of HTML markup to parse into a Virtual Tree
 * @param {TransactionConfig=} options - Contains additional options
 * @return {VTree} - Parsed Virtual Tree Element
 */
export default function parse(html, options = {}) {
  // Always start with a fragment container when parsing.
  const root = createTree('#document-fragment', null, []);

  // If no markup is provided, return an empty text node. This is a fast path
  // to circumvent extra work in this case.
  if (!html) {
    root.childNodes.push(createTree('#text', EMPTY.STR));
    return root;
  }

  // If there are no parser configuration options passed, use an empty object.
  // This is kept in a local so the caller's options object is not mutated.
  /** @type {ParserConfig} */
  const parserConfig = options.parser || EMPTY.OBJ;

  // Elements that have all nested children converted into text, like script
  // and style tags.
  const blockText = new Set(
    /** @type {string[]} */(
      getConfig(
        'rawElements',
        rawElementsDefaults,
        'array',
        parserConfig,
      )
    ),
  );

  // Elements that are automatically self closed, and never contain children.
  const voidElements = new Set(
    /** @type {string[]} */(
      getConfig(
        'voidElements',
        voidElementsDefaults,
        'array',
        parserConfig,
      )
    ),
  );

  // If there are no brackets, we can avoid some extra work by treating this as
  // text. This is a fast path for text.
  if (!html.includes('<') && !html.includes('>')) {
    const newTree = createTree('#text', html);
    root.childNodes.push(newTree);
    return root;
  }

  // Contains the active hierarchy, its length will be that of the deepest
  // element crawled.
  const stack = [root];

  // Cursor into the markup that we use when parsing.
  let i = 0;

  /**
   * The active element being crawled.
   * @type {VTree}
   */
  let pointer = root;

  // The pointer is open when looking for attributes, self closing, or open
  // tag closing.
  let isOpen = false;

  /**
   * Index of the next known comment opening, or `null` once no further
   * comments exist. Allows short-circuiting the comment search.
   * @type {number|null}
   */
  const firstCommentIndex = html.indexOf('<!--');
  let lastCommentIndex = firstCommentIndex === -1 ? null : firstCommentIndex;

  // Closes the current element and calls createTree to allow middleware to tap
  // into it. Resets the pointer to the parent. This function should never be
  // called with the root element, otherwise it will set a null pointer.
  const resetPointer = () => {
    // Create tree is called to normalize the stack into VTree and allow
    // middleware to hook into the parser.
    const newTree = createTree(stack.pop());

    // Reset the pointer to the parent.
    pointer = stack[stack.length - 1];
    pointer.childNodes.push(newTree);
  };

  // This loop treats the `i` as a cursor into the markup determining what is
  // being parsed. This is useful for setting the `lastIndex` values of the
  // regular expressions defined above. Once this value matches the length of
  // the input markup, we know we have finished parsing.
  while (i < html.length) {
    // Set the lastIndex for all stateful regexes to avoid slicing the html
    // string and getting the latest match each time.
    openComment.lastIndex = i;
    closeComment.lastIndex = i;
    openTag.lastIndex = i;
    closeTag.lastIndex = i;
    attribute.lastIndex = i;
    text.lastIndex = i;

    // Reset parseAttr for each iteration.
    parseAttr.lastIndex = 0;

    /**
     * First check for open comments this allows bypassing any other parsing
     * if a comment has been opened. The search only runs once the cursor has
     * reached the last known comment position.
     * @type {Boolean}
     */
    const shouldSeekComment = lastCommentIndex !== null && lastCommentIndex <= i;

    /** @type {number|undefined} */
    let openCommentIndex;

    if (shouldSeekComment) {
      const openCommentMatch = openComment.exec(html);

      // `exec` returns null (not an index of -1) when nothing matches. When
      // there are no remaining comments, stop seeking. This is very important
      // for performance reasons, otherwise on every loop tick we are crawling
      // the entire markup for something we know isn't there.
      openCommentIndex = openCommentMatch ? openCommentMatch.index : undefined;
      lastCommentIndex = openCommentMatch ? openCommentMatch.index : null;
    }

    const isNotRoot = pointer !== root;

    // If an element is a block text element (such as script) we should not
    // parse anything under it, except as text.
    const isBlockElement = pointer && blockText.has(pointer.nodeName);

    // If a comment exists, search for the close and treat everything between
    // as a string. There may be dynamic supplemental values to interpolate,
    // these will be toString'd before injection.
    if (openCommentIndex === i) {
      // Find the first close comment instance. Default to the end of the
      // markup if no end comment is found.
      const closeCommentMatch = closeComment.exec(html);
      const closeCommentIndex = closeCommentMatch
        ? closeCommentMatch.index
        : html.length;

      const comment = createTree('#comment');
      // Skip the four characters of `<!--`.
      comment.nodeValue = html.slice(i + 4, closeCommentIndex);
      pointer.childNodes.push(comment);

      // Skip the three characters of `-->`.
      i = closeCommentIndex + 3;
      continue;
    }

    // Open tags.
    const {
      0: fullOpenTagMatch,
      1: tagName,
      index: openTagIndex,
    } = openTag.exec(html) || EMPTY.OBJ;

    // Only open a tag if it contains a tag name.
    if (openTagIndex === i && tagName && !isBlockElement) {
      // If a doctype, skip to the end, we don't parse these.
      if (tagName[0] === '!') {
        // Find the next > since the open tag. When the declaration is never
        // terminated, consume the rest of the markup; otherwise `indexOf`
        // returning -1 would rewind the cursor to 0 and loop forever.
        const declarationEnd = html.indexOf('>', openTagIndex);
        i = declarationEnd === -1 ? html.length : declarationEnd + 1;
        continue;
      }

      // Don't call createTree yet, otherwise we won't have access to the
      // completed element. So create a fake VTree, to build up the object
      // until we have attributes and child nodes.
      const newTree = {
        rawNodeName: tagName,
        nodeName: tagName,
        childNodes: [],
        attributes: {},
        nodeType: EMPTY.NUM,
        nodeValue: EMPTY.STR,
        key: EMPTY.STR,
      };

      // Only consult own properties so tag names such as `constructor` do not
      // resolve to members inherited from Object.prototype.
      const hasOwn = Object.prototype.hasOwnProperty;
      const omissionRule = hasOwn.call(endTagOmissionRules, tagName)
        ? endTagOmissionRules[tagName]
        : undefined;

      // Some elements cannot be nested, e.g. we can't nest an li inside an
      // li, so opening one implicitly closes the currently open element.
      if (
        omissionRule &&
        hasOwn.call(omissionRule, pointer.nodeName) &&
        omissionRule[pointer.nodeName]
      ) {
        resetPointer();
      }

      pointer = newTree;
      stack.push(pointer);

      isOpen = true;
      i = openTagIndex + fullOpenTagMatch.length;
      continue;
    }

    // Attributes.
    const {
      0: fullAttributeMatch,
      index: attributeIndex,
    } = attribute.exec(html) || EMPTY.OBJ;

    const attributeMatchTrim = attributeIndex === i && fullAttributeMatch.trim();

    if (isOpen && attributeIndex === i) {
      // Skip whitespace
      if (!attributeMatchTrim) {
        // Advance past the (empty or whitespace) match plus the delimiter
        // character that follows it.
        i = i + fullAttributeMatch.length + 1;

        // TBD Refactor this so its not duplicated
        if (html[i - 1] === '>') {
          const isEnd = i === html.length;

          // Self closing
          if (html[i - 2] === '/' || voidElements.has(pointer.nodeName) || isEnd) {
            resetPointer();
          }

          isOpen = false;
        }

        continue;
      }

      // When the attribute has no `=`, both key and value fall back to the
      // full match, which marks it as a boolean attribute below.
      const {
        1: key = fullAttributeMatch,
        2: value = fullAttributeMatch,
      } = parseAttr.exec(attributeMatchTrim) || EMPTY.OBJ;

      const isBoolean = key === value || value === undefined;
      const trimKey = key.trim();

      pointer.attributes[trimKey] = isBoolean ? Boolean(value) : removeQuotes(value);

      i = attributeIndex + fullAttributeMatch.length;
      continue;
    }

    // When in a block element, find the nearest closing element, otherwise
    // use the entire input.
    if (isBlockElement) {
      const rawCloseTag = `</${pointer.nodeName}>`;
      let rawCloseTagIndex = html.indexOf(rawCloseTag, i);

      if (rawCloseTagIndex === -1) {
        rawCloseTagIndex = html.length;
      }

      const innerText = html.slice(i, rawCloseTagIndex);

      if (innerText) {
        pointer.childNodes.push(createTree('#text', innerText));
      }

      i = rawCloseTagIndex + rawCloseTag.length;
      isOpen = false;
      resetPointer();
      continue;
    }

    // Close opened tags.
    if (html[i] === '>') {
      isOpen = false;
      i = i + 1;

      // Automatically close void elements.
      if (voidElements.has(pointer.nodeName)) {
        resetPointer();
      }

      continue;
    }

    // Close tags.
    const {
      0: fullCloseTagMatch,
      3: closeTagName,
      index: closeTagIndex,
    } = closeTag.exec(html) || EMPTY.OBJ;

    // Look for closing tags
    if (closeTagIndex === i && fullCloseTagMatch) {
      const isVoidElement = voidElements.has(closeTagName);

      // Only a `</name>` style match (second character is `/`) closes the
      // current element; closing tags for void elements are ignored.
      if (fullCloseTagMatch[1] === '/' && isNotRoot && !isVoidElement) {
        resetPointer();
      }
      isOpen = false;

      i = closeTagIndex + fullCloseTagMatch.length;
      continue;
    }

    // Text.
    const {
      0: fullTextMatch,
      index: textIndex,
    } = text.exec(html) || EMPTY.OBJ;

    if (!isOpen && textIndex === i && fullTextMatch.length) {
      const newTree = createTree('#text', fullTextMatch);
      pointer.childNodes.push(newTree);
      i = textIndex + fullTextMatch.length;

      // The markup ended inside an unclosed element, so close it.
      if (i === html.length && isNotRoot) {
        resetPointer();
      }

      continue;
    }

    // Use remaining values as text
    pointer.childNodes.push(createTree('#text', html.slice(i, html.length)));
    i = html.length;
  }

  // This is an entire document, so only allow the HTML children to be
  // body or head.
  if (root.childNodes.length && root.childNodes[0].nodeName === 'html') {
    // Store elements from before body end and after body end.
    /** @type {{ [name: string]: VTree[] }} */
    const head = { before: [], after: [] };
    /** @type {{ [name: string]: VTree[] }} */
    const body = { after: [] };
    const HTML = root.childNodes[0];

    let beforeHead = true;
    let beforeBody = true;

    // Iterate the children and store elements in the proper array for
    // later concat, replace the current childNodes with this new array.
    HTML.childNodes = HTML.childNodes.filter(el => {
      // If either body or head, allow as a valid element.
      if (el.nodeName === 'body' || el.nodeName === 'head') {
        if (el.nodeName === 'head') beforeHead = false;
        if (el.nodeName === 'body') beforeBody = false;

        return true;
      }

      // Not a valid nested HTML tag element, move to respective container.
      if (el.nodeType === NODE_TYPE.ELEMENT) {
        if (beforeHead && beforeBody) head.before.push(el);
        else if (!beforeHead && beforeBody) head.after.push(el);
        else if (!beforeBody) body.after.push(el);
      }

      // Everything other than head and body is removed from the html
      // element's direct children.
      return false;
    });

    // Ensure the first element is the HEAD tag.
    if (!HTML.childNodes[0] || HTML.childNodes[0].nodeName !== 'head') {
      const headInstance = createTree('head', null, []);

      if (headInstance) {
        const existing = headInstance.childNodes;

        existing.unshift(...head.before);
        existing.push(...head.after);
        HTML.childNodes.unshift(headInstance);
      }
    }
    else {
      const existing = HTML.childNodes[0].childNodes;

      existing.unshift(...head.before);
      existing.push(...head.after);
    }

    // Ensure the second element is the body tag.
    if (!HTML.childNodes[1] || HTML.childNodes[1].nodeName !== 'body') {
      const bodyInstance = createTree('body', null, []);

      if (bodyInstance) {
        const existing = bodyInstance.childNodes;

        existing.push(...body.after);
        HTML.childNodes.push(bodyInstance);
      }
    }
    else {
      const existing = HTML.childNodes[1].childNodes;
      existing.push(...body.after);
    }
  }

  return root;
}
570✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc