• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

microlinkhq / html-get / 9315052343

31 May 2024 08:11AM UTC coverage: 97.479% (-0.004%) from 97.483%
9315052343

push

github

Kikobeats
build: tweaks

107 of 116 branches covered (92.24%)

Branch coverage included in aggregate %.

473 of 479 relevant lines covered (98.75%)

21.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.2
/src/html.js
1
'use strict'
10✔
2

10✔
3
const { get, split, nth, castArray, forEach } = require('lodash')
10✔
4
const localhostUrl = require('localhost-url-regex')
10✔
5
const { TAGS: URL_TAGS } = require('html-urls')
10✔
6
const isHTML = require('is-html-content')
10✔
7
const cssUrl = require('css-url-regex')
10✔
8
const execall = require('execall')
10✔
9
const cheerio = require('cheerio')
10✔
10
const { URL } = require('url')
10✔
11
const path = require('path')
10✔
12

10✔
13
const {
10✔
14
  date: toDate,
10✔
15
  isMime,
10✔
16
  isUrl,
10✔
17
  mimeExtension,
10✔
18
  parseUrl
10✔
19
} = require('@metascraper/helpers')
10✔
20

10✔
21
// True when the given selection (anything exposing `length`) is not empty.
const has = ({ length }) => length !== 0
10✔
22

10✔
23
// Queues `item` on `collection` only when the selection `el` is empty;
// yields `false` when nothing was queued, otherwise the result of `push`.
const upsert = (el, collection, item) =>
  has(el) ? false : collection.push(item)
10✔
24

10✔
25
/**
 * Derive a date from response headers.
 *
 * `last-modified` is preferred, then `date`. When neither holds a truthy
 * value, the result is computed from the current time minus the `age` header
 * multiplied by 1000 (presumably seconds converted to milliseconds).
 * Whatever value is picked is handed to `toDate` and its result is returned.
 */
const getDate = headers => {
  const explicit = get(headers, 'last-modified') || get(headers, 'date')
  if (explicit) return toDate(explicit)

  const now = Date.now()
  const age = Number(get(headers, 'age'))
  return toDate(now - age * 1000)
}
60✔
34

10✔
35
/**
 * Completes the document `<head>` with tags derived from the URL and the
 * response headers. A tag is only added when its lookup selector matches
 * nothing inside `<head>`; all lookups run before anything is appended.
 *
 * Candidate tags, in insertion order:
 *  - `<title>` built from `path.basename(url)`
 *  - `og:site_name` from the parsed domain (only when a domain is available)
 *  - `<meta name="date">` from `getDate(headers)` (only when a date is available)
 *  - canonical `<link>` pointing at `url`
 *  - `<meta charset>` from the text after `charset=` in `content-type`
 */
const addHead = ({ $, url, headers }) => {
  const contentType = get(headers, 'content-type')
  const charset = nth(split(contentType, 'charset='), 1)
  const date = getDate(headers)
  const { domain } = parseUrl(url)
  const head = $('head')

  // Each entry: [should be considered, lookup selector, markup to append].
  const candidates = [
    [true, 'title', `<title>${path.basename(url)}</title>`],
    [
      Boolean(domain),
      'meta[property="og:site_name"]',
      `<meta property="og:site_name" content="${domain}">`
    ],
    // NOTE(review): presence is checked against `article:published_time`
    // while the appended tag is `<meta name="date">` — confirm intended.
    [
      Boolean(date),
      'meta[property="article:published_time"]',
      `<meta name="date" content="${date}" />`
    ],
    [true, 'link[rel="canonical"]', `<link rel="canonical" href="${url}">`],
    [Boolean(charset), 'meta[charset]', `<meta charset="${charset}">`]
  ]

  const missing = []
  for (const [enabled, selector, markup] of candidates) {
    if (enabled && !has(head.find(selector))) missing.push(markup)
  }

  for (const markup of missing) head.append(markup)
}
56✔
73

10✔
74
/**
 * Wraps non-HTML content in a minimal HTML document.
 *
 * The `content-type` header selects the body markup: an `<img>` for image
 * types, a `<video>`/`<audio>` with a `<source>` for media types, and a
 * `<pre>` holding the raw payload when the mime extension is `json`.
 * Any other content type produces an empty `<body>`.
 *
 * NOTE(review): `url`, `contentType` and `html` are interpolated as-is,
 * without escaping — confirm callers only pass trusted values.
 */
const addBody = ({ url, headers, html }) => {
  const contentType = get(headers, 'content-type')

  const renderElement = () => {
    if (isMime(contentType, 'image')) {
      return `<img src="${url}"></img>`
    }
    if (isMime(contentType, 'video')) {
      return `<video><source src="${url}" type="${contentType}"></source></video>`
    }
    if (isMime(contentType, 'audio')) {
      return `<audio><source src="${url}" type="${contentType}"></source></audio>`
    }
    if (mimeExtension(contentType) === 'json') {
      return `<pre>${html}</pre>`
    }
    return ''
  }

  return `<!DOCTYPE html><html><head></head><body>${renderElement()}</body></html>`
}
18✔
91

10✔
92
/**
 * Walks every element listed in `URL_TAGS` (URL attribute name → tag names)
 * and normalizes its URL attribute:
 *  - elements whose attribute matches the localhost pattern are removed;
 *  - string values not starting with `http` are resolved against `url`.
 * Values that cannot be resolved are left untouched.
 */
const rewriteHtmlUrls = ({ $, url }) => {
  forEach(URL_TAGS, (tagNames, attrName) => {
    $(tagNames.join(',')).each((index, node) => {
      const el = $(node)
      const value = el.attr(attrName)

      if (localhostUrl().test(value)) {
        el.remove()
        return
      }

      if (typeof value !== 'string' || value.startsWith('http')) return

      try {
        const resolved = new URL(value, url).toString()
        el.attr(attrName, resolved)
      } catch (error) {
        // Best effort: keep the original attribute when it cannot be resolved.
      }
    })
  })
}
9✔
109

10✔
110
/**
 * Rewrites root-relative CSS `url(...)` references found in `html` so they
 * are absolute against `url`, and returns the resulting markup.
 *
 * Only distinct sub-matches starting with `/` are rewritten, and only the
 * exact `url(<value>)` form is replaced. Values that fail to resolve are
 * skipped.
 */
const rewriteCssUrls = ({ html, url }) => {
  // Collect distinct captured URLs, preserving first-seen order.
  const found = new Set()
  for (const { subMatches } of execall(cssUrl(), html)) {
    for (const candidate of subMatches) found.add(candidate)
  }

  let output = html

  for (const relative of Array.from(found)) {
    if (!relative.startsWith('/')) continue

    try {
      const absolute = new URL(relative, url).toString()
      output = output.replaceAll(`url(${relative})`, `url(${absolute})`)
    } catch (error) {
      // Best effort: leave the reference as-is when it cannot be resolved.
    }
  }

  return output
}
9✔
129

10✔
130
/**
 * Appends every entry of `styles` (a single value or an array) to `<head>`.
 * Entries accepted by `isUrl` become a stylesheet `<link>`; anything else is
 * inlined inside a `<style>` tag.
 */
const injectStyle = ({ $, styles }) => {
  for (const style of castArray(styles)) {
    const head = $('head')
    const markup = isUrl(style)
      ? `<link rel="stylesheet" type="text/css" href="${style}">`
      : `<style type="text/css">${style}</style>`
    head.append(markup)
  }
}
3✔
138

10✔
139
/**
 * Appends every entry of `scripts` (a single value or an array) to `<head>`
 * as a `<script>` tag carrying the given `type`. Entries accepted by `isUrl`
 * are referenced through `src`; anything else is inlined as the tag content.
 */
const injectScripts = ({ $, scripts, type }) => {
  for (const script of castArray(scripts)) {
    const head = $('head')
    const markup = isUrl(script)
      ? `<script src="${script}" type="${type}"></script>`
      : `<script type="${type}">${script}</script>`
    head.append(markup)
  }
}
1✔
147

10✔
148
/**
 * Ensures the markup starts with a doctype declaration.
 *
 * The markup is returned untouched when a `<!doctype` declaration (any
 * letter case) is the first thing in it, ignoring leading whitespace and
 * HTML comments. Otherwise `<!DOCTYPE html>` is prepended.
 *
 * A plain `startsWith('<!')` check is not enough: it mistakes a leading
 * `<!-- comment -->` for a doctype, and it misses a real doctype that is
 * preceded by whitespace or a byte-order mark.
 *
 * @param {string} html - Markup to check.
 * @returns {string} Markup guaranteed to contain a leading doctype.
 */
const addDocType = html => {
  const hasDocType = /^(?:\s|<!--[\s\S]*?-->)*<!doctype\b/i.test(html)
  return hasDocType ? html : `<!DOCTYPE html>${html}`
}
56✔
150

10✔
151
module.exports = ({
10✔
152
  html,
56✔
153
  url,
56✔
154
  headers = {},
56✔
155
  styles,
56✔
156
  hide,
56✔
157
  remove,
56✔
158
  rewriteUrls,
56✔
159
  scripts,
56✔
160
  modules
56✔
161
}) => {
56✔
162
  const content = addDocType(
56✔
163
    isHTML(html) ? html : addBody({ url, headers, html })
56✔
164
  )
56✔
165

56✔
166
  const $ = cheerio.load(content)
56✔
167

56✔
168
  if (rewriteUrls) rewriteHtmlUrls({ $, url })
56✔
169

56✔
170
  addHead({ $, url, headers })
56✔
171

56✔
172
  if (styles) injectStyle({ $, styles })
56✔
173

56✔
174
  if (hide) {
56✔
175
    injectStyle({
1✔
176
      $,
1✔
177
      styles: `${castArray(hide).join(', ')} { visibility: hidden !important; }`
1✔
178
    })
1✔
179
  }
1✔
180

56✔
181
  if (remove) {
56✔
182
    injectStyle({
1✔
183
      $,
1✔
184
      styles: `${castArray(remove).join(', ')} { display: none !important; }`
1✔
185
    })
1✔
186
  }
1✔
187

56✔
188
  if (scripts) injectScripts({ $, scripts, type: 'text/javascript' })
56✔
189
  if (modules) injectScripts({ $, modules, type: 'module' })
56!
190

56✔
191
  return rewriteUrls ? rewriteCssUrls({ html: $.html(), url }) : $.html()
56✔
192
}
56✔
193

10✔
194
// Also expose `getDate` as a property of the exported function.
module.exports.getDate = getDate
10✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc