• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

microlinkhq / html-get / 13597013192

28 Feb 2025 09:43PM UTC coverage: 97.409%. First build
13597013192

Pull #213

github

web-flow
Merge f1830c3cf into d68398ade
Pull Request #213: chore(html): better url rewrite

121 of 131 branches covered (92.37%)

Branch coverage included in aggregate %.

10 of 11 new or added lines in 1 file covered. (90.91%)

518 of 525 relevant lines covered (98.67%)

24.68 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.63
/src/html.js
1
'use strict'
12✔
2

12✔
3
const {
  get,
  split,
  nth,
  castArray,
  forEach,
  escape: escapeHtml
} = require('lodash')
12✔
4
const debug = require('debug-logfmt')('html-get:rewrite')
12✔
5
const isLocalAddress = require('is-local-address')
12✔
6
const { TAGS: URL_TAGS } = require('html-urls')
12✔
7
const isHTML = require('is-html-content')
12✔
8
const cssUrl = require('css-url-regex')
12✔
9
const execall = require('execall')
12✔
10
const cheerio = require('cheerio')
12✔
11
const { URL } = require('url')
12✔
12
const path = require('path')
12✔
13

12✔
14
const {
12✔
15
  date: toDate,
12✔
16
  isMime,
12✔
17
  isUrl,
12✔
18
  mimeExtension,
12✔
19
  parseUrl
12✔
20
} = require('@metascraper/helpers')
12✔
21

12✔
22
// Whether `el` (anything exposing a `length`, e.g. a selection) is non-empty.
const has = ({ length }) => length !== 0
12✔
23

12✔
24
// Push `item` onto `collection` only when the `el` selection is empty.
// Yields `false` when nothing was pushed, otherwise the new collection length.
const upsert = (el, collection, item) =>
  has(el) ? false : collection.push(item)
12✔
25

12✔
26
/**
 * Infer timestamp from `last-modified`, `date`, or `age` response headers.
 *
 * `last-modified` takes precedence over `date`. Only when neither is present
 * is the timestamp derived by subtracting `age` (in seconds) from the current
 * time.
 *
 * @param {object} headers - Response headers, read by lowercase name.
 * @returns {*} The result of `toDate` for the selected timestamp, or
 *   `undefined` when no usable header is available.
 */
const getDate = headers => {
  const timestamp = get(headers, 'last-modified') || get(headers, 'date')
  if (timestamp) return toDate(timestamp)

  // A missing or non-numeric `age` coerces to NaN. Stop here instead of
  // handing an invalid timestamp to `toDate`.
  const age = Number(get(headers, 'age'))
  if (!Number.isFinite(age)) return undefined

  return toDate(Date.now() - age * 1000)
}
74✔
35

12✔
36
/**
 * Append to `<head>` the tags that are missing from the document, using
 * values inferred from the URL and the response headers:
 *
 * - `<title>` (basename of `url`)
 * - `<meta property="og:site_name">` (when `parseUrl` yields a domain)
 * - `<meta name="date">` (when `getDate` yields a value and the document has
 *   no `article:published_time` meta)
 * - `<link rel="canonical">`
 * - `<meta charset>` (when the `content-type` header declares one)
 *
 * Every interpolated value is HTML-escaped so that quotes, `&` or `<` coming
 * from the URL or the headers cannot break out of the generated markup.
 *
 * @param {object} options
 * @param {Function} options.$ - Loaded cheerio instance for the document.
 * @param {string} options.url - URL the document was fetched from.
 * @param {object} options.headers - Response headers, read by lowercase name.
 */
const addHead = ({ $, url, headers }) => {
  const tags = []
  const contentType = get(headers, 'content-type')
  const rawCharset = nth(split(contentType, 'charset='), 1)
  // Keep only the charset token: drop trailing parameters (`; boundary=x`)
  // and the optional quotes allowed around header parameter values.
  const charset =
    rawCharset &&
    rawCharset
      .split(';')[0]
      .trim()
      .replace(/^["']+|["']+$/g, '')
  const date = getDate(headers)
  const { domain } = parseUrl(url)
  const head = $('head')

  upsert(
    head.find('title'),
    tags,
    `<title>${escapeHtml(path.basename(url))}</title>`
  )

  if (domain) {
    upsert(
      head.find('meta[property="og:site_name"]'),
      tags,
      `<meta property="og:site_name" content="${escapeHtml(domain)}">`
    )
  }

  if (date) {
    upsert(
      head.find('meta[property="article:published_time"]'),
      tags,
      `<meta name="date" content="${escapeHtml(date)}" />`
    )
  }

  upsert(
    head.find('link[rel="canonical"]'),
    tags,
    `<link rel="canonical" href="${escapeHtml(url)}">`
  )

  if (charset) {
    upsert(
      head.find('meta[charset]'),
      tags,
      `<meta charset="${escapeHtml(charset)}">`
    )
  }

  tags.forEach(tag => head.append(tag))
}
70✔
74

12✔
75
/**
 * Build a minimal HTML document around a non-HTML response.
 *
 * Image, video and audio content types are embedded through their media
 * element pointing at `url`; JSON payloads are placed inside `<pre>`. Any
 * other content type produces an empty `<body>`.
 *
 * Interpolated values are HTML-escaped, so a JSON payload containing `<` or
 * `&` stays text instead of being parsed as markup, and a quote in the URL or
 * content type cannot terminate the attribute early.
 *
 * @param {object} options
 * @param {string} options.url - URL of the resource.
 * @param {object} options.headers - Response headers, read by lowercase name.
 * @param {string} options.html - Raw response payload.
 * @returns {string} A complete HTML document.
 */
const addBody = ({ url, headers, html }) => {
  const contentType = get(headers, 'content-type')

  let element = ''

  if (isMime(contentType, 'image')) {
    element = `<img src="${escapeHtml(url)}"></img>`
  } else if (isMime(contentType, 'video')) {
    element = `<video><source src="${escapeHtml(url)}" type="${escapeHtml(
      contentType
    )}"></source></video>`
  } else if (isMime(contentType, 'audio')) {
    element = `<audio><source src="${escapeHtml(url)}" type="${escapeHtml(
      contentType
    )}"></source></audio>`
  } else if (mimeExtension(contentType) === 'json') {
    element = `<pre>${escapeHtml(html)}</pre>`
  }

  return `<!DOCTYPE html><html><head></head><body>${element}</body></html>`
}
18✔
92

12✔
93
// True when `prop` begins with one of the `og:`, `fb:` or `al:` prefixes.
// A missing value is treated as the empty string.
const isOpenGraph = (prop = '') => {
  for (const prefix of ['og:', 'fb:', 'al:']) {
    if (prop.startsWith(prefix)) return true
  }
  return false
}
44✔
95

12✔
96
/**
 * Normalise which attribute (`name` or `property`) identifies each `<meta>`
 * tag that carries a `content` value.
 *
 * - Open Graph identifiers stored under `name` are moved to `property`.
 * - Non-Open Graph identifiers stored under `property` are moved to `name`.
 *
 * Tags without `content` are left untouched.
 */
const rewriteMetaTags = ({ $ }) => {
  $('meta').each((_, element) => {
    const meta = $(element)
    const hasContent = Boolean(meta.attr('content'))
    if (!hasContent) return

    const name = meta.attr('name')
    const property = meta.attr('property')

    // Open Graph identifier under `name` that `property` does not already hold.
    if (property !== name && isOpenGraph(name)) {
      meta.removeAttr('name').attr('property', name)
      debug('og', meta.attr())
      return
    }

    // Regular identifier wrongly stored under `property`.
    if (property && !isOpenGraph(property)) {
      meta.removeAttr('property').attr('name', property)
      debug('meta', meta.attr())
    }
  })
}
12✔
115

12✔
116
/**
 * Resolve the relative URLs found in URL-bearing attributes against `url`.
 *
 * `URL_TAGS` is iterated as attribute name → list of tag selectors. For every
 * matching element whose attribute is not already an `http:`/`https:` URL:
 *
 * - values that resolve to a non-HTTP protocol are left untouched;
 * - elements resolving to a local address are removed from the document;
 * - otherwise the attribute is replaced with the absolute URL.
 *
 * @param {object} options
 * @param {Function} options.$ - Loaded cheerio instance for the document.
 * @param {string} options.url - Base URL used to resolve relative values.
 */
const rewriteHtmlUrls = ({ $, url }) => {
  // Only a real `http:`/`https:` scheme marks a value as already absolute. A
  // plain `startsWith('http')` would also skip relative paths such as
  // `http-guide.html`, leaving them unresolved.
  const isAbsoluteHttpUrl = /^https?:/

  forEach(URL_TAGS, (tagName, urlAttr) => {
    $(tagName.join(',')).each(function () {
      const el = $(this)
      const attr = el.attr(urlAttr)
      if (typeof attr !== 'string' || isAbsoluteHttpUrl.test(attr)) return
      try {
        const urlObj = new URL(attr, url)
        if (!urlObj.protocol.startsWith('http')) return
        if (isLocalAddress(urlObj.hostname)) {
          el.remove()
        } else {
          el.attr(urlAttr, urlObj.toString())
        }
      } catch (_) {
        // Best effort: values that cannot be parsed as URLs are left as-is.
      }
    })
  })
}
9✔
134

12✔
135
/**
 * Turn root-relative CSS `url(...)` references found in `html` into absolute
 * URLs resolved against `url`.
 *
 * Only values starting with `/` are rewritten. Unquoted, single-quoted and
 * double-quoted forms are all handled, and the original quoting is kept.
 *
 * @param {object} options
 * @param {string} options.html - Serialized document to rewrite.
 * @param {string} options.url - Base URL used to resolve the references.
 * @returns {string} The document with the matching references made absolute.
 */
const rewriteCssUrls = ({ html, url }) => {
  // Captured values may or may not include surrounding quotes/whitespace.
  const stripCssQuotes = value =>
    value.trim().replace(/^(["'])(.*)\1$/, '$2')

  // De-duplicate so each distinct reference is resolved and replaced once.
  const candidates = new Set()
  for (const match of execall(cssUrl(), html)) {
    for (const subMatch of match.subMatches) {
      // Groups that did not participate in the match are `undefined`.
      if (typeof subMatch === 'string') candidates.add(stripCssQuotes(subMatch))
    }
  }

  let output = html

  for (const reference of candidates) {
    if (!reference.startsWith('/')) continue
    try {
      const absoluteUrl = new URL(reference, url).toString()
      for (const quote of ['', '"', "'"]) {
        output = output.replaceAll(
          `url(${quote}${reference}${quote})`,
          `url(${quote}${absoluteUrl}${quote})`
        )
      }
    } catch (_) {
      // Best effort: references that cannot be resolved are left untouched.
    }
  }

  return output
}
9✔
154

12✔
155
/**
 * Append one or more styles to `<head>`. A value recognised by `isUrl` is
 * linked as an external stylesheet; anything else is inlined in `<style>`.
 */
const injectStyle = ({ $, styles }) => {
  const appendStyle = style => {
    const head = $('head')
    if (isUrl(style)) {
      head.append(`<link rel="stylesheet" type="text/css" href="${style}">`)
    } else {
      head.append(`<style type="text/css">${style}</style>`)
    }
  }

  castArray(styles).forEach(appendStyle)
}
3✔
163

12✔
164
/**
 * Append one or more scripts to `<head>` using the given `type`. A value
 * recognised by `isUrl` becomes an external `src` script; anything else is
 * inlined as the script body.
 */
const injectScripts = ({ $, scripts, type }) => {
  const appendScript = script => {
    const head = $('head')
    if (isUrl(script)) {
      head.append(`<script src="${script}" type="${type}"></script>`)
    } else {
      head.append(`<script type="${type}">${script}</script>`)
    }
  }

  castArray(scripts).forEach(appendScript)
}
1✔
172

12✔
173
/**
 * Ensure the markup declares a doctype, prepending `<!DOCTYPE html>` when it
 * does not.
 *
 * Leading whitespace and leading HTML comments are skipped before looking for
 * the declaration, so a document opening with `<!-- ... -->` is not mistaken
 * for one that already has a doctype, and indentation before an existing
 * doctype does not produce a duplicate.
 *
 * @param {string} html - Markup to check.
 * @returns {string} `html` unchanged, or prefixed with `<!DOCTYPE html>`.
 */
const addDocType = html => {
  let rest = html.trimStart()

  // Walk past any comments that precede the declaration.
  while (rest.startsWith('<!--')) {
    const end = rest.indexOf('-->', 4)
    if (end === -1) break
    rest = rest.slice(end + 3).trimStart()
  }

  return /^<!doctype/i.test(rest) ? html : `<!DOCTYPE html>${html}`
}
70✔
175

12✔
176
module.exports = ({
12✔
177
  html,
70✔
178
  url,
70✔
179
  headers = {},
70✔
180
  styles,
70✔
181
  hide,
70✔
182
  remove,
70✔
183
  rewriteUrls,
70✔
184
  rewriteHtml,
70✔
185
  scripts,
70✔
186
  modules
70✔
187
}) => {
70✔
188
  const content = addDocType(
70✔
189
    isHTML(html) ? html : addBody({ url, headers, html })
70✔
190
  )
70✔
191

70✔
192
  const $ = cheerio.load(content)
70✔
193

70✔
194
  if (rewriteUrls) rewriteHtmlUrls({ $, url })
70✔
195

70✔
196
  if (rewriteHtml) rewriteMetaTags({ $, url })
70✔
197

70✔
198
  addHead({ $, url, headers })
70✔
199

70✔
200
  if (styles) injectStyle({ $, styles })
70✔
201

70✔
202
  if (hide) {
70✔
203
    injectStyle({
1✔
204
      $,
1✔
205
      styles: `${castArray(hide).join(', ')} { visibility: hidden !important; }`
1✔
206
    })
1✔
207
  }
1✔
208

70✔
209
  if (remove) {
70✔
210
    injectStyle({
1✔
211
      $,
1✔
212
      styles: `${castArray(remove).join(', ')} { display: none !important; }`
1✔
213
    })
1✔
214
  }
1✔
215

70✔
216
  if (scripts) injectScripts({ $, scripts, type: 'text/javascript' })
70✔
217
  if (modules) injectScripts({ $, modules, type: 'module' })
70!
218

70✔
219
  return rewriteUrls ? rewriteCssUrls({ html: $.html(), url }) : $.html()
70✔
220
}
70✔
221

12✔
222
// Also expose `getDate` as a property of the exported function.
module.exports.getDate = getDate
12✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc