• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cheeriojs / cheerio / 10257211317

05 Aug 2024 10:57PM UTC coverage: 99.481% (+0.8%) from 98.68%
10257211317

Pull #3970

github

web-flow
Merge 3b62f5eae into e6c0988a4
Pull Request #3970: Update build tooling, update imports, require Node 16

1673 of 1691 branches covered (98.94%)

Branch coverage included in aggregate %.

318 of 329 new or added lines in 17 files covered. (96.66%)

3 existing lines in 2 files now uncovered.

7712 of 7743 relevant lines covered (99.6%)

82.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.14
/src/index.ts
1
/**
1✔
2
 * @file Batteries-included version of Cheerio. This module includes several
3
 *   convenience methods for loading documents from various sources.
4
 */
5

6
export * from './index-browser.js';
1✔
7
// TODO: Remove this
8
export { default } from './index-browser.js';
9

10
/* eslint-disable n/no-unsupported-features/node-builtins */
11

12
import type { CheerioAPI, CheerioOptions } from './index-browser.js';
13
import { load } from './index-browser.js';
1✔
14
import { flattenOptions, type InternalOptions } from './options.js';
1✔
15
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
1✔
16

17
import * as htmlparser2 from 'htmlparser2';
1✔
18
import { ParserStream as Parse5Stream } from 'parse5-parser-stream';
1✔
19
import {
20
  decodeBuffer,
21
  DecodeStream,
22
  type SnifferOptions,
23
} from 'encoding-sniffer';
1✔
24
import * as undici from 'undici';
1✔
25
import MIMEType from 'whatwg-mimetype';
1✔
26
import { Writable, finished } from 'node:stream';
1✔
27

28
/**
29
 * Sniffs the encoding of a buffer, then creates a querying function bound to a
30
 * document created from the buffer.
31
 *
32
 * @category Loading
33
 * @example
34
 *
35
 * ```js
36
 * import * as cheerio from 'cheerio';
37
 *
38
 * const buffer = fs.readFileSync('index.html');
39
 * const $ = cheerio.loadBuffer(buffer);
40
 * ```
41
 *
42
 * @param buffer - The buffer to sniff the encoding of.
43
 * @param options - The options to pass to Cheerio.
44
 * @returns The loaded document.
45
 */
46
export function loadBuffer(
1✔
47
  buffer: Buffer,
2✔
48
  options: DecodeStreamOptions = {},
2✔
49
): CheerioAPI {
2✔
50
  const opts = flattenOptions(options);
2✔
51
  const str = decodeBuffer(buffer, {
2✔
52
    defaultEncoding: opts?.xmlMode ? 'utf8' : 'windows-1252',
2!
53
    ...options.encoding,
2✔
54
  });
2✔
55

56
  return load(str, opts);
2✔
57
}
2✔
58

59
function _stringStream(
8✔
60
  options: InternalOptions | undefined,
8✔
61
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
8✔
62
): Writable {
8✔
63
  if (options?._useHtmlParser2) {
8✔
64
    const parser = htmlparser2.createDocumentStream(
3✔
65
      (err, document) => cb(err, load(document)),
3✔
66
      options,
3✔
67
    );
3✔
68

69
    return new Writable({
3✔
70
      decodeStrings: false,
3✔
71
      write(chunk, _encoding, callback) {
3✔
72
        if (typeof chunk !== 'string') {
3!
NEW
73
          throw new TypeError('Expected a string');
×
NEW
74
        }
×
75

76
        parser.write(chunk);
3✔
77
        callback();
3✔
78
      },
3✔
79
      final(callback) {
3✔
80
        parser.end();
3✔
81
        callback();
3✔
82
      },
3✔
83
    });
3✔
84
  }
3✔
85

86
  options ??= {};
5✔
87
  options.treeAdapter ??= htmlparser2Adapter;
5✔
88

89
  if (options.scriptingEnabled !== false) {
5✔
90
    options.scriptingEnabled = true;
5✔
91
  }
5✔
92

93
  const stream = new Parse5Stream(options);
5✔
94

95
  finished(stream, (err) => cb(err, load(stream.document)));
5✔
96

97
  return stream;
5✔
98
}
5✔
99

100
/**
101
 * Creates a stream that parses a sequence of strings into a document.
102
 *
103
 * The stream is a `Writable` stream that accepts strings. When the stream is
104
 * finished, the callback is called with the loaded document.
105
 *
106
 * @category Loading
107
 * @example
108
 *
109
 * ```js
110
 * import * as cheerio from 'cheerio';
111
 * import * as fs from 'fs';
112
 *
113
 * const writeStream = cheerio.stringStream({}, (err, $) => {
114
 *   if (err) {
115
 *     // Handle error
116
 *   }
117
 *
118
 *   console.log($('h1').text());
119
 *   // Output: Hello, world!
120
 * });
121
 *
122
 * fs.createReadStream('my-document.html', { encoding: 'utf8' }).pipe(
123
 *   writeStream,
124
 * );
125
 * ```
126
 *
127
 * @param options - The options to pass to Cheerio.
128
 * @param cb - The callback to call when the stream is finished.
129
 * @returns The writable stream.
130
 */
131
export function stringStream(
1✔
132
  options: CheerioOptions,
3✔
133
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
3✔
134
): Writable {
3✔
135
  return _stringStream(flattenOptions(options), cb);
3✔
136
}
3✔
137

138
export interface DecodeStreamOptions extends CheerioOptions {
139
  encoding?: SnifferOptions;
140
}
141

142
/**
143
 * Parses a stream of buffers into a document.
144
 *
145
 * The stream is a `Writable` stream that accepts buffers. When the stream is
146
 * finished, the callback is called with the loaded document.
147
 *
148
 * @category Loading
149
 * @param options - The options to pass to Cheerio.
150
 * @param cb - The callback to call when the stream is finished.
151
 * @returns The writable stream.
152
 */
153
export function decodeStream(
1✔
154
  options: DecodeStreamOptions,
5✔
155
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
5✔
156
): Writable {
5✔
157
  const { encoding = {}, ...cheerioOptions } = options;
5✔
158
  const opts = flattenOptions(cheerioOptions);
5✔
159

160
  // Unless the caller set one, default to UTF-8 in XML mode and windows-1252 otherwise
161
  encoding.defaultEncoding ??= opts?.xmlMode ? 'utf8' : 'windows-1252';
5✔
162

163
  const decodeStream = new DecodeStream(encoding);
5✔
164
  const loadStream = _stringStream(opts, cb);
5✔
165

166
  decodeStream.pipe(loadStream);
5✔
167

168
  return decodeStream;
5✔
169
}
5✔
170

171
type UndiciStreamOptions = Parameters<typeof undici.stream>[1];
172

173
export interface CheerioRequestOptions extends DecodeStreamOptions {
174
  /** The options passed to `undici`'s `stream` method. */
175
  requestOptions?: UndiciStreamOptions;
176
}
177

178
const defaultRequestOptions: UndiciStreamOptions = {
1✔
179
  method: 'GET',
1✔
180
  // Allow redirects by default
181
  maxRedirections: 5,
1✔
182
  // NOTE: `throwOnError` currently doesn't work https://github.com/nodejs/undici/issues/1753
183
  throwOnError: true,
1✔
184
  // Set an Accept header
185
  headers: {
1✔
186
    accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1✔
187
  },
1✔
188
};
1✔
189

190
/**
191
 * `fromURL` loads a document from a URL.
192
 *
193
 * By default, redirects are allowed and non-2xx responses are rejected.
194
 *
195
 * @category Loading
196
 * @example
197
 *
198
 * ```js
199
 * import * as cheerio from 'cheerio';
200
 *
201
 * const $ = await cheerio.fromURL('https://example.com');
202
 * ```
203
 *
204
 * @param url - The URL to load the document from.
205
 * @param options - The options to pass to Cheerio.
206
 * @returns The loaded document.
207
 */
208
export async function fromURL(
3✔
209
  url: string | URL,
3✔
210
  options: CheerioRequestOptions = {},
3✔
211
): Promise<CheerioAPI> {
3✔
212
  const {
3✔
213
    requestOptions = defaultRequestOptions,
3✔
214
    encoding = {},
3✔
215
    ...cheerioOptions
3✔
216
  } = options;
3✔
217
  let undiciStream: Promise<undici.Dispatcher.StreamData> | undefined;
3✔
218

219
  // Add headers if none were supplied.
220
  requestOptions.headers ??= defaultRequestOptions.headers;
3✔
221

222
  const promise = new Promise<CheerioAPI>((resolve, reject) => {
3✔
223
    undiciStream = undici.stream(url, requestOptions, (res) => {
3✔
224
      const contentType = res.headers['content-type'] ?? 'text/html';
3!
225
      const mimeType = new MIMEType(
3✔
226
        Array.isArray(contentType) ? contentType[0] : contentType,
3!
227
      );
3✔
228

229
      if (!mimeType.isHTML() && !mimeType.isXML()) {
3✔
NEW
230
        throw new RangeError(
×
NEW
231
          `The content-type "${contentType}" is neither HTML nor XML.`,
×
NEW
232
        );
×
NEW
233
      }
×
234

235
      // Forward the charset from the header to the decodeStream.
236
      encoding.transportLayerEncodingLabel = mimeType.parameters.get('charset');
3✔
237

238
      /*
239
       * If we allow redirects, we will have entries in the history.
240
       * The last entry will be the final URL.
241
       */
242
      const history = (
3✔
243
        res.context as
3✔
244
          | {
245
              history?: URL[];
246
            }
247
          | undefined
248
      )?.history;
3✔
249

250
      const opts = {
3✔
251
        encoding,
3✔
252
        // Set XML mode based on the MIME type.
253
        xmlMode: mimeType.isXML(),
3✔
254
        // Set the `baseURL` to the final URL.
255
        baseURL: history ? history[history.length - 1] : url,
3!
256
        ...cheerioOptions,
3✔
257
      };
3✔
258

259
      return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($)));
3✔
260
    });
3✔
261
  });
3✔
262

263
  // Let's make sure the request is completed before returning the promise.
264
  await undiciStream;
3✔
265

266
  return promise;
3✔
267
}
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc