• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

get-set-fetch / scraper / 4403974148

pending completion
4403974148

push

github

Andrei Sabau
dependencies update

655 of 852 branches covered (76.88%)

Branch coverage included in aggregate %.

1574 of 1796 relevant lines covered (87.64%)

1266.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.1
/src/plugins/default/NodeFetchPlugin.ts
1
import http, { IncomingMessage, OutgoingHttpHeaders } from 'http';
15✔
2
import https, { Agent, RequestOptions } from 'https';
15✔
3
import dns from 'dns';
15✔
4
import { pipeline, Writable } from 'stream';
15✔
5
import zlib from 'zlib';
15✔
6
import { SecureContextOptions } from 'tls';
7
import { getLogger } from '../../logger/Logger';
15✔
8
import { SchemaType } from '../../schema/SchemaHelper';
9
import Resource from '../../storage/base/Resource';
10
import { Protocol } from '../url-utils';
11
import BaseFetchPlugin, { FetchError } from './BaseFetchPlugin';
15✔
12

13
/**
 * Strategies for turning a hostname into an address before a request is made.
 * The values match the strings accepted by the `dnsResolution` plugin option.
 */
const enum DNS_RESOLUTION {
  /** Value of the `dnsResolution` option that leaves the hostname untouched for the request. */
  LOOKUP = 'lookup',
  /** Value of the `dnsResolution` option that resolves the hostname via `dns.resolve` first. */
  RESOLVE = 'resolve',
}
17

18
/**
 * Fetch plugin that downloads a resource with the nodejs `http` / `https` modules.
 *
 * Supports optional TLS verification bypass, explicit DNS resolution and
 * transparent decompression of `br`, `gzip` and `deflate` encoded responses.
 */
export default class NodeFetchPlugin extends BaseFetchPlugin {
  /** JSON schema describing the plugin options and their defaults. */
  static get schema() {
    return {
      type: 'object',
      title: 'Node Fetch Plugin',
      description: 'fetch resources via nodejs https/http',
      properties: {
        headers: {
          type: 'object',
          additionalProperties: {
            type: 'string',
          },
          default: {
            'Accept-Encoding': 'br,gzip,deflate',
          },
        },
        tlsCheck: {
          type: 'boolean',
          default: true,
          description: 'Check server certificate, certificate and hostname match',
        },
        connectTimeout: {
          type: 'number',
          default: 10 * 1000,
        },
        readTimeout: {
          type: 'number',
          default: 20 * 1000,
        },
        dnsResolution: {
          type: 'string',
          default: 'lookup',
          title: 'DNS Resolution',
          description: 'Use "lookup" to take into account local configuration files like /etc/hosts. Use "resolve" to always perform dns queries over the network.',
        },
      },
    } as const;
  }

  logger = getLogger('NodeFetchPlugin');
  opts: SchemaType<typeof NodeFetchPlugin.schema>;
  agent: Agent;

  /**
   * @param opts - plugin options, see {@link NodeFetchPlugin.schema}
   */
  constructor(opts: SchemaType<typeof NodeFetchPlugin.schema> = {}) {
    super(opts);

    // only build a custom agent when certificate validation is switched off
    if (!this.opts.tlsCheck) {
      this.agent = new Agent({
        // disable cert/server match for tls connections
        checkServerIdentity: () => undefined,
        // ignore any tls related errors
        rejectUnauthorized: false,
      });
    }
  }

  /**
   * Selects the nodejs request function matching a URL protocol.
   *
   * @param protocol - protocol as exposed by `URL.protocol`, trailing colon included
   * @returns `https.request` or `http.request`
   * @throws Error when the protocol is neither `https:` nor `http:`
   */
  getRequestFnc(protocol: string): (options: RequestOptions, callback?: (res: http.IncomingMessage) => void) => http.ClientRequest {
    switch (protocol) {
      case 'https:':
        return https.request;
      case 'http:':
        return http.request;
      default:
        throw new Error('protocol must be either https or http');
    }
  }

  /**
   * Builds the options handed to `http.request` / `https.request`.
   *
   * @param url - parsed resource url
   * @param resource - resource being fetched; its `proxy` entries are spread last and override the computed options
   * @returns request options
   * @throws rejects with the dns error (or an Error when no record is returned) if `dnsResolution` is "resolve" and resolution fails
   */
  async getRequestOptions(url:URL, resource: Resource):Promise<RequestOptions & SecureContextOptions> {
    const {
      hostname, host, port, protocol, pathname, search,
    } = url;
    let reqHost = hostname;

    if (this.opts.dnsResolution === DNS_RESOLUTION.RESOLVE) {
      reqHost = await new Promise<string>((resolve, reject) => {
        dns.resolve(hostname, (err, records) => {
          /*
          just take the 1st return ip address,
          this plugin doesn't have capabilities to retry multiple urls/ips for a single resource
          */
          if (err) {
            reject(err);
          }
          else if (!records || records.length === 0) {
            // an undefined host would make the request silently target localhost
            reject(new Error(`${hostname} could not be resolved, no dns records returned`));
          }
          else {
            this.logger.debug(`${hostname} resolved to ${records[0]}`);
            resolve(records[0]);
          }
        });
      });
    }

    const reqHeaders: OutgoingHttpHeaders = {
      // `host` carries the port when it is not the protocol default, as the Host header requires
      Host: host,
      'Accept-Encoding': 'br,gzip,deflate',
      ...(this.opts.headers as object),
    };

    return {
      method: 'GET',
      defaultPort: protocol === Protocol.HTTPS ? 443 : 80,
      // honor an explicit url port, otherwise nodejs falls back to defaultPort
      ...(port ? { port } : {}),
      // request path must include the query string, `pathname` alone drops it
      path: `${pathname}${search}`,
      host: reqHost,
      headers: reqHeaders,
      timeout: this.opts.connectTimeout,
      rejectUnauthorized: this.opts.tlsCheck,
      agent: protocol === Protocol.HTTPS && !this.opts.tlsCheck ? this.agent : undefined,
      ...resource.proxy,
    };
  }

  /**
   * Downloads the resource content.
   *
   * @param resource - resource to fetch, `resource.url` must be an absolute http(s) url
   * @returns the response body as `data`, together with `contentType` and `status`
   * @throws rejects with a FetchError on redirect status (carrying the absolute redirect url),
   * on invalid status and on read timeout (408); rejects with the underlying error otherwise
   */
  async fetch(resource: Resource): Promise<Partial<Resource>> {
    // errors thrown here turn into rejections since the method is async
    const url = new URL(resource.url);
    const requestFnc = this.getRequestFnc(url.protocol);
    const opts = await this.getRequestOptions(url, resource);
    this.logger.debug(opts, 'Request Options');

    return new Promise<Partial<Resource>>((resolve, reject) => {
      const req = requestFnc(opts, (res: IncomingMessage) => {
        try {
          const { statusCode, headers } = res;

          this.logger.debug(`status code for ${resource.url} : ${statusCode}`);
          this.logger.debug(headers, `response headers for ${resource.url}`);

          // redirects are reported with the received status and the absolute target url
          if (this.isRedirectStatus(statusCode)) {
            const redirectUrl = new URL(headers.location, resource.url).toString();
            // discard the body so the socket is released
            res.resume();
            reject(new FetchError(statusCode, redirectUrl));
            return;
          }

          // don't proceed further unless we have a valid status
          if (!this.isValidStatus(statusCode)) {
            res.resume();
            reject(new FetchError(statusCode));
            return;
          }

          const contentType = this.getContentType(headers['content-type']);

          // collects the (decompressed) response body
          const chunks: Buffer[] = [];
          const output = new Writable({
            write(chunk, encoding, done) {
              chunks.push(Buffer.from(chunk));
              done();
            },
          });

          const onComplete = (err: NodeJS.ErrnoException | null) => {
            if (err) {
              reject(err);
            }
            else {
              const buffer = Buffer.concat(chunks);
              resolve({ data: buffer, contentType, status: statusCode });
            }
          };

          switch (headers['content-encoding']) {
            case 'br':
              pipeline(res, zlib.createBrotliDecompress(), output, onComplete);
              break;
            case 'gzip':
              pipeline(res, zlib.createGunzip(), output, onComplete);
              break;
            case 'deflate':
              pipeline(res, zlib.createInflate(), output, onComplete);
              break;
            default:
              pipeline(res, output, onComplete);
              break;
          }
        }
        catch (err) {
          // make sure an unread response does not keep the socket busy
          res.resume();
          reject(err);
        }
      });

      // destroying the request with an error surfaces it via the 'error' event below
      req.setTimeout(this.opts.readTimeout, () => {
        req.destroy(new FetchError(408));
      });

      req.on('error', err => {
        req.destroy();
        reject(err);
      });

      req.end();
    });
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc