• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

adnsistemas / pdf-lib / #18

24 Mar 2026 08:15PM UTC coverage: 74.286% (+0.3%) from 74.001%
#18

push

David N. Abdala
Documentation change

2569 of 3981 branches covered (64.53%)

Branch coverage included in aggregate %.

7372 of 9401 relevant lines covered (78.42%)

297170.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.64
/src/core/parser/PDFObjectParser.ts
1
import {
54✔
2
  PDFObjectParsingError,
3
  PDFStreamParsingError,
4
  Position,
5
  UnbalancedParenthesisError,
6
} from '../errors';
7
import PDFArray from '../objects/PDFArray';
54✔
8
import PDFBool from '../objects/PDFBool';
54✔
9
import PDFDict, { DictMap } from '../objects/PDFDict';
54✔
10
import PDFHexString from '../objects/PDFHexString';
54✔
11
import PDFName from '../objects/PDFName';
54✔
12
import PDFNull from '../objects/PDFNull';
54✔
13
import PDFNumber from '../objects/PDFNumber';
54✔
14
import PDFObject from '../objects/PDFObject';
15
import PDFRawStream from '../objects/PDFRawStream';
54✔
16
import PDFRef from '../objects/PDFRef';
54✔
17
import PDFStream from '../objects/PDFStream';
18
import PDFString from '../objects/PDFString';
54✔
19
import BaseParser from './BaseParser';
54✔
20
import ByteStream from './ByteStream';
54✔
21
import PDFContext from '../PDFContext';
22
import PDFCatalog from '../structures/PDFCatalog';
54✔
23
import PDFPageLeaf from '../structures/PDFPageLeaf';
54✔
24
import PDFPageTree from '../structures/PDFPageTree';
54✔
25
import CharCodes from '../syntax/CharCodes';
54✔
26
import { IsDelimiter } from '../syntax/Delimiters';
54✔
27
import { Keywords } from '../syntax/Keywords';
54✔
28
import { IsDigit, IsNumeric } from '../syntax/Numeric';
54✔
29
import { IsWhitespace } from '../syntax/Whitespace';
54✔
30
import { arrayAsString, charFromCode } from '../../utils';
54✔
31
import { CipherTransformFactory } from '../crypto';
32
import { isPDFInstance, PDFClasses } from '../../api/objects';
54✔
33

34
// TODO: Throw error if eof is reached before finishing object parse...
35
/**
 * Parser that reads PDF objects (booleans, null, numbers, indirect
 * references, names, literal and hex strings, arrays, dictionaries and raw
 * streams) from the `ByteStream` handed to `BaseParser`.
 *
 * When a `CipherTransformFactory` is supplied and the caller passes the
 * `PDFRef` of the object being parsed, string and stream payloads are run
 * through `decryptBytes` of the transform created for that reference before
 * being wrapped in their PDF object.
 */
class PDFObjectParser extends BaseParser {
  /** Builds a parser over a raw byte array by wrapping it in a `ByteStream`. */
  static forBytes = (
    bytes: Uint8Array,
    context: PDFContext,
    capNumbers?: boolean,
  ) => {
    const stream = ByteStream.of(bytes);
    return new PDFObjectParser(stream, context, capNumbers);
  };

  /** Builds a parser over an already constructed `ByteStream`. */
  static forByteStream = (
    byteStream: ByteStream,
    context: PDFContext,
    capNumbers = false,
  ) => {
    return new PDFObjectParser(byteStream, context, capNumbers);
  };

  /** Context handed to every array and dictionary this parser creates. */
  protected readonly context: PDFContext;

  /** Optional source of cipher transforms; when absent nothing is decrypted. */
  private readonly cryptoFactory?: CipherTransformFactory;

  /**
   * @param byteStream Stream to read from (forwarded to `BaseParser`).
   * @param context Context attached to parsed arrays and dictionaries.
   * @param capNumbers Forwarded unchanged to `BaseParser`.
   * @param cryptoFactory Optional factory used to decrypt strings and streams.
   */
  constructor(
    byteStream: ByteStream,
    context: PDFContext,
    capNumbers = false,
    cryptoFactory?: CipherTransformFactory,
  ) {
    super(byteStream, capNumbers);
    this.cryptoFactory = cryptoFactory;
    this.context = context;
  }

  // TODO: Is it possible to reduce duplicate parsing for ref lookaheads?
  /**
   * Parses the next object in the stream.
   *
   * Leading whitespace and comments are skipped, the keywords `true`,
   * `false` and `null` are tried in that order, and otherwise the next byte
   * selects the specialised parse method.
   *
   * @param ref Reference of the enclosing indirect object, if known. It is
   *            forwarded to the string, array, dictionary and stream
   *            parsers, which use it for decryption.
   * @returns The parsed object.
   * @throws PDFObjectParsingError when the next byte starts no known object.
   */
  parseObject(ref?: PDFRef): PDFObject {
    this.skipWhitespaceAndComments();

    if (this.matchKeyword(Keywords.true)) return PDFBool.True;
    if (this.matchKeyword(Keywords.false)) return PDFBool.False;
    if (this.matchKeyword(Keywords.null)) return PDFNull;

    const lead = this.bytes.peek();

    switch (lead) {
      case CharCodes.LessThan:
        // `<<` opens a dictionary (possibly followed by a stream); a single
        // `<` opens a hex string.
        return this.bytes.peekAhead(1) === CharCodes.LessThan
          ? this.parseDictOrStream(ref)
          : this.parseHexString(ref);
      case CharCodes.LeftParen:
        return this.parseString(ref);
      case CharCodes.ForwardSlash:
        return this.parseName();
      case CharCodes.LeftSquareBracket:
        return this.parseArray(ref);
      default:
        if (IsNumeric[lead]) return this.parseNumberOrRef();
        throw new PDFObjectParsingError(this.bytes.position(), lead);
    }
  }

  /**
   * Parses a number and then looks ahead for a second number followed by
   * `R`. If that pattern is present a `PDFRef` built from both numbers is
   * returned; otherwise the stream is rewound to just after the first number
   * (and any whitespace/comments following it) and a `PDFNumber` is returned.
   */
  protected parseNumberOrRef(): PDFNumber | PDFRef {
    const leading = this.parseRawNumber();
    this.skipWhitespaceAndComments();

    // Remember where the lookahead begins so it can be undone.
    const rewindTo = this.bytes.offset();

    if (IsDigit[this.bytes.peek()]) {
      const trailing = this.parseRawNumber();
      this.skipWhitespaceAndComments();

      const looksLikeRef = this.bytes.peek() === CharCodes.R;
      if (looksLikeRef) {
        this.bytes.assertNext(CharCodes.R);
        return PDFRef.of(leading, trailing);
      }
    }

    this.bytes.moveTo(rewindTo);
    return PDFNumber.of(leading);
  }

  // TODO: Maybe update PDFHexString.of() logic to remove whitespace and validate input?
  /**
   * Parses a hex string delimited by `<` and `>`.
   *
   * Every byte between the delimiters is copied verbatim into the value.
   * When a crypto factory and `ref` are both present, the value's bytes are
   * decrypted and re-encoded as two-digit lowercase hex.
   *
   * @param ref Reference used to create the cipher transform, if any.
   */
  protected parseHexString(ref?: PDFRef): PDFHexString {
    this.bytes.assertNext(CharCodes.LessThan);

    let hex = '';
    while (!this.bytes.done() && this.bytes.peek() !== CharCodes.GreaterThan) {
      const code = this.bytes.next();
      hex += charFromCode(code);
    }

    this.bytes.assertNext(CharCodes.GreaterThan);

    if (this.cryptoFactory && ref) {
      const cipher = this.cryptoFactory.createCipherTransform(
        ref.objectNumber,
        ref.generationNumber,
      );
      const plain = cipher.decryptBytes(PDFHexString.of(hex).asBytes());
      hex = Array.from(plain, (octet: number) =>
        octet.toString(16).padStart(2, '0'),
      ).join('');
    }

    return PDFHexString.of(hex);
  }

  /**
   * Parses a literal string delimited by parentheses.
   *
   * Bytes are consumed until the count of unescaped `(` and `)` returns to
   * zero. The outermost pair of parentheses is then stripped and the
   * remainder, decrypted when a crypto factory and `ref` are both present,
   * is wrapped in a `PDFString`.
   *
   * The opening parenthesis is not asserted here; `parseObject` only calls
   * this method when the next byte is `(`.
   *
   * @param ref Reference used to create the cipher transform, if any.
   * @throws UnbalancedParenthesisError when the stream ends before the
   *         parentheses balance.
   */
  protected parseString(ref?: PDFRef): PDFString {
    let depth = 0;
    let escaped = false;
    let raw = '';

    while (!this.bytes.done()) {
      const code = this.bytes.next();
      raw += charFromCode(code);

      // Only unescaped parentheses affect the nesting depth.
      if (!escaped) {
        if (code === CharCodes.LeftParen) depth += 1;
        else if (code === CharCodes.RightParen) depth -= 1;
      }

      // A backslash toggles the escape state; any other byte clears it.
      escaped = code === CharCodes.BackSlash ? !escaped : false;

      // Keep reading until the parentheses balance out.
      if (depth !== 0) continue;

      // Drop the outer parens so they aren't part of the contents.
      let contents = raw.substring(1, raw.length - 1);

      if (this.cryptoFactory && ref) {
        const cipher = this.cryptoFactory.createCipherTransform(
          ref.objectNumber,
          ref.generationNumber,
        );
        const encrypted = PDFString.of(contents).asBytes();
        contents = arrayAsString(cipher.decryptBytes(encrypted));
      }

      return PDFString.of(contents);
    }

    throw new UnbalancedParenthesisError(this.bytes.position());
  }

  // TODO: Compare performance of string concatenation to charFromCode(...bytes)
  // TODO: Maybe preallocate small Uint8Array if can use charFromCode?
  /**
   * Parses a name: a `/` followed by every byte up to (not including) the
   * next whitespace or delimiter byte, or the end of the stream.
   */
  protected parseName(): PDFName {
    this.bytes.assertNext(CharCodes.ForwardSlash);

    let text = '';
    while (!this.bytes.done()) {
      const code = this.bytes.peek();
      const endsName = IsWhitespace[code] || IsDelimiter[code];
      if (endsName) break;

      this.bytes.next();
      text += charFromCode(code);
    }

    return PDFName.of(text);
  }

  /**
   * Parses an array delimited by `[` and `]`, parsing each element with
   * `parseObject` and skipping whitespace/comments between elements.
   *
   * @param ref Forwarded to `parseObject` for every element.
   */
  protected parseArray(ref?: PDFRef): PDFArray {
    this.bytes.assertNext(CharCodes.LeftSquareBracket);
    this.skipWhitespaceAndComments();

    const items = PDFArray.withContext(this.context);

    while (this.bytes.peek() !== CharCodes.RightSquareBracket) {
      items.push(this.parseObject(ref));
      this.skipWhitespaceAndComments();
    }

    this.bytes.assertNext(CharCodes.RightSquareBracket);
    return items;
  }

  /**
   * Parses a dictionary delimited by `<<` and `>>`.
   *
   * Entries are read as a name followed by an object. After the closing
   * delimiter the `Type` entry selects the class to instantiate:
   * `Catalog` -> `PDFCatalog`, `Pages` -> `PDFPageTree`,
   * `Page` -> `PDFPageLeaf`, anything else -> `PDFDict`.
   *
   * @param ref Forwarded to `parseObject` for every value.
   */
  protected parseDict(ref?: PDFRef): PDFDict {
    this.bytes.assertNext(CharCodes.LessThan);
    this.bytes.assertNext(CharCodes.LessThan);
    this.skipWhitespaceAndComments();

    const entries: DictMap = new Map();

    // Stop at end of input, or as soon as either the current byte or the one
    // after it is `>`.
    while (
      !(
        this.bytes.done() ||
        this.bytes.peek() === CharCodes.GreaterThan ||
        this.bytes.peekAhead(1) === CharCodes.GreaterThan
      )
    ) {
      const entryKey = this.parseName();
      const entryValue = this.parseObject(ref);
      entries.set(entryKey, entryValue);
      this.skipWhitespaceAndComments();
    }

    this.skipWhitespaceAndComments();
    this.bytes.assertNext(CharCodes.GreaterThan);
    this.bytes.assertNext(CharCodes.GreaterThan);

    // The `Type` entry is compared by identity against `PDFName.of(...)`.
    switch (entries.get(PDFName.of('Type'))) {
      case PDFName.of('Catalog'):
        return PDFCatalog.fromMapWithContext(entries, this.context);
      case PDFName.of('Pages'):
        return PDFPageTree.fromMapWithContext(entries, this.context);
      case PDFName.of('Page'):
        return PDFPageLeaf.fromMapWithContext(entries, this.context);
      default:
        return PDFDict.fromMapWithContext(entries, this.context);
    }
  }

  /**
   * Parses a dictionary and, if one of the `stream` keyword variants follows
   * it, the stream data that comes after.
   *
   * The end of the data is taken from the dictionary's `Length` entry when
   * that entry is a `PDFNumber` and `endstream` is found there (after
   * skipping whitespace/comments). Otherwise the end is located with
   * `findEndOfStreamFallback`. When a crypto factory and `ref` are both
   * present, the sliced bytes are decrypted before being wrapped.
   *
   * @param ref Forwarded to `parseDict` and used to create the cipher
   *            transform, if any.
   * @returns The dictionary alone when no stream keyword follows it,
   *          otherwise a `PDFRawStream` of the dictionary and its bytes.
   */
  protected parseDictOrStream(ref?: PDFRef): PDFDict | PDFStream {
    // Position of the dictionary start, reported if the stream end is never found.
    const startPos = this.bytes.position();

    const dict = this.parseDict(ref);

    this.skipWhitespaceAndComments();

    const hasStreamKeyword =
      this.matchKeyword(Keywords.streamEOF1) ||
      this.matchKeyword(Keywords.streamEOF2) ||
      this.matchKeyword(Keywords.streamEOF3) ||
      this.matchKeyword(Keywords.streamEOF4) ||
      this.matchKeyword(Keywords.stream);
    if (!hasStreamKeyword) return dict;

    const dataStart = this.bytes.offset();
    let dataEnd: number;

    const declaredLength = dict.get(PDFName.of('Length'));
    if (!isPDFInstance(declaredLength, PDFClasses.PDFNumber)) {
      dataEnd = this.findEndOfStreamFallback(startPos);
    } else {
      dataEnd = dataStart + (declaredLength as PDFNumber).asNumber();
      this.bytes.moveTo(dataEnd);
      this.skipWhitespaceAndComments();

      const lengthIsTrustworthy = this.matchKeyword(Keywords.endstream);
      if (!lengthIsTrustworthy) {
        // The declared length did not land on `endstream`; rescan from the
        // start of the data instead.
        this.bytes.moveTo(dataStart);
        dataEnd = this.findEndOfStreamFallback(startPos);
      }
    }

    let payload = this.bytes.slice(dataStart, dataEnd);

    if (this.cryptoFactory && ref) {
      const cipher = this.cryptoFactory.createCipherTransform(
        ref.objectNumber,
        ref.generationNumber,
      );
      payload = cipher.decryptBytes(payload);
    }

    return PDFRawStream.of(dict, payload);
  }

  /**
   * Scans forward from the current offset for the `endstream` keyword (or
   * one of its EOL-prefixed variants) that closes the current stream,
   * counting nested `stream` keywords along the way.
   *
   * @param startPos Position reported in the error if no end is found.
   * @returns The offset at which the closing keyword begins.
   * @throws PDFStreamParsingError when the input ends before the nesting
   *         level returns to zero.
   */
  protected findEndOfStreamFallback(startPos: Position) {
    // Starts at 1 because this runs after the opening `stream` keyword.
    let depth = 1;
    let end = this.bytes.offset();

    while (depth !== 0 && !this.bytes.done()) {
      end = this.bytes.offset();

      if (this.matchKeyword(Keywords.stream)) {
        depth += 1;
        continue;
      }

      const closesStream =
        this.matchKeyword(Keywords.EOF1endstream) ||
        this.matchKeyword(Keywords.EOF2endstream) ||
        this.matchKeyword(Keywords.EOF3endstream) ||
        this.matchKeyword(Keywords.endstream);

      if (closesStream) {
        depth -= 1;
      } else {
        this.bytes.next();
      }
    }

    if (depth !== 0) throw new PDFStreamParsingError(startPos);

    return end;
  }
}
314

315
export default PDFObjectParser;
54✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc