• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PHPOffice / PhpSpreadsheet / 22907609982

10 Mar 2026 02:31PM UTC coverage: 96.895% (-0.009%) from 96.904%
22907609982

Pull #4827

github

web-flow
Merge a418af938 into a1dacfdf7
Pull Request #4827: Stream encoding conversion in CSV reader to reduce peak memory

30 of 35 new or added lines in 1 file covered. (85.71%)

5 existing lines in 1 file now uncovered.

47717 of 49246 relevant lines covered (96.9%)

384.12 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.54
/src/PhpSpreadsheet/Reader/Csv.php
1
<?php
2

3
namespace PhpOffice\PhpSpreadsheet\Reader;
4

5
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
6
use PhpOffice\PhpSpreadsheet\Cell\Cell;
7
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
8
use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
9
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
10
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
11
use PhpOffice\PhpSpreadsheet\Spreadsheet;
12
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
13
use Throwable;
14

15
class Csv extends BaseReader
{
    public const DEFAULT_FALLBACK_ENCODING = 'CP1252';
    public const GUESS_ENCODING = 'guess';
    public const UTF8_BOM = "\xEF\xBB\xBF";
    public const UTF8_BOM_LEN = 3;
    public const UTF16BE_BOM = "\xfe\xff";
    public const UTF16BE_BOM_LEN = 2;
    public const UTF16BE_LF = "\x00\x0a";
    public const UTF16LE_BOM = "\xff\xfe";
    public const UTF16LE_BOM_LEN = 2;
    public const UTF16LE_LF = "\x0a\x00";
    public const UTF32BE_BOM = "\x00\x00\xfe\xff";
    public const UTF32BE_BOM_LEN = 4;
    public const UTF32BE_LF = "\x00\x00\x00\x0a";
    public const UTF32LE_BOM = "\xff\xfe\x00\x00";
    public const UTF32LE_BOM_LEN = 4;
    public const UTF32LE_LF = "\x0a\x00\x00\x00";

    /**
     * Input encoding.
     */
    private string $inputEncoding = 'UTF-8';

    /**
     * Fallback encoding if guess strikes out.
     */
    private string $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

    /**
     * Delimiter.
     */
    private ?string $delimiter = null;

    /**
     * Enclosure.
     */
    private string $enclosure = '"';

    /**
     * Sheet index to read.
     */
    private int $sheetIndex = 0;

    /**
     * Load rows contiguously.
     */
    private bool $contiguous = false;

    /**
     * The character that can escape the enclosure.
     * This will probably become unsupported in Php 9.
     * Not yet ready to mark deprecated in order to give users
     * a migration path.
     */
    private ?string $escapeCharacter = null;

    /**
     * Callback for setting defaults in construction.
     *
     * @var ?callable
     */
    private static $constructorCallback;

    /** Changed from true to false in release 4.0.0 */
    public const DEFAULT_TEST_AUTODETECT = false;

    /**
     * Attempt autodetect line endings (deprecated after PHP8.1)?
     */
    private bool $testAutodetect = self::DEFAULT_TEST_AUTODETECT;

    protected bool $castFormattedNumberToNumeric = false;

    protected bool $preserveNumericFormatting = false;

    private bool $preserveNullString = false;

    private bool $sheetNameIsFileName = false;

    private string $getTrue = 'true';

    private string $getFalse = 'false';

    private string $thousandsSeparator = ',';

    private string $decimalSeparator = '.';

    /**
     * Create a new CSV Reader instance.
     */
    public function __construct()
    {
        parent::__construct();
        $callback = self::$constructorCallback;
        if ($callback !== null) {
            $callback($this);
        }
    }

    /**
     * Set a callback to change the defaults.
     *
     * The callback must accept the Csv Reader object as the first parameter,
     * and it should return void.
     */
    public static function setConstructorCallback(?callable $callback): void
    {
        self::$constructorCallback = $callback;
    }

    /**
     * Return the callback currently registered for construction, if any.
     */
    public static function getConstructorCallback(): ?callable
    {
        return self::$constructorCallback;
    }

    /**
     * Set the encoding of the input file.
     *
     * The special value self::GUESS_ENCODING makes the reader guess the
     * encoding when the file is opened.
     */
    public function setInputEncoding(string $encoding): self
    {
        $this->inputEncoding = $encoding;

        return $this;
    }

    public function getInputEncoding(): string
    {
        return $this->inputEncoding;
    }

    /**
     * Set the encoding used when guessing finds nothing.
     */
    public function setFallbackEncoding(string $fallbackEncoding): self
    {
        $this->fallbackEncoding = $fallbackEncoding;

        return $this;
    }

    public function getFallbackEncoding(): string
    {
        return $this->fallbackEncoding;
    }

    /**
     * Move filepointer past any BOM marker.
     *
     * The pointer ends up just after a leading UTF-8 BOM, or at offset 0
     * when the file does not start with one.
     */
    protected function skipBOM(): void
    {
        rewind($this->fileHandle);

        if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
            rewind($this->fileHandle);
        }
    }

    /**
     * Identify any separator that is explicitly set in the file.
     *
     * A first line of the form "sep=X" sets the delimiter to X and is
     * consumed; any other first line leaves the delimiter alone and the
     * pointer is put back after the BOM.
     */
    protected function checkSeparator(): void
    {
        $line = fgets($this->fileHandle);
        if ($line === false) {
            return;
        }

        if ((strlen(trim($line, "\r\n")) === 5) && (stripos($line, 'sep=') === 0)) {
            $this->delimiter = substr($line, 4, 1);

            return;
        }

        $this->skipBOM();
    }

    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     */
    protected function inferSeparator(): void
    {
        $temp = $this->delimiter;
        if ($temp !== null) {
            return;
        }

        $inferenceEngine = new Delimiter($this->fileHandle, $this->getEscapeCharacter(), $this->enclosure);

        // If number of lines is 0, nothing to infer : fall back to the default
        if ($inferenceEngine->linesCounted() === 0) {
            $this->delimiter = $inferenceEngine->getDefaultDelimiter();
            $this->skipBOM();

            return;
        }

        $this->delimiter = $inferenceEngine->infer();

        // If no delimiter could be detected, fall back to the default
        if ($this->delimiter === null) {
            $this->delimiter = $inferenceEngine->getDefaultDelimiter();
        }

        $this->skipBOM();
    }

    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * @return array<int, array{worksheetName: string, lastColumnLetter: string, lastColumnIndex: int, totalRows: int, totalColumns: int, sheetState: string}>
     */
    public function listWorksheetInfo(string $filename): array
    {
        // Open file
        $this->openFileOrMemory($filename);
        $fileHandle = $this->fileHandle;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            $worksheetInfo = [
                [
                    'worksheetName' => 'Worksheet',
                    'lastColumnLetter' => 'A',
                    'lastColumnIndex' => 0,
                    'totalRows' => 0,
                    'totalColumns' => 0,
                    'sheetState' => Worksheet::SHEETSTATE_VISIBLE,
                ],
            ];
            $delimiter = $this->delimiter ?? '';

            // Loop through each line of the file in turn
            $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
            while (is_array($rowData)) {
                ++$worksheetInfo[0]['totalRows'];
                $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
                $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
            }

            $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1, true);
            $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;
        } finally {
            // Close file even when scanning fails part-way through
            fclose($fileHandle);
        }

        return $worksheetInfo;
    }

    /**
     * Loads Spreadsheet from file.
     */
    protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        $spreadsheet = $this->newSpreadsheet();
        $spreadsheet->setValueBinder($this->valueBinder);

        // Load into this instance
        return $this->loadIntoExisting($filename, $spreadsheet);
    }

    /**
     * Loads Spreadsheet from string.
     *
     * The contents are wrapped in a data: URI and read through
     * openDataUri(), so no encoding conversion is applied on this path.
     */
    public function loadSpreadsheetFromString(string $contents): Spreadsheet
    {
        $spreadsheet = $this->newSpreadsheet();
        $spreadsheet->setValueBinder($this->valueBinder);

        // Load into this instance
        return $this->loadStringOrFile('data://text/plain,' . urlencode($contents), $spreadsheet, true);
    }

    /**
     * Size of each chunk when streaming encoding conversion.
     * A multiple of 4, so a full chunk always holds a whole number of
     * 2-byte or 4-byte code units. That alone does not keep multi-unit
     * characters together: UTF-16 surrogate pairs are kept intact by
     * convertEncodingStreaming(), and variable-width multi-byte
     * encodings are not protected at all.
     */
    private const CHUNK_SIZE = 65536;

    /**
     * Open the file and leave $this->fileHandle pointing at UTF-8 data.
     *
     * Resolves the input encoding (BOM first, then guessing when asked to)
     * and, when it is not UTF-8, replaces the handle with a converted copy.
     * Note that the resolved encoding is stored back on the reader.
     */
    private function openFileOrMemory(string $filename): void
    {
        // Open file
        $fhandle = $this->canRead($filename);
        if (!$fhandle) {
            throw new ReaderException($filename . ' is an Invalid Spreadsheet file.');
        }
        if ($this->inputEncoding === 'UTF-8') {
            $encoding = self::guessEncodingBom($filename);
            if ($encoding !== '') {
                $this->inputEncoding = $encoding;
            }
        }
        if ($this->inputEncoding === self::GUESS_ENCODING) {
            $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
        }
        $this->openFile($filename);
        if ($this->inputEncoding !== 'UTF-8') {
            fclose($this->fileHandle);
            $this->convertEncodingStreaming($filename);
        }
    }

    /**
     * Convert file encoding to UTF-8 using chunked streaming to avoid
     * loading the entire file into memory at once.
     *
     * On success $this->fileHandle is a php://temp stream holding the
     * UTF-8 text, positioned after any BOM.
     *
     * @throws ReaderException when the source or the temporary stream cannot be opened
     */
    private function convertEncodingStreaming(string $filename): void
    {
        $sourceHandle = fopen($filename, 'rb');
        $outputHandle = fopen('php://temp', 'r+b');
        if ($sourceHandle === false || $outputHandle === false) {
            // @codeCoverageIgnoreStart
            if ($sourceHandle !== false) {
                fclose($sourceHandle);
            }
            if ($outputHandle !== false) {
                fclose($outputHandle);
            }

            // The caller has already closed $this->fileHandle, so carrying
            // on would leave the reader holding a dead resource.
            throw new ReaderException('Could not open file ' . $filename . ' for reading.');
            // @codeCoverageIgnoreEnd
        }

        $encoding = $this->inputEncoding;
        $charWidth = $this->encodingCharWidth($encoding);
        // Ensure chunk size is aligned to character width
        $chunkSize = self::CHUNK_SIZE - (self::CHUNK_SIZE % $charWidth);

        $leftover = '';

        try {
            while (!feof($sourceHandle)) {
                $rawChunk = fread($sourceHandle, $chunkSize);
                if ($rawChunk === false || $rawChunk === '') {
                    break;
                }

                $chunk = $leftover . $rawChunk;

                // Convert only whole code units; trailing partial bytes wait
                // for the next read. With a width of 1 nothing is held back.
                // NOTE(review): variable-width multi-byte encodings (e.g.
                // Shift_JIS) report width 1, so a character of theirs can
                // still be cut at a chunk boundary - not handled here.
                $usable = strlen($chunk) - (strlen($chunk) % $charWidth);

                // A UTF-16 surrogate pair is two code units. Keep a trailing
                // high surrogate with its partner in the next chunk.
                if ($charWidth === 2 && self::endsWithHighSurrogate($chunk, $usable, $encoding)) {
                    $usable -= 2;
                }

                $leftover = substr($chunk, $usable);
                $chunk = substr($chunk, 0, $usable);

                if ($chunk !== '') {
                    $converted = StringHelper::convertEncoding($chunk, 'UTF-8', $encoding);
                    fwrite($outputHandle, $converted);
                }
            }

            // Flush any remaining bytes
            if ($leftover !== '') {
                $converted = StringHelper::convertEncoding($leftover, 'UTF-8', $encoding);
                fwrite($outputHandle, $converted);
            }
        } finally {
            fclose($sourceHandle);
        }

        $this->fileHandle = $outputHandle;
        $this->skipBOM();
    }

    /**
     * Tell whether the 2-byte unit ending at offset $length is a UTF-16 high surrogate.
     *
     * Only the explicit-endianness names UTF-16BE and UTF-16LE are
     * examined. For any other encoding the byte order is not known here,
     * so the answer is false and the chunk is left as it is.
     *
     * @param string $chunk raw bytes in the source encoding
     * @param int $length number of leading bytes of $chunk to consider
     */
    private static function endsWithHighSurrogate(string $chunk, int $length, string $encoding): bool
    {
        if ($length < 2) {
            return false;
        }

        // The surrogate range is recognisable from the most significant
        // byte of the unit: first byte when big-endian, second otherwise.
        $index = match (strtoupper($encoding)) {
            'UTF-16BE' => $length - 2,
            'UTF-16LE' => $length - 1,
            default => -1,
        };
        if ($index < 0) {
            return false;
        }

        $highByte = ord($chunk[$index]);

        return $highByte >= 0xD8 && $highByte <= 0xDB;
    }

    /**
     * Return the byte width of a single character in the given encoding.
     * Returns 1 for variable-width or single-byte encodings.
     */
    private function encodingCharWidth(string $encoding): int
    {
        return match (strtoupper($encoding)) {
            'UTF-32', 'UTF-32BE', 'UTF-32LE', 'UCS-4', 'UCS-4BE', 'UCS-4LE' => 4,
            'UTF-16', 'UTF-16BE', 'UTF-16LE', 'UCS-2', 'UCS-2BE', 'UCS-2LE' => 2,
            default => 1,
        };
    }

    /**
     * Enable or disable the auto_detect_line_endings toggle during load.
     */
    public function setTestAutoDetect(bool $value): self
    {
        $this->testAutodetect = $value;

        return $this;
    }

    /**
     * Set the auto_detect_line_endings ini value when that is enabled.
     *
     * @return ?string the previous ini value when ini_set returned one, otherwise null
     */
    private function setAutoDetect(?string $value, int $version = PHP_VERSION_ID): ?string
    {
        $retVal = null;
        if ($value !== null && $this->testAutodetect && $version < 90000) {
            $retVal2 = @ini_set('auto_detect_line_endings', $value);
            if (is_string($retVal2)) {
                $retVal = $retVal2;
            }
        }

        return $retVal;
    }

    /**
     * Choose whether formatted numeric strings are cast to numbers, and
     * whether a matching number format is applied to those cells.
     */
    public function castFormattedNumberToNumeric(
        bool $castFormattedNumberToNumeric,
        bool $preserveNumericFormatting = false
    ): void {
        $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric;
        $this->preserveNumericFormatting = $preserveNumericFormatting;
    }

    /**
     * Open data uri for reading.
     */
    private function openDataUri(string $filename): void
    {
        $fileHandle = fopen($filename, 'rb');
        if ($fileHandle === false) {
            // @codeCoverageIgnoreStart
            throw new ReaderException('Could not open file ' . $filename . ' for reading.');
            // @codeCoverageIgnoreEnd
        }

        $this->fileHandle = $fileHandle;
    }

    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        return $this->loadStringOrFile($filename, $spreadsheet, false);
    }

    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * Wraps the real load so the auto_detect_line_endings ini value is put
     * back whether or not loading succeeds.
     */
    private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet
    {
        // Deprecated in Php8.1
        $iniset = $this->setAutoDetect('1');

        try {
            $this->loadStringOrFile2($filename, $spreadsheet, $dataUri);
            $this->setAutoDetect($iniset);
        } catch (Throwable $e) {
            $this->setAutoDetect($iniset);

            throw $e;
        }

        return $spreadsheet;
    }

    /**
     * Read every row of the source into the sheet selected by sheetIndex.
     *
     * @param bool $dataUri true when $filename is a data: URI rather than a path
     */
    private function loadStringOrFile2(string $filename, Spreadsheet $spreadsheet, bool $dataUri): void
    {
        // Open file
        if ($dataUri) {
            $this->openDataUri($filename);
        } else {
            $this->openFileOrMemory($filename);
        }
        $fileHandle = $this->fileHandle;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Create new PhpSpreadsheet object
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);
            if ($this->sheetNameIsFileName) {
                $sheet->setTitle(substr(basename($filename, '.csv'), 0, Worksheet::SHEET_TITLE_MAXIMUM_LENGTH));
            }

            // Set our starting row based on whether we're in contiguous mode or not
            $currentRow = 1;
            $outRow = 0;

            // Loop through each line of the file in turn
            $delimiter = $this->delimiter ?? '';
            $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
            $valueBinder = $this->valueBinder ?? Cell::getValueBinder();
            $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
            $this->getTrue = Calculation::getTRUE();
            $this->getFalse = Calculation::getFALSE();
            $this->thousandsSeparator = StringHelper::getThousandsSeparator();
            $this->decimalSeparator = StringHelper::getDecimalSeparator();
            while (is_array($rowData)) {
                $noOutputYet = true;
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    if ($preserveBooleanString) {
                        $rowDatum = $rowDatum ?? '';
                    } else {
                        $this->convertBoolean($rowDatum);
                    }
                    $numberFormatMask = $this->castFormattedNumberToNumeric ? $this->convertFormattedNumber($rowDatum) : '';
                    if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        if ($this->contiguous) {
                            // Contiguous mode: output rows advance only when a row produces a cell
                            if ($noOutputYet) {
                                $noOutputYet = false;
                                ++$outRow;
                            }
                        } else {
                            $outRow = $currentRow;
                        }
                        // Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
                        if ($numberFormatMask !== '') {
                            $sheet->getStyle($columnLetter . $outRow)
                                ->getNumberFormat()
                                ->setFormatCode($numberFormatMask);
                        }
                        // Set cell value
                        $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                    }
                    StringHelper::stringIncrement($columnLetter);
                }
                $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
                ++$currentRow;
            }
        } finally {
            // Close file even when a cell or sheet operation throws
            fclose($fileHandle);
        }
    }

    /**
     * Convert string true/false to boolean, and null to null-string.
     */
    private function convertBoolean(mixed &$rowDatum): void
    {
        if (is_string($rowDatum)) {
            if (strcasecmp($this->getTrue, $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
                $rowDatum = true;
            } elseif (strcasecmp($this->getFalse, $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
                $rowDatum = false;
            }
        } else {
            $rowDatum = $rowDatum ?? '';
        }
    }

    /**
     * Convert numeric strings to int or float values.
     *
     * The value becomes a float when it contains the decimal separator or
     * when it does not fit in a native integer, and an int otherwise.
     *
     * @return string the number format mask to apply, or '' when none applies
     */
    private function convertFormattedNumber(mixed &$rowDatum): string
    {
        $numberFormatMask = '';
        if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) {
            $numeric = str_replace(
                [$this->thousandsSeparator, $this->decimalSeparator],
                ['', '.'],
                $rowDatum
            );

            if (is_numeric($numeric)) {
                $decimalPos = strpos($rowDatum, $this->decimalSeparator);
                if ($this->preserveNumericFormatting === true) {
                    $numberFormatMask = (str_contains($rowDatum, $this->thousandsSeparator))
                        ? '#,##0' : '0';
                    if ($decimalPos !== false) {
                        $decimals = strlen($rowDatum) - $decimalPos - 1;
                        $numberFormatMask .= '.' . str_repeat('0', min($decimals, 6));
                    }
                }

                $asFloat = (float) $numeric;
                // An (int) cast would silently clamp an out-of-range value
                // to PHP_INT_MAX / PHP_INT_MIN, so keep those as floats.
                $outOfIntRange = $asFloat >= (float) PHP_INT_MAX || $asFloat <= (float) PHP_INT_MIN;
                $rowDatum = ($decimalPos !== false || $outOfIntRange) ? $asFloat : (int) $numeric;
            }
        }

        return $numberFormatMask;
    }

    public function getDelimiter(): ?string
    {
        return $this->delimiter;
    }

    /**
     * Set the field delimiter; null lets the reader infer it from the file.
     */
    public function setDelimiter(?string $delimiter): self
    {
        $this->delimiter = $delimiter;

        return $this;
    }

    public function getEnclosure(): string
    {
        return $this->enclosure;
    }

    /**
     * Set the enclosure character; an empty string selects the default '"'.
     */
    public function setEnclosure(string $enclosure): self
    {
        if ($enclosure === '') {
            $enclosure = '"';
        }
        $this->enclosure = $enclosure;

        return $this;
    }

    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }

    /**
     * Set the index of the sheet the data is loaded into.
     */
    public function setSheetIndex(int $indexValue): self
    {
        $this->sheetIndex = $indexValue;

        return $this;
    }

    /**
     * Choose whether rows that produce no cells are skipped in the output.
     */
    public function setContiguous(bool $contiguous): self
    {
        $this->contiguous = $contiguous;

        return $this;
    }

    public function getContiguous(): bool
    {
        return $this->contiguous;
    }

    /**
     * Php9 intends to drop support for this parameter in fgetcsv.
     * Not yet ready to mark deprecated in order to give users
     * a migration path.
     */
    public function setEscapeCharacter(string $escapeCharacter, int $version = PHP_VERSION_ID): self
    {
        if ($version >= 90000 && $escapeCharacter !== '') {
            throw new ReaderException('Escape character must be null string for Php9+');
        }

        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }

    /**
     * Return the escape character in effect: the one set explicitly, or
     * the default for the given PHP version.
     */
    public function getEscapeCharacter(int $version = PHP_VERSION_ID): string
    {
        return $this->escapeCharacter ?? self::getDefaultEscapeCharacter($version);
    }

    /**
     * Can the current IReader read the file?
     */
    public function canRead(string $filename): bool
    {
        // Check if file exists
        try {
            $this->openFile($filename);
        } catch (ReaderException) {
            return false;
        }

        fclose($this->fileHandle);

        // Trust file extension if any
        $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
        if (in_array($extension, ['csv', 'tsv'], true)) {
            return true;
        }

        // Attempt to guess mimetype
        $type = mime_content_type($filename);
        $supportedTypes = [
            'application/csv',
            'text/csv',
            'text/plain',
            'inode/x-empty',
            'application/x-empty', // has now replaced previous
            'text/html',
        ];

        return in_array($type, $supportedTypes, true);
    }

    /**
     * Set $encoding to $setEncoding when it is still unset and the first
     * occurrence of $compare in $contents starts on a multiple of its length.
     */
    private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
    {
        if ($encoding === '') {
            $pos = strpos($contents, $compare);
            if ($pos !== false && $pos % strlen($compare) === 0) {
                $encoding = $setEncoding;
            }
        }
    }

    /**
     * Guess the encoding of a file without a BOM from its line feeds.
     *
     * Reads the whole file into memory. Returns '' when nothing matches.
     */
    private static function guessEncodingNoBom(string $filename): string
    {
        $encoding = '';
        $contents = (string) file_get_contents($filename);
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
        if ($encoding === '' && preg_match('//u', $contents) === 1) {
            $encoding = 'UTF-8';
        }

        return $encoding;
    }

    /**
     * Set $encoding to $setEncoding when it is still unset and $first4
     * starts with the BOM $compare.
     */
    private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
    {
        if ($encoding === '') {
            if (str_starts_with($first4, $compare)) {
                $encoding = $setEncoding;
            }
        }
    }

    /**
     * Identify the encoding from a leading BOM.
     *
     * @param ?string $convertString bytes to examine instead of reading the file
     *
     * @return string the encoding name, or '' when no known BOM is present
     */
    public static function guessEncodingBom(string $filename, ?string $convertString = null): string
    {
        $encoding = '';
        $first4 = $convertString ?? (string) file_get_contents($filename, false, null, 0, 4);
        self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
        self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
        self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
        // UTF-32LE must be tried before UTF-16LE: its BOM begins with the UTF-16LE one
        self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
        self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');

        return $encoding;
    }

    /**
     * Guess the encoding of a file: by BOM first, then by content,
     * falling back to $dflt when neither gives an answer.
     */
    public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
    {
        $encoding = self::guessEncodingBom($filename);
        if ($encoding === '') {
            $encoding = self::guessEncodingNoBom($filename);
        }

        return ($encoding === '') ? $dflt : $encoding;
    }

    /**
     * Choose whether empty strings are written to cells instead of skipped.
     */
    public function setPreserveNullString(bool $value): self
    {
        $this->preserveNullString = $value;

        return $this;
    }

    public function getPreserveNullString(): bool
    {
        return $this->preserveNullString;
    }

    /**
     * Choose whether the sheet title is taken from the file name.
     */
    public function setSheetNameIsFileName(bool $sheetNameIsFileName): self
    {
        $this->sheetNameIsFileName = $sheetNameIsFileName;

        return $this;
    }

    /**
     * Php8.4 deprecates use of anything other than null string
     * as escape Character.
     *
     * @param resource $stream
     * @param null|int<0, max> $length
     *
     * @return array<int,?string>|false
     */
    private static function getCsv(
        $stream,
        ?int $length = null,
        string $separator = ',',
        string $enclosure = '"',
        ?string $escape = null,
        int $version = PHP_VERSION_ID
    ): array|false {
        $escape = $escape ?? self::getDefaultEscapeCharacter();
        if ($version >= 80400 && $escape !== '') {
            // Suppress the deprecation raised for a non-empty escape character
            return @fgetcsv($stream, $length, $separator, $enclosure, $escape);
        }

        return fgetcsv($stream, $length, $separator, $enclosure, $escape);
    }

    /**
     * Report whether a file loads differently with and without an escape character.
     *
     * Loads the file twice - once with $escapeCharacter and line-ending
     * autodetect on, once with an empty escape character and autodetect
     * off - and compares the resulting cell arrays.
     *
     * @throws ReaderException when $version is outside Php7.4 - Php8
     */
    public static function affectedByPhp9(
        string $filename,
        string $inputEncoding = 'UTF-8',
        ?string $delimiter = null,
        string $enclosure = '"',
        string $escapeCharacter = '\\',
        int $version = PHP_VERSION_ID
    ): bool {
        if ($version < 70400 || $version >= 90000) {
            throw new ReaderException('Function valid only for Php7.4 or Php8');
        }
        $reader1 = new self();
        $reader1->setInputEncoding($inputEncoding)
            ->setTestAutoDetect(true)
            ->setEscapeCharacter($escapeCharacter)
            ->setDelimiter($delimiter)
            ->setEnclosure($enclosure);
        $spreadsheet1 = $reader1->load($filename);
        $sheet1 = $spreadsheet1->getActiveSheet();
        $array1 = $sheet1->toArray(null, false, false);
        $spreadsheet1->disconnectWorksheets();

        $reader2 = new self();
        $reader2->setInputEncoding($inputEncoding)
            ->setTestAutoDetect(false)
            ->setEscapeCharacter('')
            ->setDelimiter($delimiter)
            ->setEnclosure($enclosure);
        $spreadsheet2 = $reader2->load($filename);
        $sheet2 = $spreadsheet2->getActiveSheet();
        $array2 = $sheet2->toArray(null, false, false);
        $spreadsheet2->disconnectWorksheets();

        return $array1 !== $array2;
    }

    /**
     * The character that will be supplied to fgetcsv
     * when escapeCharacter is null.
     * It is anticipated that it will conditionally be set
     * to null-string for Php9 and above.
     */
    private static function getDefaultEscapeCharacter(int $version = PHP_VERSION_ID): string
    {
        return $version < 90000 ? '\\' : '';
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc