• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PHPOffice / PhpSpreadsheet / 22941966561

11 Mar 2026 07:39AM UTC coverage: 96.897% (-0.007%) from 96.904%
22941966561

Pull #4827

github

web-flow
Merge 545cafb7f into a1dacfdf7
Pull Request #4827: Stream encoding conversion in CSV reader to reduce peak memory

31 of 35 new or added lines in 1 file covered. (88.57%)

4 existing lines in 1 file now uncovered.

47718 of 49246 relevant lines covered (96.9%)

384.13 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.83
/src/PhpSpreadsheet/Reader/Csv.php
1
<?php
2

3
namespace PhpOffice\PhpSpreadsheet\Reader;
4

5
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
6
use PhpOffice\PhpSpreadsheet\Cell\Cell;
7
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
8
use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
9
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
10
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
11
use PhpOffice\PhpSpreadsheet\Spreadsheet;
12
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
13
use Throwable;
14

15
class Csv extends BaseReader
16
{
17
    const DEFAULT_FALLBACK_ENCODING = 'CP1252';
18
    const GUESS_ENCODING = 'guess';
19
    const UTF8_BOM = "\xEF\xBB\xBF";
20
    const UTF8_BOM_LEN = 3;
21
    const UTF16BE_BOM = "\xfe\xff";
22
    const UTF16BE_BOM_LEN = 2;
23
    const UTF16BE_LF = "\x00\x0a";
24
    const UTF16LE_BOM = "\xff\xfe";
25
    const UTF16LE_BOM_LEN = 2;
26
    const UTF16LE_LF = "\x0a\x00";
27
    const UTF32BE_BOM = "\x00\x00\xfe\xff";
28
    const UTF32BE_BOM_LEN = 4;
29
    const UTF32BE_LF = "\x00\x00\x00\x0a";
30
    const UTF32LE_BOM = "\xff\xfe\x00\x00";
31
    const UTF32LE_BOM_LEN = 4;
32
    const UTF32LE_LF = "\x0a\x00\x00\x00";
33

34
    /**
35
     * Input encoding.
36
     */
37
    private string $inputEncoding = 'UTF-8';
38

39
    /**
40
     * Fallback encoding if guess strikes out.
41
     */
42
    private string $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;
43

44
    /**
45
     * Delimiter.
46
     */
47
    private ?string $delimiter = null;
48

49
    /**
50
     * Enclosure.
51
     */
52
    private string $enclosure = '"';
53

54
    /**
55
     * Sheet index to read.
56
     */
57
    private int $sheetIndex = 0;
58

59
    /**
60
     * Load rows contiguously.
61
     */
62
    private bool $contiguous = false;
63

64
    /**
65
     * The character that can escape the enclosure.
66
     * This will probably become unsupported in Php 9.
67
     * Not yet ready to mark deprecated in order to give users
68
     * a migration path.
69
     */
70
    private ?string $escapeCharacter = null;
71

72
    /**
73
     * Callback for setting defaults in construction.
74
     *
75
     * @var ?callable
76
     */
77
    private static $constructorCallback;
78

79
    /** Changed from true to false in release 4.0.0 */
80
    public const DEFAULT_TEST_AUTODETECT = false;
81

82
    /**
83
     * Attempt autodetect line endings (deprecated after PHP8.1)?
84
     */
85
    private bool $testAutodetect = self::DEFAULT_TEST_AUTODETECT;
86

87
    protected bool $castFormattedNumberToNumeric = false;
88

89
    protected bool $preserveNumericFormatting = false;
90

91
    private bool $preserveNullString = false;
92

93
    private bool $sheetNameIsFileName = false;
94

95
    private string $getTrue = 'true';
96

97
    private string $getFalse = 'false';
98

99
    private string $thousandsSeparator = ',';
100

101
    private string $decimalSeparator = '.';
102

103
    /**
     * Create a new CSV Reader instance.
     *
     * Runs the parent constructor and then, if a constructor callback has
     * been registered, invokes it with this reader so it can change defaults.
     */
    public function __construct()
    {
        parent::__construct();

        if (self::$constructorCallback === null) {
            return;
        }

        (self::$constructorCallback)($this);
    }
114

115
    /**
     * Register the callback used to change the defaults of new readers.
     *
     * The callback is stored statically and is called by the constructor.
     * It must accept the Csv Reader object as the first parameter,
     * and it should return void. Pass null to remove the callback.
     */
    public static function setConstructorCallback(?callable $callback): void
    {
        self::$constructorCallback = $callback;
    }
125

126
    /**
     * Return the currently registered constructor callback, or null if none is set.
     */
    public static function getConstructorCallback(): ?callable
    {
        return self::$constructorCallback;
    }
130

131
    /**
     * Set the encoding the input is expected to be in.
     *
     * @return $this
     */
    public function setInputEncoding(string $encoding): self
    {
        $this->inputEncoding = $encoding;

        return $this;
    }
137

138
    /**
     * Return the input encoding currently stored on this reader.
     */
    public function getInputEncoding(): string
    {
        return $this->inputEncoding;
    }
142

143
    /**
     * Set the encoding passed to guessEncoding() as the default
     * to use when guessing finds no match.
     *
     * @return $this
     */
    public function setFallbackEncoding(string $fallbackEncoding): self
    {
        $this->fallbackEncoding = $fallbackEncoding;

        return $this;
    }
149

150
    /**
     * Return the fallback encoding.
     */
    public function getFallbackEncoding(): string
    {
        return $this->fallbackEncoding;
    }
154

155
    /**
     * Position the file pointer at the start of the data,
     * just past a UTF-8 BOM marker when one is present.
     */
    protected function skipBOM(): void
    {
        rewind($this->fileHandle);

        $prefix = fgets($this->fileHandle, self::UTF8_BOM_LEN + 1);
        if ($prefix === self::UTF8_BOM) {
            // The pointer now sits immediately after the BOM
            return;
        }

        // No BOM: go back to the very beginning
        rewind($this->fileHandle);
    }
166

167
    /**
     * Identify any separator that is explicitly set in the file.
     *
     * A first line of exactly "sep=X" (case-insensitive, ignoring the line
     * ending) sets the delimiter to X and leaves the pointer after that line.
     * Any other first line is not consumed: the pointer is reset via skipBOM().
     */
    protected function checkSeparator(): void
    {
        $firstLine = fgets($this->fileHandle);
        if ($firstLine === false) {
            return;
        }

        $isDirective = stripos($firstLine, 'sep=') === 0
            && strlen(trim($firstLine, "\r\n")) === 5;
        if (!$isDirective) {
            $this->skipBOM();

            return;
        }

        $this->delimiter = substr($firstLine, 4, 1);
    }
185

186
    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     *
     * When the inference engine counted no lines, or could not detect a
     * delimiter, the engine's default delimiter is used. The file pointer
     * is reset with skipBOM() whenever inference was attempted.
     */
    protected function inferSeparator(): void
    {
        if ($this->delimiter !== null) {
            return;
        }

        $engine = new Delimiter($this->fileHandle, $this->getEscapeCharacter(), $this->enclosure);

        $this->delimiter = ($engine->linesCounted() === 0)
            ? $engine->getDefaultDelimiter()
            : ($engine->infer() ?? $engine->getDefaultDelimiter());

        $this->skipBOM();
    }
215

216
    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The file is scanned row by row; the handle is always closed, even when
     * separator detection or row parsing throws.
     *
     * @return array<int, array{worksheetName: string, lastColumnLetter: string, lastColumnIndex: int, totalRows: int, totalColumns: int, sheetState: string}>
     */
    public function listWorksheetInfo(string $filename): array
    {
        // Open file
        $this->openFileOrMemory($filename);
        $fileHandle = $this->fileHandle;

        $totalRows = 0;
        $lastColumnIndex = 0;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();
            $delimiter = $this->delimiter ?? '';

            // Loop through each line of the file in turn
            $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
            while (is_array($rowData)) {
                ++$totalRows;
                $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
                $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
            }
        } finally {
            // Close file, whether or not parsing succeeded
            fclose($fileHandle);
        }

        return [
            [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => Coordinate::stringFromColumnIndex($lastColumnIndex + 1, true),
                'lastColumnIndex' => $lastColumnIndex,
                'totalRows' => $totalRows,
                'totalColumns' => $lastColumnIndex + 1,
                'sheetState' => Worksheet::SHEETSTATE_VISIBLE,
            ],
        ];
    }
260

261
    /**
     * Loads Spreadsheet from file.
     *
     * A new Spreadsheet is created, given this reader's value binder,
     * and filled by loadIntoExisting().
     */
    protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        $target = $this->newSpreadsheet();
        $target->setValueBinder($this->valueBinder);

        return $this->loadIntoExisting($filename, $target);
    }
272

273
    /**
     * Loads Spreadsheet from string.
     *
     * The contents are url-encoded into a data:// URI, which is then
     * read like a file.
     */
    public function loadSpreadsheetFromString(string $contents): Spreadsheet
    {
        $target = $this->newSpreadsheet();
        $target->setValueBinder($this->valueBinder);

        $dataUri = 'data://text/plain,' . urlencode($contents);

        return $this->loadStringOrFile($dataUri, $target, true);
    }
284

285
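    /**
     * Size in bytes of each read when streaming encoding conversion.
     * It is a multiple of 4, so a full-size read never ends in the
     * middle of a 2-byte (UTF-16) or 4-byte (UTF-32) code unit.
     * This alone does not protect UTF-16 surrogate pairs or
     * variable-width multi-byte characters, which may straddle two reads.
     */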
290
    private const CHUNK_SIZE = 65536;
291

292
    /**
     * Validate the file, settle the input encoding and open a handle on it.
     *
     * With the encoding set to 'UTF-8' a BOM in the file may override it;
     * with GUESS_ENCODING the encoding is guessed (using the fallback
     * encoding as default). The resolved encoding is stored on the reader.
     * For any non-UTF-8 encoding the handle is replaced by a UTF-8 copy
     * produced by convertEncodingStreaming().
     *
     * @throws ReaderException if canRead() rejects the file
     */
    private function openFileOrMemory(string $filename): void
    {
        if (!$this->canRead($filename)) {
            throw new ReaderException($filename . ' is an Invalid Spreadsheet file.');
        }

        if ($this->inputEncoding === 'UTF-8') {
            $bomEncoding = self::guessEncodingBom($filename);
            if ($bomEncoding !== '') {
                $this->inputEncoding = $bomEncoding;
            }
        }
        if ($this->inputEncoding === self::GUESS_ENCODING) {
            $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
        }

        $this->openFile($filename);
        if ($this->inputEncoding === 'UTF-8') {
            return;
        }

        // Non-UTF-8 input: swap the raw handle for a converted stream
        fclose($this->fileHandle);
        $this->convertEncodingStreaming($filename);
    }
314

315
    /**
     * Convert file encoding to UTF-8 using chunked streaming to avoid
     * loading the entire file into memory at once.
     *
     * The converted data is written to a php://temp stream which becomes
     * the reader's file handle. Each chunk is only converted up to a point
     * where no character can be cut in half (see convertibleLength());
     * the remaining bytes are carried over to the next read.
     *
     * @throws ReaderException if the source file or the temporary stream cannot be opened
     */
    private function convertEncodingStreaming(string $filename): void
    {
        $sourceHandle = fopen($filename, 'rb');
        // Using php://temp instead of php://memory: spills to disk when data
        // exceeds 2MB, reducing peak memory for large files.
        $outputHandle = fopen('php://temp', 'r+b');
        if ($sourceHandle === false || $outputHandle === false) {
            // @codeCoverageIgnoreStart
            if ($sourceHandle !== false) {
                fclose($sourceHandle);
            }
            if ($outputHandle !== false) {
                fclose($outputHandle);
            }

            throw new ReaderException("Failed to open file for encoding conversion: {$filename}");
            // @codeCoverageIgnoreEnd
        }

        $encoding = $this->inputEncoding;
        $charWidth = $this->encodingCharWidth($encoding);
        // Ensure chunk size is aligned to character width
        $chunkSize = self::CHUNK_SIZE - (self::CHUNK_SIZE % $charWidth);

        $byteOrderResolved = false;
        // Bytes read but not yet converted
        $pending = '';

        try {
            while (!feof($sourceHandle)) {
                $rawChunk = fread($sourceHandle, max(1, $chunkSize));
                if ($rawChunk === false || $rawChunk === '') {
                    break;
                }

                // Only the newly read bytes need to be searched for a split point
                $searchFrom = strlen($pending);
                $pending .= $rawChunk;

                if (!$byteOrderResolved) {
                    // A BOM is at most 4 bytes; wait until that much is available
                    if (strlen($pending) < self::UTF32BE_BOM_LEN && !feof($sourceHandle)) {
                        continue;
                    }
                    $encoding = self::resolveByteOrder($encoding, $pending);
                    $byteOrderResolved = true;
                }

                $usable = self::convertibleLength($pending, $searchFrom, $encoding, $charWidth);
                if ($usable === 0) {
                    continue;
                }

                $converted = StringHelper::convertEncoding(substr($pending, 0, $usable), 'UTF-8', $encoding);
                fwrite($outputHandle, $converted);
                $pending = substr($pending, $usable);
            }

            // Flush any remaining bytes
            if ($pending !== '') {
                if (!$byteOrderResolved) {
                    $encoding = self::resolveByteOrder($encoding, $pending);
                }
                $converted = StringHelper::convertEncoding($pending, 'UTF-8', $encoding);
                fwrite($outputHandle, $converted);
            }
        } catch (Throwable $e) {
            // The output stream is useless if conversion failed part-way
            fclose($outputHandle);

            throw $e;
        } finally {
            fclose($sourceHandle);
        }

        $this->fileHandle = $outputHandle;
        $this->skipBOM();
    }

    /**
     * Replace a byte-order-neutral encoding name (UTF-16, UTF-32, UCS-2,
     * UCS-4) by its explicit BE/LE variant when the data starts with the
     * matching BOM.
     *
     * Only the first chunk of a file carries the BOM, so without this step
     * later chunks would be decoded with the converter's default byte order.
     * Any other encoding name, or data without a BOM, is returned unchanged.
     */
    private static function resolveByteOrder(string $encoding, string $head): string
    {
        $upper = strtoupper($encoding);
        [$bigEndianBom, $littleEndianBom] = match ($upper) {
            'UTF-32', 'UCS-4' => [self::UTF32BE_BOM, self::UTF32LE_BOM],
            'UTF-16', 'UCS-2' => [self::UTF16BE_BOM, self::UTF16LE_BOM],
            default => ['', ''],
        };

        if ($bigEndianBom === '') {
            return $encoding;
        }
        if (str_starts_with($head, $bigEndianBom)) {
            return $upper . 'BE';
        }
        if (str_starts_with($head, $littleEndianBom)) {
            return $upper . 'LE';
        }

        return $encoding;
    }

    /**
     * Return how many leading bytes of $buffer can be converted without
     * cutting a character in half.
     *
     * - Fixed-width encodings: the length is rounded down to a multiple of
     *   the character width; for UTF-16 a trailing high surrogate is held
     *   back so that a surrogate pair is never split. When the byte order is
     *   unknown, a unit is held back if either of its bytes could be a
     *   high-surrogate lead byte (holding back too much is harmless).
     * - All other encodings: the buffer is cut after the last line break
     *   (LF, else CR) found at or after $searchFrom. This assumes an
     *   ASCII-compatible encoding in which the bytes 0x0A/0x0D never occur
     *   inside a multi-byte character. If no line break is found, 0 is
     *   returned and the caller keeps accumulating.
     */
    private static function convertibleLength(string $buffer, int $searchFrom, string $encoding, int $charWidth): int
    {
        if ($charWidth === 1) {
            $lineBreak = strrpos($buffer, "\n", $searchFrom);
            if ($lineBreak === false) {
                $lineBreak = strrpos($buffer, "\r", $searchFrom);
            }

            return ($lineBreak === false) ? 0 : $lineBreak + 1;
        }

        $length = strlen($buffer);
        $length -= $length % $charWidth;

        $upper = strtoupper($encoding);
        if ($charWidth === 2 && str_starts_with($upper, 'UTF-16')) {
            while ($length >= 2) {
                $firstByte = ord($buffer[$length - 2]);
                $secondByte = ord($buffer[$length - 1]);
                // High surrogates are 0xD800-0xDBFF: lead byte matches 110110xx
                $endsWithHighSurrogate = match ($upper) {
                    'UTF-16BE' => ($firstByte & 0xFC) === 0xD8,
                    'UTF-16LE' => ($secondByte & 0xFC) === 0xD8,
                    default => ($firstByte & 0xFC) === 0xD8 || ($secondByte & 0xFC) === 0xD8,
                };
                if (!$endsWithHighSurrogate) {
                    break;
                }
                $length -= 2;
            }
        }

        return $length;
    }
383

384
    /**
     * Return the byte width of a single character in the given encoding.
     * The name is matched case-insensitively against the UTF-32/UCS-4
     * (4 bytes) and UTF-16/UCS-2 (2 bytes) families; every other name,
     * including variable-width and single-byte encodings, yields 1.
     */
    private function encodingCharWidth(string $encoding): int
    {
        $name = strtoupper($encoding);

        if (in_array($name, ['UTF-32', 'UTF-32BE', 'UTF-32LE', 'UCS-4', 'UCS-4BE', 'UCS-4LE'], true)) {
            return 4;
        }
        if (in_array($name, ['UTF-16', 'UTF-16BE', 'UTF-16LE', 'UCS-2', 'UCS-2BE', 'UCS-2LE'], true)) {
            return 2;
        }

        return 1;
    }
396

397
    /**
     * Choose whether loading should try to switch on the
     * 'auto_detect_line_endings' ini setting (see setAutoDetect()).
     *
     * @return $this
     */
    public function setTestAutoDetect(bool $value): self
    {
        $this->testAutodetect = $value;

        return $this;
    }
403

404
    /**
     * Set the 'auto_detect_line_endings' ini value when autodetect testing
     * is enabled, a value is supplied and the version is below 9.0.
     *
     * @return ?string the previous ini value if ini_set returned a string, otherwise null
     */
    private function setAutoDetect(?string $value, int $version = PHP_VERSION_ID): ?string
    {
        if ($value === null || !$this->testAutodetect || $version >= 90000) {
            return null;
        }

        $previous = @ini_set('auto_detect_line_endings', $value);

        return is_string($previous) ? $previous : null;
    }
416

417
    /**
     * Configure conversion of formatted numeric strings to numbers.
     *
     * @param bool $castFormattedNumberToNumeric whether numeric-looking strings are converted while loading
     * @param bool $preserveNumericFormatting whether a number format mask is derived for converted values
     */
    public function castFormattedNumberToNumeric(
        bool $castFormattedNumberToNumeric,
        bool $preserveNumericFormatting = false
    ): void {
        $this->preserveNumericFormatting = $preserveNumericFormatting;
        $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric;
    }
424

425
    /**
     * Open data uri for reading and store the handle on the reader.
     *
     * @throws ReaderException if the uri cannot be opened
     */
    private function openDataUri(string $filename): void
    {
        $handle = fopen($filename, 'rb');
        if ($handle !== false) {
            $this->fileHandle = $handle;

            return;
        }

        // @codeCoverageIgnoreStart
        throw new ReaderException('Could not open file ' . $filename . ' for reading.');
        // @codeCoverageIgnoreEnd
    }
439

440
    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * The filename is treated as a regular file, not as a data uri.
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        $isDataUri = false;

        return $this->loadStringOrFile($filename, $spreadsheet, $isDataUri);
    }
447

448
    /**
     * Loads PhpSpreadsheet from file (or data uri) into PhpSpreadsheet instance.
     *
     * The line-ending autodetect ini setting is switched on for the duration
     * of the load (when enabled) and restored afterwards, whether the load
     * succeeds or throws.
     */
    private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet
    {
        // Deprecated in Php8.1
        $previousSetting = $this->setAutoDetect('1');

        try {
            $this->loadStringOrFile2($filename, $spreadsheet, $dataUri);
        } finally {
            $this->setAutoDetect($previousSetting);
        }

        return $spreadsheet;
    }
467

468
    /**
     * Open the source, parse it row by row and write the values into the
     * sheet at the configured sheet index (creating sheets as needed).
     *
     * In contiguous mode the output row only advances for input rows that
     * produce at least one cell; otherwise the output row equals the input
     * row. The file handle is always closed, even when parsing or writing
     * a cell throws.
     */
    private function loadStringOrFile2(string $filename, Spreadsheet $spreadsheet, bool $dataUri): void
    {
        // Open file
        if ($dataUri) {
            $this->openDataUri($filename);
        } else {
            $this->openFileOrMemory($filename);
        }
        $fileHandle = $this->fileHandle;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Make sure the target sheet exists
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);
            if ($this->sheetNameIsFileName) {
                $sheet->setTitle(substr(basename($filename, '.csv'), 0, Worksheet::SHEET_TITLE_MAXIMUM_LENGTH));
            }

            // Set our starting row based on whether we're in contiguous mode or not
            $currentRow = 1;
            $outRow = 0;

            // Loop through each line of the file in turn
            $delimiter = $this->delimiter ?? '';
            $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
            $valueBinder = $this->valueBinder ?? Cell::getValueBinder();
            $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
            $this->getTrue = Calculation::getTRUE();
            $this->getFalse = Calculation::getFALSE();
            $this->thousandsSeparator = StringHelper::getThousandsSeparator();
            $this->decimalSeparator = StringHelper::getDecimalSeparator();
            while (is_array($rowData)) {
                $noOutputYet = true;
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    if ($preserveBooleanString) {
                        $rowDatum = $rowDatum ?? '';
                    } else {
                        $this->convertBoolean($rowDatum);
                    }
                    $numberFormatMask = $this->castFormattedNumberToNumeric ? $this->convertFormattedNumber($rowDatum) : '';
                    if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        if ($this->contiguous) {
                            if ($noOutputYet) {
                                $noOutputYet = false;
                                ++$outRow;
                            }
                        } else {
                            $outRow = $currentRow;
                        }
                        // Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
                        if ($numberFormatMask !== '') {
                            $sheet->getStyle($columnLetter . $outRow)
                                ->getNumberFormat()
                                ->setFormatCode($numberFormatMask);
                        }
                        // Set cell value
                        $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                    }
                    StringHelper::stringIncrement($columnLetter);
                }
                $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
                ++$currentRow;
            }
        } finally {
            // Close file, whether or not loading succeeded
            fclose($fileHandle);
        }
    }
543

544
    /**
     * Convert string true/false to boolean, and null to null-string.
     *
     * Strings are compared case-insensitively with both the calculation
     * engine's TRUE/FALSE words captured at load time and the literal
     * 'true'/'false'. Other strings are left untouched.
     */
    private function convertBoolean(mixed &$rowDatum): void
    {
        if (!is_string($rowDatum)) {
            $rowDatum ??= '';

            return;
        }

        $isTrue = strcasecmp($this->getTrue, $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0;
        if ($isTrue) {
            $rowDatum = true;

            return;
        }

        $isFalse = strcasecmp($this->getFalse, $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0;
        if ($isFalse) {
            $rowDatum = false;
        }
    }
559

560
    /**
     * Convert numeric strings to int or float values.
     *
     * The value is changed in place when, after removing the thousands
     * separator and normalising the decimal separator to '.', it is numeric.
     * It becomes a float if the original contained the decimal separator,
     * otherwise an int.
     *
     * @return string a number format mask when formatting is to be preserved, otherwise ''
     */
    private function convertFormattedNumber(mixed &$rowDatum): string
    {
        if ($this->castFormattedNumberToNumeric !== true || !is_string($rowDatum)) {
            return '';
        }

        $numeric = str_replace(
            [$this->thousandsSeparator, $this->decimalSeparator],
            ['', '.'],
            $rowDatum
        );
        if (!is_numeric($numeric)) {
            return '';
        }

        $decimalPos = strpos($rowDatum, $this->decimalSeparator);
        $hasDecimals = $decimalPos !== false;

        $mask = '';
        if ($this->preserveNumericFormatting === true) {
            $mask = str_contains($rowDatum, $this->thousandsSeparator) ? '#,##0' : '0';
            if ($hasDecimals) {
                // At most 6 decimal places are kept in the mask
                $decimals = strlen($rowDatum) - $decimalPos - 1;
                $mask .= '.' . str_repeat('0', min($decimals, 6));
            }
        }

        $rowDatum = $hasDecimals ? (float) $numeric : (int) $numeric;

        return $mask;
    }
590

591
    /**
     * Return the delimiter, or null if none has been set or detected yet.
     */
    public function getDelimiter(): ?string
    {
        return $this->delimiter;
    }
595

596
    /**
     * Set the delimiter; null leaves it to be determined from the file.
     *
     * @return $this
     */
    public function setDelimiter(?string $delimiter): self
    {
        $this->delimiter = $delimiter;

        return $this;
    }
602

603
    /**
     * Return the enclosure character.
     */
    public function getEnclosure(): string
    {
        return $this->enclosure;
    }
607

608
    /**
     * Set the enclosure character; an empty string selects the default '"'.
     *
     * @return $this
     */
    public function setEnclosure(string $enclosure): self
    {
        $this->enclosure = ($enclosure === '') ? '"' : $enclosure;

        return $this;
    }
617

618
    /**
     * Return the index of the sheet the data is loaded into.
     */
    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }
622

623
    /**
     * Set the index of the sheet the data is loaded into.
     *
     * @return $this
     */
    public function setSheetIndex(int $indexValue): self
    {
        $this->sheetIndex = $indexValue;

        return $this;
    }
629

630
    /**
     * Set whether rows are loaded contiguously.
     *
     * @return $this
     */
    public function setContiguous(bool $contiguous): self
    {
        $this->contiguous = $contiguous;

        return $this;
    }
636

637
    /**
     * Return whether rows are loaded contiguously.
     */
    public function getContiguous(): bool
    {
        return $this->contiguous;
    }
641

642
    /**
     * Set the character that can escape the enclosure.
     *
     * Php9 intends to drop support for this parameter in fgetcsv.
     * Not yet ready to mark deprecated in order to give users
     * a migration path.
     *
     * @throws ReaderException for a non-empty escape character when $version is 9.0 or above
     *
     * @return $this
     */
    public function setEscapeCharacter(string $escapeCharacter, int $version = PHP_VERSION_ID): self
    {
        $allowed = $version < 90000 || $escapeCharacter === '';
        if (!$allowed) {
            throw new ReaderException('Escape character must be null string for Php9+');
        }

        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }
657

658
    /**
     * Return the escape character, falling back to the default for the
     * given PHP version when none has been set explicitly.
     */
    public function getEscapeCharacter(int $version = PHP_VERSION_ID): string
    {
        if ($this->escapeCharacter !== null) {
            return $this->escapeCharacter;
        }

        return self::getDefaultEscapeCharacter($version);
    }
662

663
    /**
     * Can the current IReader read the file?
     *
     * The file must be openable. A 'csv' or 'tsv' extension is trusted;
     * otherwise the detected mime type must be one of the supported types.
     */
    public function canRead(string $filename): bool
    {
        // Check if file exists
        try {
            $this->openFile($filename);
        } catch (ReaderException) {
            return false;
        }

        fclose($this->fileHandle);

        // Trust file extension if any
        $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
        if ($extension === 'csv' || $extension === 'tsv') {
            return true;
        }

        // Attempt to guess mimetype
        $supportedTypes = [
            'application/csv',
            'text/csv',
            'text/plain',
            'inode/x-empty',
            'application/x-empty', // has now replaced previous
            'text/html',
        ];

        return in_array(mime_content_type($filename), $supportedTypes, true);
    }
696

697
    /**
     * Set $encoding to $setEncoding when no encoding has been chosen yet and
     * the first occurrence of $compare in $contents starts at an offset that
     * is a multiple of the length of $compare.
     */
    private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
    {
        if ($encoding !== '') {
            return;
        }

        $offset = strpos($contents, $compare);
        if ($offset === false) {
            return;
        }

        if ($offset % strlen($compare) === 0) {
            $encoding = $setEncoding;
        }
    }
706

707
    /**
     * Guess the encoding of a file without relying on a BOM.
     *
     * The whole file is read into memory. Encoded line feeds are looked for
     * in the order UTF-32BE, UTF-32LE, UTF-16BE, UTF-16LE; failing that,
     * content that is valid UTF-8 is reported as 'UTF-8'.
     *
     * @return string the guessed encoding, or '' if no guess could be made
     */
    private static function guessEncodingNoBom(string $filename): string
    {
        $encoding = '';
        $contents = (string) file_get_contents($filename);

        $lineFeeds = [
            'UTF-32BE' => self::UTF32BE_LF,
            'UTF-32LE' => self::UTF32LE_LF,
            'UTF-16BE' => self::UTF16BE_LF,
            'UTF-16LE' => self::UTF16LE_LF,
        ];
        foreach ($lineFeeds as $candidate => $lineFeed) {
            self::guessEncodingTestNoBom($encoding, $contents, $lineFeed, $candidate);
        }

        if ($encoding !== '') {
            return $encoding;
        }

        return (preg_match('//u', $contents) === 1) ? 'UTF-8' : '';
    }
721

722
    /**
     * Set $encoding to $setEncoding when no encoding has been chosen yet
     * and $first4 starts with the BOM given in $compare.
     */
    private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
    {
        if ($encoding !== '' || !str_starts_with($first4, $compare)) {
            return;
        }

        $encoding = $setEncoding;
    }
730

731
    /**
     * Guess the encoding from a BOM.
     *
     * The first 4 bytes of the file are examined, or $convertString when
     * it is supplied. UTF-32LE is tested before UTF-16LE because the
     * UTF-16LE BOM is a prefix of the UTF-32LE BOM.
     *
     * @return string the encoding indicated by the BOM, or '' if there is none
     */
    public static function guessEncodingBom(string $filename, ?string $convertString = null): string
    {
        $first4 = $convertString ?? (string) file_get_contents($filename, false, null, 0, 4);

        $candidates = [
            [self::UTF8_BOM, 'UTF-8'],
            [self::UTF16BE_BOM, 'UTF-16BE'],
            [self::UTF32BE_BOM, 'UTF-32BE'],
            [self::UTF32LE_BOM, 'UTF-32LE'],
            [self::UTF16LE_BOM, 'UTF-16LE'],
        ];

        $encoding = '';
        foreach ($candidates as [$bom, $name]) {
            self::guessEncodingTestBom($encoding, $first4, $bom, $name);
        }

        return $encoding;
    }
743

744
    /**
     * Guess the encoding of a file: first from a BOM, then from its
     * contents, and finally by falling back to $dflt.
     */
    public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
    {
        $fromBom = self::guessEncodingBom($filename);
        if ($fromBom !== '') {
            return $fromBom;
        }

        $fromContents = self::guessEncodingNoBom($filename);
        if ($fromContents !== '') {
            return $fromContents;
        }

        return $dflt;
    }
753

754
    /**
     * Set whether empty-string values are written to cells instead of being skipped.
     *
     * @return $this
     */
    public function setPreserveNullString(bool $value): self
    {
        $this->preserveNullString = $value;

        return $this;
    }
760

761
    /**
     * Return whether empty-string values are written to cells.
     */
    public function getPreserveNullString(): bool
    {
        return $this->preserveNullString;
    }
765

766
    /**
     * Set whether the sheet title is taken from the file name when loading.
     *
     * @return $this
     */
    public function setSheetNameIsFileName(bool $sheetNameIsFileName): self
    {
        $this->sheetNameIsFileName = $sheetNameIsFileName;

        return $this;
    }
772

773
    /**
     * Wrapper around fgetcsv.
     *
     * Php8.4 deprecates use of anything other than null string
     * as escape Character, so for that version and above the call is
     * made with error suppression when the escape character is not empty.
     * A null escape is replaced by the default escape character.
     *
     * @param resource $stream
     * @param null|int<0, max> $length
     *
     * @return array<int,?string>|false
     */
    private static function getCsv(
        $stream,
        ?int $length = null,
        string $separator = ',',
        string $enclosure = '"',
        ?string $escape = null,
        int $version = PHP_VERSION_ID
    ): array|false {
        $escape ??= self::getDefaultEscapeCharacter();

        $suppressDeprecation = $version >= 80400 && $escape !== '';

        return $suppressDeprecation
            ? @fgetcsv($stream, $length, $separator, $enclosure, $escape)
            : fgetcsv($stream, $length, $separator, $enclosure, $escape);
    }
797

798
    /**
     * Report whether a file loads differently with the given escape
     * character and line-ending autodetect than it does with an empty
     * escape character and no autodetect.
     *
     * The file is loaded twice and the resulting cell arrays are compared.
     *
     * @throws ReaderException if $version is below 7.4 or is 9.0 or above
     */
    public static function affectedByPhp9(
        string $filename,
        string $inputEncoding = 'UTF-8',
        ?string $delimiter = null,
        string $enclosure = '"',
        string $escapeCharacter = '\\',
        int $version = PHP_VERSION_ID
    ): bool {
        if ($version < 70400 || $version >= 90000) {
            throw new ReaderException('Function valid only for Php7.4 or Php8');
        }

        $readRows = static function (bool $testAutoDetect, string $escape) use ($filename, $inputEncoding, $delimiter, $enclosure): array {
            $reader = new self();
            $reader->setInputEncoding($inputEncoding)
                ->setTestAutoDetect($testAutoDetect)
                ->setEscapeCharacter($escape)
                ->setDelimiter($delimiter)
                ->setEnclosure($enclosure);
            $spreadsheet = $reader->load($filename);
            $rows = $spreadsheet->getActiveSheet()->toArray(null, false, false);
            $spreadsheet->disconnectWorksheets();

            return $rows;
        };

        $withEscape = $readRows(true, $escapeCharacter);
        $withoutEscape = $readRows(false, '');

        return $withEscape !== $withoutEscape;
    }
833

834
    /**
     * The character that will be supplied to fgetcsv
     * when escapeCharacter is null: a backslash below version 9.0,
     * and null-string for 9.0 and above.
     */
    private static function getDefaultEscapeCharacter(int $version = PHP_VERSION_ID): string
    {
        if ($version >= 90000) {
            return '';
        }

        return '\\';
    }
844
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc