• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

xlnt-community / xlnt / e9004608-a473-4d13-9376-0953a6c90a52

07 Apr 2026 06:59PM UTC coverage: 82.654% (-1.3%) from 83.961%
e9004608-a473-4d13-9376-0953a6c90a52

Pull #147

circleci

doomlaur
Merge branch 'master' into compound_document_improvements
Pull Request #147: Compound document improvements

15404 of 20563 branches covered (74.91%)

412 of 723 new or added lines in 7 files covered. (56.98%)

15 existing lines in 2 files now uncovered.

12599 of 15243 relevant lines covered (82.65%)

12050.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

59.36
./source/detail/cryptography/compound_document.cpp
1
// Copyright (C) 2016-2022 Thomas Fussell
2
// Copyright (C) 2002-2007 Ariya Hidayat (ariya@kde.org).
3
// Copyright (c) 2024-2026 xlnt-community
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions
7
// are met:
8
//
9
// 1. Redistributions of source code must retain the above copyright
10
// notice, this list of conditions and the following disclaimer.
11
// 2. Redistributions in binary form must reproduce the above copyright
12
// notice, this list of conditions and the following disclaimer in the
13
// documentation and/or other materials provided with the distribution.
14
//
15
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25

26
#include <algorithm>
27
#include <array>
28
#include <cstring>
29
#include <iostream>
30
#include <locale>
31
#include <string>
32
#include <vector>
33

34
#include <xlnt/utils/exceptions.hpp>
35
#include <detail/binary.hpp>
36
#include <detail/cryptography/compound_document.hpp>
37
#include <detail/unicode.hpp>
38

39
#define FMT_HEADER_ONLY
40
#include <fmt/format.h>
41

42
// NOTE: compound files are not part of the OOXML specification (ECMA-376).
43
// This implementation is based on the "[MS-CFB]: Compound File Binary File Format" specification.
44
namespace {
45

46
using namespace xlnt::detail;
47

48
template <typename T>
NEW
49
std::string format_hex(T value)
×
50
{
51
    // Format example: 0x0000660F
NEW
52
    return fmt::format("0x{:08X}", value);
×
53
}
54

55
/// <summary>
/// Compares two keys case-insensitively. Both keys are lower-cased with the ctype facet
/// of a default-constructed std::locale and then compared with std::string::compare,
/// so the result is negative, zero or positive.
/// </summary>
int compare_keys(const std::string &left, const std::string &right)
{
    const auto lowered = [](std::string text) {
        if (!text.empty())
        {
            // Constructed once, on the first non-empty key.
            static const std::locale locale;
            const std::ctype<char> &facet = std::use_facet<std::ctype<char>>(locale);
            char *const first = &text[0];
            facet.tolower(first, first + text.size());
        }

        return text;
    };

    const std::string lowered_left = lowered(left);
    const std::string lowered_right = lowered(right);

    return lowered_left.compare(lowered_right);
}
73

74
/// <summary>
/// Splits a path at every '/' character. Empty segments are kept, so a leading,
/// trailing or doubled slash produces an empty string in the result, and a path
/// without any slash yields a single element.
/// </summary>
std::vector<std::string> split_path(const std::string &path)
{
    std::vector<std::string> parts;
    std::size_t segment_start = 0;

    for (std::size_t slash = path.find('/'); slash != std::string::npos; slash = path.find('/', segment_start))
    {
        parts.emplace_back(path.substr(segment_start, slash - segment_start));
        segment_start = slash + 1;
    }

    // Whatever follows the last slash (possibly nothing) is the final segment.
    parts.emplace_back(path.substr(segment_start));

    return parts;
}
×
91

92
/// <summary>
/// Concatenates all path parts, appending a '/' after every part (including the last one).
/// An empty vector yields an empty string.
/// </summary>
std::string join_path(const std::vector<std::string> &path)
{
    std::string result;

    for (auto part = path.begin(); part != path.end(); ++part)
    {
        result += *part;
        result += '/';
    }

    return result;
}
×
104

105
/// <summary>
/// Returns a const reference to the last element of the vector.
/// Access is bounds-checked through at(), so an empty vector throws std::out_of_range.
/// </summary>
template <typename T>
const T & last_elem(const std::vector<T> &vec)
{
    const std::size_t last_index = vec.size() - 1;
    return vec.at(last_index);
}
110

111
/// <summary>
/// Returns a mutable reference to the last element of the vector.
/// Access is bounds-checked through at(), so an empty vector throws std::out_of_range.
/// </summary>
template <typename T>
T & last_elem(std::vector<T> &vec)
{
    const std::size_t last_index = vec.size() - 1;
    return vec.at(last_index);
}
116

117
} // namespace
118

119
namespace xlnt {
120
namespace detail {
121

122
/// <summary>
/// Returns true if the sector is the ENDOFCHAIN marker. The value is first passed to
/// expect_valid_sector_or_chain_end, which rejects anything that is neither a regular
/// sector nor ENDOFCHAIN.
/// </summary>
bool is_chain_end(sector_id sector)
{
    expect_valid_sector_or_chain_end(sector);

    const bool at_end = (sector == ENDOFCHAIN);
    return at_end;
}
127

128
bool has_invalid_start_sector(const compound_document_entry &entry)
1,005✔
129
{
130
    switch (entry.type)
1,005!
131
    {
132
    case compound_document_entry::entry_type::RootStorage:
1,005✔
133
    case compound_document_entry::entry_type::Stream:
134
        return is_chain_end(entry.start_sector);
1,005✔
135
    // Both Unallocated and Storage entries can have a start sector that is 0, but they are still invalid.
136
    // For all other entries, the start sector does not make sense either, and is thus always invalid.
NEW
137
    case compound_document_entry::entry_type::Unallocated:
×
138
    case compound_document_entry::entry_type::Storage:
139
    case compound_document_entry::entry_type::LockBytes:
140
    case compound_document_entry::entry_type::Property:
141
    default:
NEW
142
        return true;
×
143
    }
144
}
145

146
/// <summary>
/// Returns true if the directory id is the NOSTREAM marker. The value is first passed to
/// expect_valid_entry_or_no_stream, which rejects anything that is neither a regular
/// directory id nor NOSTREAM.
/// </summary>
bool is_invalid_entry(directory_id entry)
{
    expect_valid_entry_or_no_stream(entry);

    const bool no_stream = (entry == NOSTREAM);
    return no_stream;
}
151

152
/// <summary>
/// Throws xlnt::invalid_parameter unless the sector is a regular sector (<= MAXREGSECT)
/// or the ENDOFCHAIN marker. The offending value is included in the message as hexadecimal.
/// </summary>
void expect_valid_sector_or_chain_end(sector_id sector)
{
    const bool acceptable = sector <= MAXREGSECT || sector == ENDOFCHAIN;

    if (acceptable)
    {
        return;
    }

    throw xlnt::invalid_parameter("expected valid sector (<= MAXREGSECT, which means <= 0xFFFFFFFA) or ENDOFCHAIN (0xFFFFFFFE)"
        ", but got " + format_hex(sector));
}
244,394✔
160

161
/// <summary>
/// Throws xlnt::invalid_parameter unless the directory id is a regular id (<= MAXREGSID)
/// or the NOSTREAM marker. The offending value is included in the message as hexadecimal.
/// </summary>
void expect_valid_entry_or_no_stream(directory_id entry)
{
    const bool acceptable = entry <= MAXREGSID || entry == NOSTREAM;

    if (acceptable)
    {
        return;
    }

    throw xlnt::invalid_parameter("expected valid entry (<= MAXREGSID, which means <= 0xFFFFFFFA) or NOSTREAM (0xFFFFFFFF)"
        ", but got " + format_hex(entry));
}
432✔
169

170
/// <summary>
171
/// Allows a std::vector to be read through a std::istream.
172
/// </summary>
173
class compound_document_istreambuf : public std::streambuf
174
{
175
    using int_type = std::streambuf::int_type;
176

177
public:
178
    compound_document_istreambuf(const compound_document_entry &entry, compound_document &document)
64✔
179
        : entry_(entry),
128✔
180
          document_(document),
64✔
181
          sector_writer_(current_sector_)
64✔
182
    {
183
    }
64✔
184

185
    compound_document_istreambuf(const compound_document_istreambuf &) = delete;
186
    compound_document_istreambuf &operator=(const compound_document_istreambuf &) = delete;
187

188
    ~compound_document_istreambuf() override = default;
128✔
189

190
private:
191
    std::streamsize xsgetn(char *c, std::streamsize count) override
779✔
192
    {
193
        std::streamsize bytes_read = 0;
779✔
194

195
        const sector_chain &sec_chain = mini_stream() ? document_.mini_FAT_ : document_.FAT_;
779✔
196
        const sector_chain chain = document_.follow_chain(entry_, sec_chain);
779✔
197
        const std::uint64_t sector_size = mini_stream() ? document_.mini_sector_size() : document_.sector_size();
779✔
198
        sector_id current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
779✔
199
        std::uint64_t remaining = std::min(entry_.stream_size - position_, static_cast<std::uint64_t>(count));
779✔
200

201
        while (remaining)
3,781✔
202
        {
203
            if (current_sector_.empty() || chain.at(static_cast<std::size_t>(position_ / sector_size)) != current_sector)
3,002✔
204
            {
205
                current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
2,272✔
206
                sector_writer_.reset();
2,272✔
207
                if (mini_stream())
2,272✔
208
                {
209
                    document_.read_mini_sector(current_sector, sector_writer_);
192✔
210
                }
211
                else
212
                {
213
                    document_.read_sector(current_sector, sector_writer_);
2,080✔
214
                }
215
            }
216

217
            const std::uint64_t available = std::min(entry_.stream_size - position_, sector_size - position_ % sector_size);
3,002✔
218
            const std::uint64_t to_read = std::min(available, remaining);
3,002✔
219

220
            auto start = current_sector_.begin() + static_cast<std::ptrdiff_t>(position_ % sector_size);
3,002✔
221
            auto end = start + static_cast<std::ptrdiff_t>(to_read);
3,002✔
222

223
            for (auto i = start; i < end; ++i)
1,074,752✔
224
            {
225
                *(c++) = static_cast<char>(*i);
1,071,750✔
226
            }
227

228
            remaining -= to_read;
3,002✔
229
            position_ += to_read;
3,002✔
230
            bytes_read += to_read;
3,002✔
231
        }
232

233
        if (position_ < entry_.stream_size && chain.at(static_cast<std::size_t>(position_ / sector_size)) != current_sector)
779✔
234
        {
235
            current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
26✔
236
            sector_writer_.reset();
26✔
237
            if (mini_stream())
26!
238
            {
239
                document_.read_mini_sector(current_sector, sector_writer_);
26✔
240
            }
241
            else
242
            {
243
                document_.read_sector(current_sector, sector_writer_);
×
244
            }
245
        }
246

247
        return bytes_read;
779✔
248
    }
779✔
249

250
    bool mini_stream()
3,856✔
251
    {
252
        return entry_.stream_size < document_.header_.mini_stream_cutoff_size;
3,856✔
253
    }
254

255
    int_type underflow() override
×
256
    {
NEW
257
        if (position_ >= entry_.stream_size)
×
258
        {
259
            return traits_type::eof();
×
260
        }
261

NEW
262
        std::uint64_t old_position = position_;
×
NEW
263
        char result = '\0';
×
264
        xsgetn(&result, 1);
×
265
        position_ = old_position;
×
266

267
        return result;
×
268
    }
269

270
    int_type uflow() override
×
271
    {
NEW
272
        int_type result = underflow();
×
273
        ++position_;
×
274

275
        return result;
×
276
    }
277

278
    std::streamsize showmanyc() override
×
279
    {
NEW
280
        if (position_ == entry_.stream_size)
×
281
        {
282
            return static_cast<std::streamsize>(-1);
×
283
        }
284

NEW
285
        return static_cast<std::streamsize>(entry_.stream_size - position_);
×
286
    }
287

288
    std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode) override
52✔
289
    {
290
        if (way == std::ios_base::beg)
52!
291
        {
292
            position_ = 0;
×
293
        }
294
        else if (way == std::ios_base::end)
52!
295
        {
NEW
296
            position_ = entry_.stream_size;
×
297
        }
298

299
        if (off < 0)
52!
300
        {
NEW
301
            if (static_cast<std::uint64_t>(-off) > position_)
×
302
            {
303
                position_ = 0;
×
NEW
304
                return static_cast<std::streamoff>(-1);
×
305
            }
306
            else
307
            {
NEW
308
                position_ -= static_cast<std::uint64_t>(-off);
×
309
            }
310
        }
311
        else if (off > 0)
52!
312
        {
NEW
313
            if (static_cast<std::uint64_t>(off) + position_ > entry_.stream_size)
×
314
            {
NEW
315
                position_ = entry_.stream_size;
×
NEW
316
                return static_cast<std::streamoff>(-1);
×
317
            }
318
            else
319
            {
NEW
320
                position_ += static_cast<std::uint64_t>(off);
×
321
            }
322
        }
323

324
        return static_cast<std::streamoff>(position_);
52✔
325
    }
326

327
    std::streampos seekpos(std::streampos sp, std::ios_base::openmode) override
×
328
    {
329
        if (sp < 0)
×
330
        {
331
            position_ = 0;
×
332
        }
NEW
333
        else if (static_cast<std::uint64_t>(sp) > entry_.stream_size)
×
334
        {
NEW
335
            position_ = entry_.stream_size;
×
336
        }
337
        else
338
        {
NEW
339
            position_ = static_cast<std::uint64_t>(sp);
×
340
        }
341

NEW
342
        return static_cast<std::streamoff>(position_);
×
343
    }
344

345
private:
346
    const compound_document_entry &entry_;
347
    compound_document &document_;
348
    std::vector<byte> current_sector_;
349
    binary_writer<byte> sector_writer_;
350
    std::uint64_t position_ = 0;
351
};
352

353
/// <summary>
354
/// Allows a std::vector to be written through a std::ostream.
355
/// </summary>
356
class compound_document_ostreambuf : public std::streambuf
357
{
358
    using int_type = std::streambuf::int_type;
359

360
public:
361
    compound_document_ostreambuf(compound_document_entry &entry, compound_document &document)
8✔
362
        : entry_(entry),
16✔
363
          document_(document),
8✔
364
          current_sector_(document.header_.mini_stream_cutoff_size),
8✔
365
          sector_reader_(current_sector_)
16✔
366
    {
367
        setp(reinterpret_cast<char *>(current_sector_.data()),
8✔
368
            reinterpret_cast<char *>(current_sector_.data() + current_sector_.size()));
8✔
369
    }
8✔
370

371
    compound_document_ostreambuf(const compound_document_ostreambuf &) = delete;
372
    compound_document_ostreambuf &operator=(const compound_document_ostreambuf &) = delete;
373

374
    ~compound_document_ostreambuf() override;
375

376
private:
377
    int sync() override
986✔
378
    {
379
        auto written = static_cast<std::uint64_t>(pptr() - pbase());
986✔
380

381
        if (written == 0)
986✔
382
        {
383
            return 0;
4✔
384
        }
385

386
        sector_reader_.reset();
982✔
387

388
        if (mini_stream())
982✔
389
        {
390
            if (position_ + written >= document_.header_.mini_stream_cutoff_size)
8✔
391
            {
392
                convert_to_long_stream();
4✔
393
            }
394
            else
395
            {
396
                if (has_invalid_start_sector(entry_))
4!
397
                {
398
                    std::size_t num_sectors = static_cast<std::size_t>(
NEW
399
                        (position_ + written + document_.mini_sector_size() - 1) / document_.mini_sector_size());
×
NEW
400
                    chain_ = document_.allocate_mini_sectors(num_sectors);
×
NEW
401
                    entry_.start_sector = chain_.at(0);
×
402
                }
403

404
                for (sector_id link : chain_)
4!
405
                {
NEW
406
                    document_.write_mini_sector(sector_reader_, link);
×
NEW
407
                    sector_reader_.offset(sector_reader_.offset() + static_cast<std::size_t>(document_.mini_sector_size()));
×
408
                }
409
            }
410
        }
411
        else
412
        {
413
            const std::size_t sector_index = static_cast<std::size_t>(position_ / document_.sector_size());
974✔
414
            document_.write_sector(sector_reader_, chain_.at(sector_index));
974✔
415
        }
416

417
        position_ += written;
982✔
418
        entry_.stream_size = std::max(entry_.stream_size, position_);
982✔
419
        document_.write_directory();
982✔
420

421
        std::fill(current_sector_.begin(), current_sector_.end(), byte(0));
982✔
422
        setp(reinterpret_cast<char *>(current_sector_.data()),
982✔
423
            reinterpret_cast<char *>(current_sector_.data() + current_sector_.size()));
982✔
424

425
        return 0;
982✔
426
    }
427

428
    bool mini_stream()
1,956✔
429
    {
430
        return entry_.stream_size < document_.header_.mini_stream_cutoff_size;
1,956✔
431
    }
432

433
    int_type overflow(int_type c = traits_type::eof()) override
974✔
434
    {
435
        sync();
974✔
436

437
        if (mini_stream())
974!
438
        {
NEW
439
            sector_id next_sector = document_.allocate_mini_sector();
×
NEW
440
            document_.mini_FAT_.at(last_elem(chain_)) = next_sector;
×
441
            chain_.push_back(next_sector);
×
NEW
442
            document_.write_mini_FAT();
×
443
        }
444
        else
445
        {
446
            sector_id next_sector = document_.allocate_sector();
974✔
447
            document_.FAT_.at(last_elem(chain_)) = next_sector;
974✔
448
            chain_.push_back(next_sector);
974✔
449
            document_.write_FAT();
974✔
450
        }
451

452
        auto value = static_cast<std::uint8_t>(c);
974✔
453

454
        if (c != traits_type::eof())
974!
455
        {
456
            std::size_t sector_index = static_cast<std::size_t>(position_ % current_sector_.size());
974✔
457
            current_sector_.at(sector_index) = value;
974✔
458
        }
459

460
        pbump(1);
974✔
461

462
        return traits_type::to_int_type(static_cast<char>(value));
974✔
463
    }
464

465
    void convert_to_long_stream()
4✔
466
    {
467
        sector_reader_.reset();
4✔
468

469
        std::size_t num_sectors = static_cast<std::size_t>(current_sector_.size() / document_.sector_size());
4✔
470
        sector_chain new_chain = document_.allocate_sectors(num_sectors);
4✔
471

472
        for (sector_id link : new_chain)
36✔
473
        {
474
            document_.write_sector(sector_reader_, link);
32✔
475
            sector_reader_.offset(sector_reader_.offset() + static_cast<std::size_t>(document_.mini_sector_size()));
32✔
476
        }
477

478
        current_sector_.resize(static_cast<std::size_t>(document_.sector_size()), 0);
4✔
479
        std::fill(current_sector_.begin(), current_sector_.end(), byte(0));
4✔
480

481
        if (has_invalid_start_sector(entry_))
4!
482
        {
483
            // TODO: deallocate mini sectors here
NEW
484
            if (document_.header_.num_mini_FAT_sectors == 0)
×
485
            {
NEW
486
                document_.entries_.at(0).start_sector = ENDOFCHAIN;
×
487
            }
488
        }
489

490
        chain_ = new_chain;
4✔
491
        entry_.start_sector = chain_.at(0);
4✔
492
        document_.write_directory();
4✔
493
    }
4✔
494

495
    std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode) override
×
496
    {
497
        if (way == std::ios_base::beg)
×
498
        {
499
            position_ = 0;
×
500
        }
501
        else if (way == std::ios_base::end)
×
502
        {
NEW
503
            position_ = entry_.stream_size;
×
504
        }
505

506
        if (off < 0)
×
507
        {
NEW
508
            if (static_cast<std::uint64_t>(-off) > position_)
×
509
            {
510
                position_ = 0;
×
NEW
511
                return static_cast<std::streamoff>(-1);
×
512
            }
513
            else
514
            {
NEW
515
                position_ -= static_cast<std::uint64_t>(-off);
×
516
            }
517
        }
518
        else if (off > 0)
×
519
        {
NEW
520
            if (static_cast<std::uint64_t>(off) + position_ > entry_.stream_size)
×
521
            {
NEW
522
                position_ = entry_.stream_size;
×
NEW
523
                return static_cast<std::streamoff>(-1);
×
524
            }
525
            else
526
            {
NEW
527
                position_ += static_cast<std::uint64_t>(off);
×
528
            }
529
        }
530

NEW
531
        return static_cast<std::streamoff>(position_);
×
532
    }
533

534
    std::streampos seekpos(std::streampos sp, std::ios_base::openmode) override
×
535
    {
536
        if (sp < 0)
×
537
        {
538
            position_ = 0;
×
539
        }
NEW
540
        else if (static_cast<std::uint64_t>(sp) > entry_.stream_size)
×
541
        {
NEW
542
            position_ = entry_.stream_size;
×
543
        }
544
        else
545
        {
NEW
546
            position_ = static_cast<std::uint64_t>(sp);
×
547
        }
548

NEW
549
        return static_cast<std::streamoff>(position_);
×
550
    }
551

552
private:
553
    compound_document_entry &entry_;
554
    compound_document &document_;
555
    std::vector<byte> current_sector_;
556
    binary_reader<byte> sector_reader_;
557
    std::uint64_t position_ = 0;
558
    sector_chain chain_;
559
};
560

561
/// <summary>
/// Flushes any data still held in the put area before the buffer goes away.
/// </summary>
compound_document_ostreambuf::~compound_document_ostreambuf()
{
    // Destructors are implicitly noexcept, so an exception escaping from sync()
    // would call std::terminate. The final flush is therefore best-effort.
    try
    {
        sync();
    }
    catch (...)
    {
        // Nothing sensible can be done with the error at this point.
    }
}
16✔
565

566
/// <summary>
/// Creates a new compound document that is written to out: writes the header
/// and inserts the root storage entry "/Root Entry".
/// </summary>
compound_document::compound_document(std::ostream &out)
    : out_(&out),
      stream_in_(nullptr),
      stream_out_(nullptr)
{
    const auto root_type = compound_document_entry::entry_type::RootStorage;

    write_header();
    insert_entry("/Root Entry", root_type);
}
4✔
574

575
/// <summary>
/// Opens an existing compound document from in by reading, in this order,
/// the header, the DIFAT, the FAT, the mini FAT and the directory.
/// </summary>
compound_document::compound_document(std::istream &in)
    : in_(&in),
      stream_in_(nullptr),
      stream_out_(nullptr)
{
    // The order matters: each step is executed after the ones listed before it.
    this->read_header();
    this->read_DIFAT();
    this->read_FAT();
    this->read_mini_FAT();
    this->read_directory();
}
32✔
586

587
/// <summary>
/// Closes the document (see close()) when it is destroyed.
/// </summary>
compound_document::~compound_document()
{
    this->close();
}
36✔
591

592
void compound_document::close()
36✔
593
{
594
    stream_out_buffer_.reset(nullptr);
36✔
595
}
36✔
596

597
std::uint64_t compound_document::sector_size()
40,768✔
598
{
599
    return static_cast<std::uint64_t>(1) << header_.sector_shift;
40,768✔
600
}
601

602
std::uint64_t compound_document::mini_sector_size()
960✔
603
{
604
    return static_cast<std::uint64_t>(1) << header_.mini_sector_shift;
960✔
605
}
606

607
std::istream &compound_document::open_read_stream(const std::string &name)
64✔
608
{
609
    if (!contains_entry(name, compound_document_entry::entry_type::Stream))
64!
610
    {
NEW
611
        throw xlnt::invalid_file("compound document entry of type 2 (Stream) not found at path: " + name);
×
612
    }
613

614
    const directory_id entry_id = find_entry(name, compound_document_entry::entry_type::Stream);
64✔
615
    const compound_document_entry &entry = entries_.at(entry_id);
64✔
616

617
    stream_in_buffer_.reset(new compound_document_istreambuf(entry, *this));
64!
618
    stream_in_.rdbuf(stream_in_buffer_.get());
64✔
619

620
    return stream_in_;
64✔
621
}
622

623
std::ostream &compound_document::open_write_stream(const std::string &name)
8✔
624
{
625
    directory_id entry_id = contains_entry(name, compound_document_entry::entry_type::Stream)
8✔
626
        ? find_entry(name, compound_document_entry::entry_type::Stream)
8!
627
        : insert_entry(name, compound_document_entry::entry_type::Stream);
8✔
628
    compound_document_entry &entry = entries_.at(entry_id);
8✔
629

630
    stream_out_buffer_.reset(new compound_document_ostreambuf(entry, *this));
8!
631
    stream_out_.rdbuf(stream_out_buffer_.get());
8✔
632

633
    return stream_out_;
8✔
634
}
635

636
template <typename T>
637
void compound_document::write_sector(binary_reader<T> &reader, sector_id id)
10,399✔
638
{
639
    out_->seekp(static_cast<std::streampos>(sector_data_start() + sector_size() * id));
10,399✔
640
    out_->write(reinterpret_cast<const char *>(reader.data() + reader.offset()),
10,399✔
641
        static_cast<std::streamsize>(std::min(sector_size(), static_cast<std::uint64_t>(reader.bytes() - reader.offset()))));
10,399✔
642
}
10,399✔
643

644
template <typename T>
NEW
645
void compound_document::write_mini_sector(binary_reader<T> &reader, sector_id id)
×
646
{
NEW
647
    sector_id sector_id = chain_sector_at_index(entries_.at(0), FAT_, id / (sector_size() / mini_sector_size()));
×
NEW
648
    std::uint64_t sector_offset = id % (sector_size() / mini_sector_size()) * mini_sector_size();
×
NEW
649
    out_->seekp(static_cast<std::streampos>(sector_data_start() + sector_size() * sector_id + sector_offset));
×
UNCOV
650
    out_->write(reinterpret_cast<const char *>(reader.data() + reader.offset()),
×
NEW
651
        static_cast<std::streamsize>(std::min(mini_sector_size(), static_cast<std::uint64_t>(reader.bytes() - reader.offset()))));
×
UNCOV
652
}
×
653

654
template <typename T>
655
void compound_document::read_sector(sector_id id, binary_writer<T> &writer)
2,743✔
656
{
657
    auto seek_pos = static_cast<std::streampos>(sector_data_start() + sector_size() * id);
2,743✔
658
    // Exception handling could provide useful information about why errors have occurred.
659
    auto previous_exception_mask = in_->exceptions();
2,743✔
660
    in_->exceptions(std::istream::failbit | std::istream::badbit);
2,743✔
661

662
    try
663
    {
664
        in_->seekg(seek_pos);
2,743✔
665
    }
NEW
666
    catch (const std::exception &ex)
×
667
    {
NEW
668
        throw xlnt::invalid_file("Failed reading compound document sector " + std::to_string(id) + ": could not seek to stream position " +
×
NEW
669
            std::to_string(seek_pos) + ". Reason: " + ex.what());
×
670
    }
671

672
    std::vector<byte> sector(static_cast<std::size_t>(sector_size()), 0);
2,743✔
673
    try
674
    {
675
        in_->read(reinterpret_cast<char *>(sector.data()), static_cast<std::streamsize>(sector_size()));
2,743✔
676
    }
NEW
677
    catch (const std::exception &ex)
×
678
    {
NEW
679
        throw xlnt::invalid_file("Failed reading compound document sector " + std::to_string(id) + ": could not read " + std::to_string(sector_size()) +
×
NEW
680
            " bytes from stream position " + std::to_string(seek_pos) + ". Reason: " + ex.what());
×
681
    }
682

683
    writer.append(sector);
2,743✔
684
    in_->exceptions(previous_exception_mask);
2,743✔
685
}
2,743✔
686

687
template <typename T>
688
void compound_document::read_sector_chain(sector_id start, binary_writer<T> &writer)
689
{
690
    for (sector_id link : follow_chain(start, FAT_))
691
    {
692
        read_sector(link, writer);
693
    }
694
}
695

696
template <typename T>
697
void compound_document::read_sector_chain(sector_id start, binary_writer<T> &writer, sector_id offset, std::size_t count)
698
{
699
    sector_chain chain = follow_chain(start, FAT_);
700

701
    for (std::size_t i = 0; i < count; ++i)
702
    {
703
        read_sector(chain.at(offset + i), writer);
704
    }
705
}
706

707
template <typename T>
708
void compound_document::read_mini_sector(sector_id id, binary_writer<T> &writer)
218✔
709
{
710
    const sector_chain container_chain = follow_chain(entries_.at(0), FAT_);
218✔
711
    std::vector<byte> container;
218✔
712
    binary_writer<byte> container_writer(container);
218✔
713

714
    for (sector_id sector : container_chain)
790✔
715
    {
716
        read_sector(sector, container_writer);
572✔
717
    }
718

719
    binary_reader<byte> container_reader(container);
218✔
720
    container_reader.offset(static_cast<std::size_t>(id * mini_sector_size()));
218✔
721

722
    writer.append(container_reader, static_cast<std::size_t>(mini_sector_size()));
218✔
723
}
218✔
724

725
template <typename T>
726
void compound_document::read_mini_sector_chain(sector_id start, binary_writer<T> &writer)
727
{
728
    for (sector_id link : follow_chain(start, mini_FAT_))
729
    {
730
        read_mini_sector(link, writer);
731
    }
732
}
733

734
template <typename T>
735
void compound_document::read_mini_sector_chain(sector_id start, binary_writer<T> &writer, sector_id offset, std::size_t count)
736
{
737
    sector_chain chain = follow_chain(start, mini_FAT_);
738

739
    for (std::size_t i = 0; i < count; ++i)
740
    {
741
        read_mini_sector(chain.at(offset + i), writer);
742
    }
743
}
744

745
// Reserves one sector and returns its id.
//
// The first FREESECT slot of FAT_ is used. When FAT_ has no free slot, a new
// FAT sector is registered first: it is appended to DIFAT_, recorded in the
// header, FAT_ is extended by one sector's worth of FREESECT entries, and the
// new FAT sector is marked FATSECT and written out. The reserved slot is
// marked ENDOFCHAIN, the FAT is written, and the sector itself is zero-filled.
sector_id compound_document::allocate_sector()
{
    const auto ids_per_sector = static_cast<std::size_t>(sector_size() / sizeof(sector_id));
    auto free_slot = std::find(FAT_.begin(), FAT_.end(), FREESECT);
    const bool FAT_is_full = (free_slot == FAT_.end());

    if (FAT_is_full)
    {
        // Index of the FAT sector being added, counted in FAT sectors.
        const std::uint32_t added_FAT_index = header_.num_FAT_sectors;
        const auto added_FAT_sector = static_cast<sector_id>(FAT_.size());

        DIFAT_.push_back(added_FAT_sector);
        write_DIFAT();

        header_.DIFAT.at(DIFAT_.size() - 1) = added_FAT_sector;
        header_.num_FAT_sectors += 1;
        write_header();

        FAT_.resize(FAT_.size() + ids_per_sector, FREESECT);
        FAT_.at(added_FAT_sector) = FATSECT;

        // Write only the part of the FAT that belongs to the new FAT sector.
        binary_reader<sector_id> FAT_reader(FAT_);
        FAT_reader.offset(added_FAT_index * ids_per_sector);
        write_sector(FAT_reader, added_FAT_sector);

        // The resize invalidated the iterator; search again.
        free_slot = std::find(FAT_.begin(), FAT_.end(), FREESECT);
    }

    const auto allocated = static_cast<sector_id>(free_slot - FAT_.begin());
    FAT_.at(allocated) = ENDOFCHAIN;

    write_FAT();

    // Zero-fill the newly reserved sector.
    std::vector<byte> zeroes(static_cast<std::size_t>(sector_size()));
    binary_reader<byte> zeroes_reader(zeroes);
    write_sector(zeroes_reader, allocated);

    return allocated;
}
1,010✔
783

784
// Reserves `count` sectors, links them into a single FAT chain in allocation
// order, writes the FAT and returns the chain. Returns an empty chain when
// `count` is zero.
sector_chain compound_document::allocate_sectors(std::size_t count)
{
    sector_chain chain;

    if (count == 0)
    {
        return chain;
    }

    chain.reserve(count);

    sector_id previous = allocate_sector();
    chain.push_back(previous);

    for (std::size_t remaining = count - 1; remaining > 0; --remaining)
    {
        // Allocate first, then link: allocate_sector() may resize FAT_.
        const sector_id next = allocate_sector();
        FAT_.at(previous) = next;
        chain.push_back(next);
        previous = next;
    }

    write_FAT();

    return chain;
}
4✔
805

806
// Collects the ids of all sectors in the chain that begins at `start`,
// following the links stored in `table` until a chain-end marker is reached.
//
// Throws std::out_of_range (from `table.at`) if a link points outside `table`.
// Throws xlnt::invalid_file if the chain is longer than `table`, which can
// only happen when the links form a cycle (corrupt file); without this check
// such a file would loop forever while growing the result.
sector_chain compound_document::follow_chain(sector_id start, const sector_chain &table)
{
    sector_chain chain;
    sector_id current = start;

    while (!is_chain_end(current))
    {
        chain.push_back(current);
        current = table.at(current);

        // Every visited id was a valid index into `table`, so more visits
        // than table entries means some sector was visited twice.
        if (chain.size() > table.size())
        {
            throw xlnt::invalid_file("invalid sector chain starting at sector " + std::to_string(start) +
                ": the chain contains a cycle");
        }
    }

    return chain;
}
×
819

820
// Entry-based overload: returns the chain that begins at the entry's start
// sector, or an empty chain when has_invalid_start_sector(entry) is true.
sector_chain compound_document::follow_chain(const compound_document_entry &entry, const sector_chain &table)
{
    if (!has_invalid_start_sector(entry))
    {
        return follow_chain(entry.start_sector, table);
    }

    return {};
}
831

832
// Returns the id found after following `index` links from `start` in `table`.
// If the chain ends before `index` links could be followed, ENDOFCHAIN is
// returned. If exactly `index` links were followed, the current id is returned
// even when it is itself a chain-end marker.
sector_id compound_document::chain_sector_at_index(sector_id start, const sector_chain &table, std::uint64_t index)
{
    sector_id current = start;
    std::uint64_t links_followed = 0;

    for (; !is_chain_end(current) && links_followed < index; ++links_followed)
    {
        current = table.at(current);
    }

    if (links_followed != index)
    {
        return ENDOFCHAIN;
    }

    return current;
}
850

NEW
851
// Entry-based overload: looks up position `index` in the chain that begins at
// the entry's start sector. Returns ENDOFCHAIN when
// has_invalid_start_sector(entry) is true.
sector_id compound_document::chain_sector_at_index(const compound_document_entry &entry, const sector_chain &table, std::uint64_t index)
{
    if (!has_invalid_start_sector(entry))
    {
        return chain_sector_at_index(entry.start_sector, table, index);
    }

    return ENDOFCHAIN;
}
862

863
// Walks the chain that begins at `start` and returns the id of its last
// sector, i.e. the sector whose `table` entry is a chain-end marker. Callers
// use the result as an index into `table` to append another sector.
//
// For an empty chain (`start` is already a chain-end marker) `start` is
// returned unchanged.
//
// If `num_sectors_out` is not null it receives the number of sectors in the
// chain.
//
// Throws std::out_of_range (from `table.at`) if a link points outside `table`.
// Throws xlnt::invalid_file if the chain is longer than `table`, which can
// only happen when the links form a cycle (corrupt file).
sector_id compound_document::last_chain_sector(sector_id start, const sector_chain &table, std::uint64_t *num_sectors_out)
{
    sector_id last = start;
    sector_id current = start;
    std::uint64_t num_sectors = 0;

    while (!is_chain_end(current))
    {
        // Remember the sector before stepping, so that the terminator value
        // is never returned for a non-empty chain.
        last = current;
        current = table.at(current);
        ++num_sectors;

        if (num_sectors > table.size())
        {
            throw xlnt::invalid_file("invalid sector chain starting at sector " + std::to_string(start) +
                ": the chain contains a cycle");
        }
    }

    if (num_sectors_out != nullptr)
    {
        *num_sectors_out = num_sectors;
    }

    return last;
}
881

NEW
882
// Entry-based overload: forwards to the sector_id overload using the entry's
// start sector. When has_invalid_start_sector(entry) is true, reports zero
// sectors through `num_sectors_out` (if not null) and returns ENDOFCHAIN.
sector_id compound_document::last_chain_sector(const compound_document_entry &entry, const sector_chain &table, std::uint64_t *num_sectors_out)
{
    if (!has_invalid_start_sector(entry))
    {
        return last_chain_sector(entry.start_sector, table, num_sectors_out);
    }

    if (num_sectors_out != nullptr)
    {
        *num_sectors_out = 0;
    }

    return ENDOFCHAIN;
}
897

NEW
898
// Reserves `count` mini sectors, links them into a single mini FAT chain in
// allocation order, writes the mini FAT and returns the chain. Returns an
// empty chain when `count` is zero.
sector_chain compound_document::allocate_mini_sectors(std::size_t count)
{
    sector_chain chain;

    if (count == 0)
    {
        return chain;
    }

    chain.reserve(count);

    sector_id previous = allocate_mini_sector();
    chain.push_back(previous);

    for (std::size_t remaining = count - 1; remaining > 0; --remaining)
    {
        // Allocate first, then link: allocate_mini_sector() may resize mini_FAT_.
        const sector_id next = allocate_mini_sector();
        mini_FAT_.at(previous) = next;
        chain.push_back(next);
        previous = next;
    }

    write_mini_FAT();

    return chain;
}
×
919

NEW
920
// Reserves one mini sector and returns its id.
//
// The first FREESECT slot of mini_FAT_ is used. When mini_FAT_ has no free
// slot, a new mini FAT sector is allocated, appended to the mini FAT chain
// (or made its start), counted in the header, and written out. The reserved
// slot is marked ENDOFCHAIN and the mini FAT is written. Finally the mini
// stream container (the stream of directory entry 0) is grown by one sector
// if it is too short to hold the new mini sector.
sector_id compound_document::allocate_mini_sector()
{
    const auto sectors_per_sector = static_cast<std::size_t>(sector_size() / sizeof(sector_id));
    auto next_free_iter = std::find(mini_FAT_.begin(), mini_FAT_.end(), FREESECT);

    if (next_free_iter == mini_FAT_.end())
    {
        const sector_id new_mini_FAT_sector_id = allocate_sector();

        if (is_chain_end(header_.mini_FAT_start_sector))
        {
            header_.mini_FAT_start_sector = new_mini_FAT_sector_id;
        }
        else
        {
            const sector_id last_mini_FAT_sector = last_chain_sector(header_.mini_FAT_start_sector, FAT_);
            FAT_.at(last_mini_FAT_sector) = new_mini_FAT_sector_id;
            write_FAT();
        }

        // The header field counts mini FAT *sectors*, so it only changes when
        // a mini FAT sector is added - not on every mini sector allocation.
        ++header_.num_mini_FAT_sectors;
        write_header();

        const std::size_t old_size = mini_FAT_.size();
        mini_FAT_.resize(old_size + sectors_per_sector, FREESECT);

        // The reader offset is counted in elements: the entries that belong
        // to the new mini FAT sector start at element `old_size`.
        binary_reader<sector_id> mini_FAT_reader(mini_FAT_);
        mini_FAT_reader.offset(old_size);
        write_sector(mini_FAT_reader, new_mini_FAT_sector_id);

        // The resize invalidated the iterator; search again.
        next_free_iter = std::find(mini_FAT_.begin(), mini_FAT_.end(), FREESECT);
    }

    const auto next_free = static_cast<sector_id>(next_free_iter - mini_FAT_.begin());
    mini_FAT_.at(next_free) = ENDOFCHAIN;

    write_mini_FAT();

    // Number of container sectors needed so that mini sector `next_free`
    // lies inside the mini stream (always at least one).
    const std::uint64_t mini_sectors_per_sector = sector_size() / mini_sector_size();
    const std::uint64_t required_container_sectors = next_free / mini_sectors_per_sector + 1;

    if (has_invalid_start_sector(entries_.at(0)))
    {
        entries_.at(0).start_sector = allocate_sector();
        write_entry(0);
    }

    std::uint64_t num_sectors = 0;
    const sector_id container_last_sector = last_chain_sector(entries_.at(0), FAT_, &num_sectors);

    if (required_container_sectors > num_sectors)
    {
        // Allocate before indexing FAT_: allocate_sector() may resize FAT_,
        // and the evaluation order of `FAT_.at(x) = allocate_sector()` is
        // unspecified before C++17.
        const sector_id new_container_sector = allocate_sector();
        FAT_.at(container_last_sector) = new_container_sector;
        write_FAT();
    }

    return next_free;
}
983

984
// Returns the id of a directory entry whose type is Unallocated.
//
// If an existing entry is unallocated, the id of the first such entry is
// returned. Otherwise the directory is extended by one sector: the sector is
// allocated and linked to the end of the directory chain (or becomes its
// start), one sector's worth of default-constructed entries is appended to
// entries_ and written, and the id of the first new entry is returned.
directory_id compound_document::next_unallocated_entry()
{
    directory_id entry_id = 0;

    for (; entry_id < entries_.size(); ++entry_id)
    {
        if (entries_.at(entry_id).type == compound_document_entry::entry_type::Unallocated)
        {
            return entry_id;
        }
    }

    // entry_id is now equal to entries_.size()

    if (is_chain_end(header_.directory_start_sector))
    {
        header_.directory_start_sector = allocate_sector();
    }
    else
    {
        const sector_id last_directory_sector = last_chain_sector(header_.directory_start_sector, FAT_);
        // Allocate before indexing FAT_: allocate_sector() may resize FAT_,
        // and the evaluation order of `FAT_.at(x) = allocate_sector()` is
        // unspecified before C++17.
        const sector_id new_directory_sector = allocate_sector();
        FAT_.at(last_directory_sector) = new_directory_sector;
        write_FAT();
    }

    const auto entries_per_sector = static_cast<std::size_t>(sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE);

    entries_.reserve(entries_.size() + entries_per_sector);
    for (std::size_t i = 0; i < entries_per_sector; ++i)
    {
        entries_.emplace_back();
        write_entry(entry_id + static_cast<directory_id>(i));
    }

    return entry_id;
}
1020

1021
// Creates a directory entry of the given type for the path `name` and returns
// its id.
//
// The last path component becomes the entry name. When more than one
// component remains after removing it, the remaining components are joined
// and looked up as a Storage entry, which becomes the parent; otherwise the
// parent is entry 0. The entry is inserted into the parent's tree and the
// directory is written.
//
// Throws xlnt::key_not_found if the parent storage lookup fails.
directory_id compound_document::insert_entry(
    const std::string &name,
    compound_document_entry::entry_type type)
{
    const directory_id new_id = next_unallocated_entry();
    compound_document_entry &new_entry = entries_.at(new_id);

    std::vector<std::string> components = split_path(name);
    const std::string leaf_name = last_elem(components);
    components.pop_back();

    directory_id storage_id = 0;

    if (components.size() > 1)
    {
        const std::string storage_path = join_path(components);
        storage_id = find_entry(storage_path, compound_document_entry::entry_type::Storage);

        if (is_invalid_entry(storage_id))
        {
            throw xlnt::key_not_found("parent compound document entry of type 1 (Storage) not found at path \"" + storage_path + "\", "
                "necessary to insert entry \"" + name + "\" of type " + std::to_string(static_cast<int>(type)));
        }

        parent_storage_[new_id] = storage_id;
    }

    new_entry.name(leaf_name);
    new_entry.type = type;

    tree_insert(new_id, storage_id);
    write_directory();

    return new_id;
}
12✔
1055

1056
std::uint64_t compound_document::sector_data_start()
17,350✔
1057
{
1058
    return sizeof(compound_document_header);
17,350✔
1059
}
1060

1061
bool compound_document::contains_entry(const std::string &path,
72✔
1062
    compound_document_entry::entry_type type)
1063
{
1064
    return !is_invalid_entry(find_entry(path, type));
72✔
1065
}
1066

1067
// Returns the id of the first entry whose type equals `type` and whose
// tree_path() equals `name`, or NOSTREAM when there is none. A RootStorage
// lookup for "/" or "/Root Entry" always yields entry 0.
directory_id compound_document::find_entry(const std::string &name,
    compound_document_entry::entry_type type)
{
    const bool wants_root = type == compound_document_entry::entry_type::RootStorage
        && (name == "/" || name == "/Root Entry");

    if (wants_root)
    {
        return 0;
    }

    for (std::size_t index = 0; index < entries_.size(); ++index)
    {
        // Compare the cheap type field first; the path is only built on a type match.
        if (entries_[index].type != type)
        {
            continue;
        }

        const auto candidate = static_cast<directory_id>(index);

        if (tree_path(candidate) == name)
        {
            return candidate;
        }
    }

    return NOSTREAM;
}
1087

1088
void compound_document::print_directory()
×
1089
{
NEW
1090
    directory_id entry_id = 0;
×
1091

NEW
1092
    for (const compound_document_entry &entry : entries_)
×
1093
    {
NEW
1094
        if (entry.type == compound_document_entry::entry_type::Stream)
×
1095
        {
1096
            std::cout << tree_path(entry_id) << std::endl;
×
1097
        }
1098

1099
        ++entry_id;
×
1100
    }
1101
}
×
1102

1103
void compound_document::write_directory()
998✔
1104
{
1105
    for (std::size_t entry_id = 0; entry_id < entries_.size(); ++entry_id)
4,990✔
1106
    {
1107
        write_entry(static_cast<directory_id>(entry_id));
3,992✔
1108
    }
1109
}
998✔
1110

1111
void compound_document::read_directory()
32✔
1112
{
1113
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
32✔
1114
    std::uint64_t num_sectors = 0;
32✔
1115
    last_chain_sector(header_.directory_start_sector, FAT_, &num_sectors);
32✔
1116
    const std::size_t num_entries = static_cast<std::size_t>(num_sectors * entries_per_sector);
32✔
1117

1118
    entries_.reserve(entries_.size() + num_entries);
32✔
1119
    for (std::size_t entry_id = 0; entry_id < num_entries; ++entry_id)
232✔
1120
    {
1121
        entries_.emplace_back();
200✔
1122
        read_entry(static_cast<directory_id>(entry_id));
200✔
1123
    }
1124

1125
    std::vector<directory_id> stack;
32✔
1126
    std::vector<directory_id> storage_siblings;
32✔
1127
    std::vector<directory_id> stream_siblings;
32✔
1128

1129
    std::vector<directory_id> directory_stack;
32✔
1130
    directory_stack.push_back(0u);
32✔
1131

1132
    while (!directory_stack.empty())
100✔
1133
    {
1134
        directory_id current_storage_id = directory_stack.back();
68✔
1135
        directory_stack.pop_back();
68✔
1136

1137
        if (is_invalid_entry(tree_child(current_storage_id))) continue;
68!
1138

1139
        std::vector<directory_id> storage_stack;
68✔
1140
        directory_id storage_root_id = tree_child(current_storage_id);
68✔
1141
        parent_[storage_root_id] = NOSTREAM;
68✔
1142
        storage_stack.push_back(storage_root_id);
68✔
1143

1144
        while (!storage_stack.empty())
204✔
1145
        {
1146
            directory_id current_entry_id = storage_stack.back();
136✔
1147
            const compound_document_entry &current_entry = entries_.at(current_entry_id);
136✔
1148
            storage_stack.pop_back();
136✔
1149

1150
            parent_storage_[current_entry_id] = current_storage_id;
136✔
1151

1152
            if (current_entry.type == compound_document_entry::entry_type::Storage)
136✔
1153
            {
1154
                directory_stack.push_back(current_entry_id);
36✔
1155
            }
1156

1157
            if (!is_invalid_entry(tree_left(current_entry_id)))
136✔
1158
            {
1159
                storage_stack.push_back(tree_left(current_entry_id));
18✔
1160
                tree_parent(tree_left(current_entry_id)) = current_entry_id;
18✔
1161
            }
1162

1163
            if (!is_invalid_entry(tree_right(current_entry_id)))
136✔
1164
            {
1165
                storage_stack.push_back(tree_right(current_entry_id));
50✔
1166
                tree_parent(tree_right(current_entry_id)) = current_entry_id;
50✔
1167
            }
1168
        }
1169
    }
68✔
1170
}
32✔
1171

1172
// Inserts entry `new_id` into the sibling tree of storage `storage_id`.
//
// If the storage has no tree yet, the entry becomes its black root (the
// storage's child link is left untouched when `new_id` is 0). Otherwise the
// entry is attached as a leaf at the position given by compare_keys() and the
// tree is rebalanced with tree_insert_fixup().
void compound_document::tree_insert(directory_id new_id, directory_id storage_id)
{
    using entry_color = compound_document_entry::entry_color;

    parent_storage_[new_id] = storage_id;

    tree_left(new_id) = NOSTREAM;
    tree_right(new_id) = NOSTREAM;

    directory_id &root = tree_root(new_id);

    if (is_invalid_entry(root))
    {
        // Empty tree: the new entry is the root.
        if (new_id != 0)
        {
            root = new_id;
        }

        tree_color(new_id) = entry_color::Black;
        tree_parent(new_id) = NOSTREAM;

        return;
    }

    // Plain binary-search-tree descent; balancing happens afterwards.
    const std::string new_key = tree_key(new_id);
    directory_id cursor = root;
    directory_id attach_to = NOSTREAM;
    bool goes_right = false;

    while (!is_invalid_entry(cursor))
    {
        attach_to = cursor;
        goes_right = compare_keys(new_key, tree_key(cursor)) > 0;
        cursor = goes_right ? tree_right(cursor) : tree_left(cursor);
    }

    tree_parent(new_id) = attach_to;

    // `goes_right` still holds the comparison against `attach_to`.
    if (goes_right)
    {
        tree_right(attach_to) = new_id;
    }
    else
    {
        tree_left(attach_to) = new_id;
    }

    tree_insert_fixup(new_id);
}
1226

1227
std::string compound_document::tree_path(directory_id id)
244✔
1228
{
1229
    directory_id storage_id = parent_storage_.at(id);
244✔
1230
    std::vector<std::string> result;
244✔
1231

1232
    while (storage_id > 0)
328✔
1233
    {
1234
        storage_id = parent_storage_.at(storage_id);
84✔
1235
        result.emplace_back(entries_.at(storage_id).name());
84✔
1236
    }
1237

1238
    return "/" + join_path(result) + entries_.at(id).name();
488✔
1239
}
244✔
1240

1241
// Left rotation around `x`: the right child of `x` takes the place of `x`,
// and `x` becomes that node's left child.
void compound_document::tree_rotate_left(directory_id x)
{
    const directory_id pivot = tree_right(x);

    // The pivot's left subtree becomes the right subtree of x.
    const directory_id moved_subtree = tree_left(pivot);
    tree_right(x) = moved_subtree;

    if (!is_invalid_entry(moved_subtree))
    {
        tree_parent(moved_subtree) = x;
    }

    // The pivot takes over x's place below x's former parent.
    const directory_id former_parent = tree_parent(x);
    tree_parent(pivot) = former_parent;

    if (is_invalid_entry(former_parent))
    {
        tree_root(x) = pivot;
    }
    else
    {
        directory_id &link_to_x = (x == tree_left(former_parent))
            ? tree_left(former_parent)
            : tree_right(former_parent);
        link_to_x = pivot;
    }

    // Finally hang x below the pivot.
    tree_left(pivot) = x;
    tree_parent(x) = pivot;
}
×
1273

1274
// Right rotation around `y`: the left child of `y` takes the place of `y`,
// and `y` becomes that node's right child.
void compound_document::tree_rotate_right(directory_id y)
{
    const directory_id pivot = tree_left(y);

    // The pivot's right subtree becomes the left subtree of y.
    const directory_id moved_subtree = tree_right(pivot);
    tree_left(y) = moved_subtree;

    if (!is_invalid_entry(moved_subtree))
    {
        tree_parent(moved_subtree) = y;
    }

    // The pivot takes over y's place below y's former parent.
    const directory_id former_parent = tree_parent(y);
    tree_parent(pivot) = former_parent;

    if (is_invalid_entry(former_parent))
    {
        tree_root(y) = pivot;
    }
    else
    {
        directory_id &link_to_y = (y == tree_left(former_parent))
            ? tree_left(former_parent)
            : tree_right(former_parent);
        link_to_y = pivot;
    }

    // Finally hang y below the pivot.
    tree_right(pivot) = y;
    tree_parent(y) = pivot;
}
×
1306

1307
// Recolors and rotates the sibling tree after `x` was attached as a leaf.
//
// `x` is colored red. While `x` is not the root and its parent is red, one of
// three cases is applied (mirrored depending on whether the parent is a left
// or right child of the grandparent):
//  - case 1: the uncle is red -> recolor and continue from the grandparent;
//  - case 2: `x` is the inner child -> rotate at the parent to reach case 3;
//  - case 3: recolor parent/grandparent and rotate at the grandparent.
// The root is colored black at the end.
void compound_document::tree_insert_fixup(directory_id x)
{
    using entry_color = compound_document_entry::entry_color;

    tree_color(x) = entry_color::Red;

    while (x != tree_root(x))
    {
        const directory_id parent = tree_parent(x);

        if (tree_color(parent) != entry_color::Red)
        {
            break;
        }

        const directory_id grandparent = tree_parent(parent);
        const bool parent_is_left = (parent == tree_left(grandparent));
        const directory_id uncle = parent_is_left ? tree_right(grandparent) : tree_left(grandparent);

        if (!is_invalid_entry(uncle) && tree_color(uncle) == entry_color::Red)
        {
            // case 1
            tree_color(parent) = entry_color::Black;
            tree_color(uncle) = entry_color::Black;
            tree_color(grandparent) = entry_color::Red;
            x = grandparent;
            continue;
        }

        const bool x_is_inner_child = parent_is_left
            ? (x == tree_right(parent))
            : (x == tree_left(parent));

        if (x_is_inner_child)
        {
            // case 2
            x = parent;

            if (parent_is_left)
            {
                tree_rotate_left(x);
            }
            else
            {
                tree_rotate_right(x);
            }
        }

        // case 3 - the links are re-read because case 2 may have rotated the tree
        const directory_id current_parent = tree_parent(x);
        tree_color(current_parent) = entry_color::Black;

        const directory_id current_grandparent = tree_parent(current_parent);
        tree_color(current_grandparent) = entry_color::Red;

        if (parent_is_left)
        {
            tree_rotate_right(current_grandparent);
        }
        else
        {
            tree_rotate_left(current_grandparent);
        }
    }

    tree_color(tree_root(x)) = entry_color::Black;
}
4✔
1373

1374
// Returns a mutable reference to the left-sibling link of entry `id`.
// Throws std::out_of_range if `id` is not a valid index into entries_.
directory_id &compound_document::tree_left(directory_id id)
{
    compound_document_entry &entry = entries_.at(id);
    return entry.left_sibling;
}
1378

1379
// Returns a mutable reference to the right-sibling link of entry `id`.
// Throws std::out_of_range if `id` is not a valid index into entries_.
directory_id &compound_document::tree_right(directory_id id)
{
    compound_document_entry &entry = entries_.at(id);
    return entry.right_sibling;
}
1383

1384
// Returns a mutable reference to the tree-parent slot of entry `id`.
directory_id &compound_document::tree_parent(directory_id id)
{
    // operator[] creates the slot when it does not exist yet; this is intended.
    directory_id &parent_slot = parent_[id];
    return parent_slot;
}
1389

1390
// Returns a mutable reference to the root link of the tree that contains
// entry `id`, i.e. the child link of the storage recorded for `id` in
// parent_storage_.
directory_id &compound_document::tree_root(directory_id id)
{
    const directory_id owning_storage = parent_storage_.at(id);
    return tree_child(owning_storage);
}
1394

1395
// Returns a mutable reference to the child link of entry `id`.
// Throws std::out_of_range if `id` is not a valid index into entries_.
directory_id &compound_document::tree_child(directory_id id)
{
    compound_document_entry &entry = entries_.at(id);
    return entry.child;
}
1399

1400
std::string compound_document::tree_key(directory_id id)
16✔
1401
{
1402
    return entries_.at(id).name();
16✔
1403
}
1404

1405
// Returns a mutable reference to the color of entry `id`.
// Throws std::out_of_range if `id` is not a valid index into entries_.
compound_document_entry::entry_color &compound_document::tree_color(directory_id id)
{
    compound_document_entry &entry = entries_.at(id);
    return entry.color;
}
1409

1410
void compound_document::read_header()
32✔
1411
{
1412
    // Exception handling could provide useful information about why errors have occurred.
1413
    auto previous_exception_mask = in_->exceptions();
32✔
1414
    in_->exceptions(std::istream::failbit | std::istream::badbit);
32✔
1415

1416
    try
1417
    {
1418
        in_->seekg(0, std::ios::beg);
32✔
1419
    }
NEW
1420
    catch (const std::exception &ex)
×
1421
    {
NEW
1422
        throw xlnt::invalid_file("Failed reading compound document header: could not seek to position 0. Reason: " + std::string(ex.what()));
×
NEW
1423
    }
×
1424

1425
    try
1426
    {
1427
        in_->read(reinterpret_cast<char *>(&header_), sizeof(compound_document_header));
32✔
1428
    }
NEW
1429
    catch (const std::exception &ex)
×
1430
    {
NEW
1431
        throw xlnt::invalid_file("Failed reading compound document header: could not read " + std::to_string(sizeof(compound_document_header)) +
×
NEW
1432
            " bytes of header at position 0. Reason: " + ex.what());
×
NEW
1433
    }
×
1434

1435
    // Header Signature (8 bytes): Identification signature for the compound file structure, and MUST be
1436
    // set to the value 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1.
1437
    if (header_.header_signature != 0xE11AB1A1E011CFD0)
32!
1438
    {
NEW
1439
        throw xlnt::invalid_file("invalid header signature, expected 0xE11AB1A1E011CFD0 but got " + format_hex(header_.header_signature));
×
1440
    }
1441

1442
    // Header CLSID (16 bytes): Reserved and unused class ID that MUST be set to all zeroes (CLSID_NULL).
1443
    if (std::any_of(header_.header_clsid.begin(), header_.header_clsid.end(), [](std::uint8_t i) { return i != 0; }))
544!
1444
    {
NEW
1445
        std::string exception_str = "invalid header CLSID, expected only zeros but got: ";
×
NEW
1446
        for (std::uint8_t val : header_.header_clsid)
×
1447
        {
NEW
1448
            exception_str += fmt::format("{:02x} ", val);
×
1449
        }
NEW
1450
        throw xlnt::invalid_file(exception_str);
×
NEW
1451
    }
×
1452

1453
    // Major Version (2 bytes): Version number for breaking changes. This field MUST be set to either
1454
    // 0x0003 (version 3) or 0x0004 (version 4).
1455
    if (header_.major_version != 3 && header_.major_version != 4)
32!
1456
    {
NEW
1457
        throw xlnt::invalid_file("invalid major version, expected 3 or 4 but got " + std::to_string(header_.major_version));
×
1458
    }
1459

1460
    // Byte Order (2 bytes): This field MUST be set to 0xFFFE. This field is a byte order mark for all integer
1461
    // fields, specifying little-endian byte order.
1462
    if (header_.byte_order != compound_document_header::byte_order_type::little_endian)
32!
1463
    {
NEW
1464
        throw xlnt::invalid_file("invalid byte order, expected 0xFFFE (little-endian) but got " +
×
NEW
1465
            fmt::format("0x{:04X}", static_cast<std::uint16_t>(header_.byte_order)));
×
1466
    }
1467

1468
    // Sector Shift (2 bytes): This field MUST be set to 0x0009, or 0x000c, depending on the Major
1469
    // Version field. This field specifies the sector size of the compound file as a power of 2.
1470
    // - If Major Version is 3, the Sector Shift MUST be 0x0009, specifying a sector size of 512 bytes.
1471
    // - If Major Version is 4, the Sector Shift MUST be 0x000C, specifying a sector size of 4096 bytes.
1472
    if (!((header_.major_version == 3 && header_.sector_shift == 0x0009) ||
32!
NEW
1473
        (header_.major_version == 4 && header_.sector_shift == 0x000C)))
×
1474
    {
NEW
1475
        throw xlnt::invalid_file("invalid combination of sector shift and major version, got sector_shift = " +
×
NEW
1476
            fmt::format("0x{:04X}", header_.sector_shift) + "; major_version = " + std::to_string(header_.major_version));
×
1477
    }
1478

1479
    // Mini Sector Shift (2 bytes): This field MUST be set to 0x0006. This field specifies the sector size of
1480
    // the Mini Stream as a power of 2. The sector size of the Mini Stream MUST be 64 bytes.
1481
    if (header_.mini_sector_shift != 0x0006)
32!
1482
    {
NEW
1483
        throw xlnt::invalid_file("invalid mini sector shift, expected 0x0006 but got " + fmt::format("0x{:04X}", header_.mini_sector_shift));
×
1484
    }
1485

1486
    // Reserved (6 bytes): This field MUST be set to all zeroes.
1487
    if (std::any_of(header_.reserved.begin(), header_.reserved.end(), [](std::uint8_t i) { return i != 0; }))
224!
1488
    {
NEW
1489
        std::string exception_str = "invalid reserved field, expected only zeros but got: ";
×
NEW
1490
        for (std::uint8_t val : header_.reserved)
×
1491
        {
NEW
1492
            exception_str += fmt::format("{:02x} ", val);
×
1493
        }
NEW
1494
        throw xlnt::invalid_file(exception_str);
×
NEW
1495
    }
×
1496

1497
    // Number of Directory Sectors (4 bytes): This integer field contains the count of the number of
1498
    // directory sectors in the compound file.
1499
    // - If Major Version is 3, the Number of Directory Sectors MUST be zero. This field is not
1500
    //   supported for version 3 compound files.
1501
    if (header_.major_version == 3 && header_.num_directory_sectors != 0)
32!
1502
    {
NEW
1503
        throw xlnt::invalid_file("invalid number of directory sectors for major version 3: expected 0 directory sectors but got " +
×
NEW
1504
            std::to_string(header_.num_directory_sectors));
×
1505
    }
1506

1507
    // Mini Stream Cutoff Size (4 bytes): This integer field MUST be set to 0x00001000. This field
1508
    // specifies the maximum size of a user-defined data stream that is allocated from the mini FAT
1509
    // and mini stream, and that cutoff is 4,096 bytes. Any user-defined data stream that is greater than
1510
    // or equal to this cutoff size must be allocated as normal sectors from the FAT.
1511
    if (header_.mini_stream_cutoff_size != 0x00001000)
32!
1512
    {
NEW
1513
        throw xlnt::invalid_file("invalid mini stream cutoff size, expected 0x00001000 but got " + format_hex(header_.mini_stream_cutoff_size));
×
1514
    }
1515

1516
    // DIFAT (436 bytes): This array of 32-bit integer fields contains the first 109 FAT sector locations of
1517
    // the compound file.
1518
    // - For version 4 compound files, the header size (512 bytes) is less than the sector size (4,096
1519
    //   bytes), so the remaining part of the header (3,584 bytes) MUST be filled with all zeroes.
1520
    if (header_.major_version == 4)
32!
1521
    {
NEW
1522
        std::array<std::uint8_t, 3584> remaining {{ 0 }};
×
1523

1524
        try
1525
        {
NEW
1526
            in_->read(reinterpret_cast<char *>(remaining.data()), sizeof(remaining));
×
1527
        }
NEW
1528
        catch (const std::exception &ex)
×
1529
        {
NEW
1530
            throw xlnt::invalid_file("Failed reading compound document header: could not read " + std::to_string(sizeof(remaining)) +
×
NEW
1531
                " bytes of remaining data. Reason: " + ex.what());
×
NEW
1532
        }
×
1533

NEW
1534
        if (std::any_of(remaining.begin(), remaining.end(), [](std::uint8_t i) { return i != 0; }))
×
1535
        {
NEW
1536
            std::string exception_str = "invalid remaining bytes in header (major version 4), expected only zeros but got: ";
×
NEW
1537
            for (std::uint8_t val : remaining)
×
1538
            {
NEW
1539
                exception_str += fmt::format("{:02x} ", val);
×
1540
            }
NEW
1541
            throw xlnt::invalid_file(exception_str);
×
NEW
1542
        }
×
1543
    }
1544

1545
    in_->exceptions(previous_exception_mask);
32✔
1546
}
32✔
1547

1548
void compound_document::read_DIFAT()
32✔
1549
{
1550
    DIFAT_.clear();
32✔
1551

1552
    sector_id DIFAT_sector = header_.DIFAT_start_sector;
32✔
1553
    binary_writer<sector_id> DIFAT_writer(DIFAT_);
32✔
1554

1555
    for (std::uint32_t i = 0u; i < header_.num_FAT_sectors; ++i)
91✔
1556
    {
1557
        if (i < 109u)
59!
1558
        {
1559
            DIFAT_writer.write(header_.DIFAT.at(i));
59✔
1560
        }
1561
        else
1562
        {
NEW
1563
            read_sector(DIFAT_sector, DIFAT_writer);
×
1564

NEW
1565
            DIFAT_sector = last_elem(DIFAT_);
×
NEW
1566
            DIFAT_.pop_back();
×
1567
        }
1568
    }
1569
}
32✔
1570

1571
void compound_document::read_FAT()
32✔
1572
{
1573
    FAT_.clear();
32✔
1574
    binary_writer<sector_id> FAT_writer(FAT_);
32✔
1575

1576
    for (sector_id DIFAT_sector : DIFAT_)
91✔
1577
    {
1578
        read_sector(DIFAT_sector, FAT_writer);
59✔
1579
    }
1580
}
32✔
1581

1582
void compound_document::read_mini_FAT()
32✔
1583
{
1584
    mini_FAT_.clear();
32✔
1585
    binary_writer<sector_id> mini_FAT_writer(mini_FAT_);
32✔
1586

1587
    for (sector_id mini_FAT_sector : follow_chain(header_.mini_FAT_start_sector, FAT_))
64✔
1588
    {
1589
        read_sector(mini_FAT_sector, mini_FAT_writer);
32✔
1590
    }
32✔
1591
}
32✔
1592

NEW
1593
std::string compound_document_entry::format_info(
×
1594
    directory_id entry_id,
1595
    sector_id sector_id,
1596
    /// IMPORTANT: only show the name after the name and its length have been validated!
1597
    bool show_entry_name) const
1598
{
1599
    // The formatted IDs should be as short as possible to keep the exception message readable - so we do not add leading zeros.
NEW
1600
    std::string message = "(entry " + fmt::format("0x{:X}", entry_id);
×
NEW
1601
    if (show_entry_name)
×
1602
    {
NEW
1603
        message += " with name \"";
×
1604
        // Only add the name if the conversion does not throw an exception itself!
1605
        try
1606
        {
NEW
1607
            message += name();
×
1608
        }
NEW
1609
        catch (const std::exception &ex)
×
1610
        {
NEW
1611
            message += "INVALID (";
×
NEW
1612
            message += ex.what();
×
NEW
1613
            message.push_back(')');
×
NEW
1614
        }
×
NEW
1615
        message.push_back('"');
×
1616
    }
NEW
1617
    message += " of type " + std::to_string(static_cast<int>(type)) +
×
NEW
1618
        " in sector " + fmt::format("0x{:X}", sector_id) + ")";
×
NEW
1619
    return message;
×
NEW
1620
}
×
1621

1622
void check_unallocated_entry(
32✔
1623
    const compound_document_entry &entry,
1624
    directory_id id,
1625
    sector_id directory_sector)
1626
{
1627
    if (entry.type != compound_document_entry::entry_type::Unallocated)
32!
1628
    {
NEW
1629
        throw xlnt::invalid_parameter("invalid entry type " +
×
NEW
1630
            entry.format_info(id, directory_sector, false) +
×
NEW
1631
            ", expected 0 (Unallocated) but got " + std::to_string(static_cast<int>(entry.type)));
×
1632
    }
1633

1634
    // Free (unused) directory entries are marked with Object Type 0x0 (unknown or unallocated). The
1635
    // entire directory entry must consist of all zeroes except for the child, right sibling, and left sibling
1636
    // pointers, which must be initialized to NOSTREAM (0xFFFFFFFF).
1637

1638
    // NOTE: Some implementations seem to not initialize this buffer at all, so we cannot check it for correctness.
1639
    /*if (std::any_of(entry.directory_entry_name.begin(), entry.directory_entry_name.end(), [](char16_t i) { return i != 0; }))
1640
    {
1641
        std::string exception_str = "invalid directory entry name " +
1642
            entry.format_info(id, directory_sector, false) +
1643
            ", expected all zeros but got: ";
1644
        for (char16_t val : entry.directory_entry_name)
1645
        {
1646
            exception_str += fmt::format("{:04x} ", static_cast<std::uint16_t>(val));
1647
        }
1648
        throw xlnt::invalid_file(exception_str);
1649
    }*/
1650

1651
    if (entry.directory_entry_name_length != 0)
32!
1652
    {
NEW
1653
        throw xlnt::invalid_file("invalid directory entry name length " + entry.format_info(id, directory_sector, false) +
×
NEW
1654
            ", expected 0 but got " + std::to_string(entry.directory_entry_name_length));
×
1655
    }
1656

1657
    if (entry.color != compound_document_entry::entry_color::Red)
32!
1658
    {
NEW
1659
        throw xlnt::invalid_file("invalid entry color " + entry.format_info(id, directory_sector, false) +
×
NEW
1660
            ", expected 0 (Red) but got " + std::to_string(static_cast<int>(entry.color)));
×
1661
    }
1662

1663
    if (entry.left_sibling != NOSTREAM || entry.right_sibling != NOSTREAM || entry.child != NOSTREAM)
32!
1664
    {
NEW
1665
        throw xlnt::invalid_file("unallocated entry contains invalid child or sibling " +
×
NEW
1666
            entry.format_info(id, directory_sector, false) +
×
NEW
1667
            ", expected all to be NOSTREAM (0xFFFFFFFF) but got" +
×
NEW
1668
            ": left_sibling = " + fmt::format("0x{:08X}", (entry.left_sibling)) +
×
NEW
1669
            "; right_sibling = " + fmt::format("0x{:08X}", (entry.right_sibling)) +
×
NEW
1670
            "; child = " + fmt::format("0x{:08X}", (entry.child)));
×
1671
    }
1672

1673
    if (std::any_of(entry.clsid.begin(), entry.clsid.end(), [](std::uint8_t i) { return i != 0; }))
544!
1674
    {
NEW
1675
        std::string exception_str = "invalid entry CLSID " + entry.format_info(id, directory_sector, false) +
×
NEW
1676
            ", expected all zeros but got: ";
×
NEW
1677
        for (std::uint8_t val : entry.clsid)
×
1678
        {
NEW
1679
            exception_str += fmt::format("{:02x} ", val);
×
1680
        }
NEW
1681
        throw xlnt::invalid_file(exception_str);
×
NEW
1682
    }
×
1683

1684
    if (entry.state_bits != 0)
32!
1685
    {
NEW
1686
        throw xlnt::invalid_file("invalid entry state bits " + entry.format_info(id, directory_sector, false) +
×
NEW
1687
            ", expected 0 but got " + std::to_string(entry.state_bits));
×
1688
    }
1689

1690
    // NOTE: some implementations seem to use the timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC.
1691
    if (entry.creation_time != 0 && entry.creation_time != 116444736000000000)
32!
1692
    {
NEW
1693
        throw xlnt::invalid_file("invalid entry creation time " + entry.format_info(id, directory_sector, false) +
×
NEW
1694
            ", expected 0 or 116444736000000000, but got " + std::to_string(entry.creation_time));
×
1695
    }
1696

1697
    // NOTE: some implementations seem to use the timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC.
1698
    if (entry.modified_time != 0 && entry.modified_time != 116444736000000000)
32!
1699
    {
NEW
1700
        throw xlnt::invalid_file("invalid entry modification time " + entry.format_info(id, directory_sector, false) +
×
NEW
1701
            ", expected 0 or 116444736000000000, but got " + std::to_string(entry.modified_time));
×
1702
    }
1703

1704
    // According to the specification (see above), it must be 0, but it seems that some immplementations
1705
    // initialize it with ENDOFCHAIN or FREESECT, which is honestly not wrong either. So let's accept that.
1706
    if (entry.start_sector != 0 && entry.start_sector != ENDOFCHAIN && entry.start_sector != FREESECT)
32!
1707
    {
NEW
1708
        throw xlnt::invalid_file("invalid entry start sector location " + entry.format_info(id, directory_sector, false) +
×
NEW
1709
            ", expected 0 or ENDOFCHAIN (0xFFFFFFFE) or FREESECT (0xFFFFFFFF), but got " + format_hex(entry.start_sector));
×
1710
    }
1711

1712
    if (entry.stream_size != 0)
32!
1713
    {
NEW
1714
        throw xlnt::invalid_file("invalid entry stream size " + entry.format_info(id, directory_sector, false) +
×
NEW
1715
            ", expected 0 but got " + std::to_string(entry.stream_size));
×
1716
    }
1717
}
32✔
1718

1719
void check_non_unallocated_entry(
168✔
1720
    const compound_document_entry &entry,
1721
    directory_id id,
1722
    sector_id directory_sector)
1723
{
1724
    if (entry.type == compound_document_entry::entry_type::Unallocated)
168!
1725
    {
NEW
1726
        throw xlnt::invalid_parameter("invalid entry type " +
×
NEW
1727
            entry.format_info(id, directory_sector, false) +
×
NEW
1728
            ", expected different than 0 (Unallocated) but got 0 (Unallocated)");
×
1729
    }
1730

1731
    // First check the length, as we'll need this for the string itself.
1732
    // Directory Entry Name Length (2 bytes): This field MUST match the length of the Directory Entry
1733
    // Name Unicode string in bytes. The length MUST be a multiple of 2 and include the terminating null
1734
    // character in the count. This length MUST NOT exceed 64, the maximum size of the Directory Entry
1735
    // Name field.
1736
    if (entry.directory_entry_name_length < 2 || entry.directory_entry_name_length > 64)
168!
1737
    {
NEW
1738
        throw xlnt::invalid_file("invalid entry name length " +
×
NEW
1739
            entry.format_info(id, directory_sector, false) +
×
NEW
1740
            ", expected >= 2 and <= 64, but got " + std::to_string(entry.directory_entry_name_length));
×
1741
    }
1742
    else if (entry.directory_entry_name_length % 2 != 0)
168!
1743
    {
NEW
1744
        throw xlnt::invalid_file("invalid entry name length " +
×
NEW
1745
            entry.format_info(id, directory_sector, false) +
×
NEW
1746
            ", which must be a multiple of 2, but got " + std::to_string(entry.directory_entry_name_length));
×
1747
    }
1748

1749
    // Directory Entry Name (64 bytes): This field MUST contain a Unicode string for the storage or
1750
    // stream name encoded in UTF-16. The name MUST be terminated with a UTF-16 terminating null
1751
    // character. Thus, storage and stream names are limited to 32 UTF-16 code points, including the
1752
    // terminating null character. When locating an object in the compound file except for the root
1753
    // storage, the directory entry name is compared by using a special case-insensitive uppercase
1754
    // mapping, described in Red-Black Tree. The following characters are illegal and MUST NOT be part
1755
    // of the name: '/', '\', ':', '!'.
1756
    std::uint16_t name_length_characters = (entry.directory_entry_name_length / 2) - 1; // does NOT include \0 at the end
168✔
1757
    if (entry.directory_entry_name.at(name_length_characters) != u'\0')
168!
1758
    {
NEW
1759
        std::string exception_str = "invalid directory entry name " +
×
NEW
1760
            entry.format_info(id, directory_sector, false) +
×
NEW
1761
            ", which must be terminated with \\0 but is terminated with " +
×
NEW
1762
            fmt::format("0x{:04X}", static_cast<std::uint16_t>(entry.directory_entry_name.at(name_length_characters))) +
×
NEW
1763
            "\nString has a length of " + std::to_string(name_length_characters) + " characters (" +
×
NEW
1764
            std::to_string(entry.directory_entry_name_length) + " bytes including \\0). Full buffer contents:\n";
×
NEW
1765
        for (char16_t val : entry.directory_entry_name)
×
1766
        {
NEW
1767
            exception_str += fmt::format("{:04x} ", static_cast<std::uint16_t>(val));
×
1768
        }
1769

NEW
1770
        throw xlnt::invalid_file(exception_str);
×
NEW
1771
    }
×
1772

1773
    for (std::uint16_t n = 0; n < name_length_characters; ++n)
2,471✔
1774
    {
1775
        char16_t curr = entry.directory_entry_name.at(n);
2,303✔
1776
        if (curr == u'/' || curr == u'\\' || curr == u':' || curr == u'!')
2,303!
1777
        {
NEW
1778
            throw xlnt::invalid_file("invalid directory entry name " + entry.format_info(id, directory_sector, true) +
×
NEW
1779
                ", which contains invalid character " +
×
NEW
1780
                fmt::format("0x{:04X}", static_cast<std::uint16_t>(curr)) + " at position " + std::to_string(n));
×
1781
        }
1782
    }
1783

1784
    // Object Type (1 byte): This field MUST be 0x00, 0x01, 0x02, or 0x05, depending on the actual type
1785
    // of object. All other values are not valid.
1786
    // --------------------------------
1787
    // NOTE: the Unallocated type is handled in check_unallocated_entry().
1788
    if (entry.type != compound_document_entry::entry_type::Storage &&
168✔
1789
        entry.type != compound_document_entry::entry_type::Stream &&
132✔
1790
        entry.type != compound_document_entry::entry_type::RootStorage)
32!
1791
    {
NEW
1792
        throw xlnt::invalid_file("invalid entry object type " + entry.format_info(id, directory_sector, true) +
×
NEW
1793
            ", expected 0 (Unallocated), 1 (Storage), 2 (Stream) or 5 (RootStorage) but got " + std::to_string(static_cast<int>(entry.type)));
×
1794
    }
1795

1796
    // Color Flag (1 byte): This field MUST be 0x00 (red) or 0x01 (black). All other values are not valid.
1797
    if (entry.color != compound_document_entry::entry_color::Red && entry.color != compound_document_entry::entry_color::Black)
168!
1798
    {
NEW
1799
        throw xlnt::invalid_file("invalid entry color " + entry.format_info(id, directory_sector, true) +
×
NEW
1800
            ", expected 0 (Red) or 1 (Black), but got " + std::to_string(static_cast<int>(entry.color)));
×
1801
    }
1802

1803
    // CLSID (16 bytes): This field contains an object class GUID, if this entry is for a storage object or
1804
    // root storage object. For a stream object, this field MUST be set to all zeroes. A value containing all
1805
    // zeroes in a storage or root storage directory entry is valid, and indicates that no object class is
1806
    // associated with the storage. If an implementation of the file format enables applications to create
1807
    // storage objects without explicitly setting an object class GUID, it MUST write all zeroes by default.
1808
    // If this value is not all zeroes, the object class GUID can be used as a parameter to start
1809
    // applications.
1810
    if (entry.type == compound_document_entry::entry_type::Stream &&
268!
1811
        std::any_of(entry.clsid.begin(), entry.clsid.end(), [](std::uint8_t i) { return i != 0; }))
1,700!
1812
    {
NEW
1813
        std::string exception_str = "invalid entry CLSID " + entry.format_info(id, directory_sector, true) +
×
NEW
1814
            " for type 2 (Stream), expected all zeros but got: ";
×
NEW
1815
        for (std::uint8_t val : entry.clsid)
×
1816
        {
NEW
1817
            exception_str += fmt::format("{:02x} ", val);
×
1818
        }
NEW
1819
        throw xlnt::invalid_file(exception_str);
×
NEW
1820
    }
×
1821

1822
    // Creation Time (8 bytes): This field contains the creation time for a storage object, or all zeroes to
1823
    // indicate that the creation time of the storage object was not recorded. The Windows FILETIME
1824
    // structure is used to represent this field in UTC. For a stream object, this field MUST be all zeroes.
1825
    // For a root storage object, this field MUST be all zeroes, and the creation time is retrieved or set on
1826
    // the compound file itself.
1827
    // --------------------------------
1828
    // NOTE: unfortunately cannot be enforced, as some files:
1829
    // - have a root entry with timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC
1830
    // - have a stream with an actual timestamp
1831
    /*if ((entry.type == compound_document_entry::entry_type::Stream ||
1832
        entry.type == compound_document_entry::entry_type::RootStorage) &&
1833
        entry.creation_time != 0)
1834
    {
1835
        throw xlnt::invalid_file("invalid entry creation time " + entry.format_info(id, directory_sector, true) +
1836
            " for type " + std::to_string(static_cast<int>(entry.type)) +
1837
            ", expected 0 but got " + std::to_string(entry.creation_time));
1838
    }*/
1839

1840
    // Modified Time (8 bytes): This field contains the modification time for a storage object, or all
1841
    // zeroes to indicate that the modified time of the storage object was not recorded. The Windows
1842
    // FILETIME structure is used to represent this field in UTC. For a stream object, this field MUST be
1843
    // all zeroes. For a root storage object, this field MAY<2> be set to all zeroes, and the modified time
1844
    // is retrieved or set on the compound file itself.
1845
    // --------------------------------
1846
    // NOTE: unfortunately cannot be enforced, as some files have a stream with an actual timestamp.
1847
    /*if (entry.type == compound_document_entry::entry_type::Stream &&
1848
        entry.modified_time != 0)
1849
    {
1850
        throw xlnt::invalid_file("invalid entry modification time " + entry.format_info(id, directory_sector, true) +
1851
            " for type 2 (Stream), expected 0 but got " + std::to_string(entry.modified_time));
1852
    }*/
1853

1854
    // Starting Sector Location (4 bytes): This field contains the first sector location if this is a stream
1855
    // object. For a root storage object, this field MUST contain the first sector of the mini stream, if the
1856
    // mini stream exists. For a storage object, this field MUST be set to all zeroes.
1857
    // --------------------------------
1858
    // It seems that some immplementations initialize it with FREESECT,
1859
    // which is honestly not wrong either. So let's accept that.
1860
    if (entry.type == compound_document_entry::entry_type::Storage &&
168✔
1861
        !(entry.start_sector == 0 || entry.start_sector == FREESECT))
36!
1862
    {
NEW
1863
        throw xlnt::invalid_file("invalid entry start sector location " + entry.format_info(id, directory_sector, true) +
×
NEW
1864
            " for type 1 (Storage), expected 0 or FREESECT (0xFFFFFFFF), but got " + format_hex(entry.start_sector));
×
1865
    }
1866

1867
    // Stream Size (8 bytes): This 64-bit integer field contains the size of the user-defined data if this is
1868
    // a stream object. For a root storage object, this field contains the size of the mini stream. For a
1869
    // storage object, this field MUST be set to all zeroes.
1870
    if (entry.type == compound_document_entry::entry_type::Storage &&
168✔
1871
        entry.stream_size != 0)
36!
1872
    {
NEW
1873
        throw xlnt::invalid_file("invalid entry stream size " + entry.format_info(id, directory_sector, true) +
×
NEW
1874
            " for type 1 (Storage), expected 0 but got " + std::to_string(entry.stream_size));
×
1875
    }
1876
}
168✔
1877

1878
/// Reads the directory entry with the given ID from the input stream into entries_.at(id)
/// and validates it.
///
/// The entry is located by following the directory sector chain (starting at
/// header_.directory_start_sector) to the sector that contains the entry, and then
/// seeking to the entry's offset inside that sector.
///
/// While reading, stream exceptions (failbit | badbit) are enabled to get a meaningful
/// reason for failures. The previous exception mask of the stream is restored before
/// this function returns or throws because of a failed seek or read.
///
/// Throws xlnt::invalid_file if seeking or reading fails, or if the entry contents are invalid
/// (see check_unallocated_entry() and check_non_unallocated_entry()).
void compound_document::read_entry(directory_id id)
{
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
    const sector_id directory_sector = chain_sector_at_index(header_.directory_start_sector, FAT_, id / entries_per_sector);
    const std::uint64_t offset = sector_size() * directory_sector + ((id % entries_per_sector) * COMPOUND_DOCUMENT_ENTRY_SIZE);

    compound_document_entry &entry = entries_.at(id);
    auto seek_pos = static_cast<std::streamoff>(sector_data_start() + offset);

    // Exception handling could provide useful information about why errors have occurred.
    auto previous_exception_mask = in_->exceptions();
    in_->exceptions(std::istream::failbit | std::istream::badbit);

    // Used on the error paths only: the stream must not be left in "throwing mode" for the caller.
    // Restoring the mask can itself throw if the previous mask contains a bit that is currently set.
    // The mask is nevertheless restored in that case, and the more descriptive xlnt::invalid_file
    // is about to be thrown anyway, so such an exception is ignored.
    const auto restore_exception_mask_after_error = [this, previous_exception_mask]() {
        try
        {
            in_->exceptions(previous_exception_mask);
        }
        catch (...)
        {
        }
    };

    try
    {
        in_->seekg(seek_pos, std::ios::beg);
    }
    catch (const std::exception &ex)
    {
        restore_exception_mask_after_error();
        throw xlnt::invalid_file("Failed reading compound document entry " + std::to_string(id) +
            ": failed seeking to position " + std::to_string(seek_pos) + ". Reason: " + ex.what());
    }

    try
    {
        // Read the fields manually due to struct padding (larger sizeof than 128 bytes).
        in_->read(reinterpret_cast<char *>(entry.directory_entry_name.data()), sizeof(entry.directory_entry_name));
        in_->read(reinterpret_cast<char *>(&entry.directory_entry_name_length), sizeof(entry.directory_entry_name_length));
        in_->read(reinterpret_cast<char *>(&entry.type), sizeof(entry.type));
        in_->read(reinterpret_cast<char *>(&entry.color), sizeof(entry.color));
        in_->read(reinterpret_cast<char *>(&entry.left_sibling), sizeof(entry.left_sibling));
        in_->read(reinterpret_cast<char *>(&entry.right_sibling), sizeof(entry.right_sibling));
        in_->read(reinterpret_cast<char *>(&entry.child), sizeof(entry.child));
        in_->read(reinterpret_cast<char *>(entry.clsid.data()), sizeof(entry.clsid));
        in_->read(reinterpret_cast<char *>(&entry.state_bits), sizeof(entry.state_bits));
        in_->read(reinterpret_cast<char *>(&entry.creation_time), sizeof(entry.creation_time));
        in_->read(reinterpret_cast<char *>(&entry.modified_time), sizeof(entry.modified_time));
        in_->read(reinterpret_cast<char *>(&entry.start_sector), sizeof(entry.start_sector));
        in_->read(reinterpret_cast<char *>(&entry.stream_size), sizeof(entry.stream_size));
    }
    catch (const std::exception &ex)
    {
        restore_exception_mask_after_error();
        throw xlnt::invalid_file("Failed reading compound document entry " + std::to_string(id) +
            ": failed reading entry fields. Reason: " + ex.what());
    }

    in_->exceptions(previous_exception_mask);

    // Stream Size (8 bytes): ... (see below for the rest)
    // - For a version 3 compound file 512-byte sector size, the value of this field MUST be less than
    //   or equal to 0x80000000. (Equivalently, this requirement can be stated: the size of a stream or
    //   of the mini stream in a version 3 compound file MUST be less than or equal to 2 gigabytes
    //   (GB).) Note that as a consequence of this requirement, the most significant 32 bits of this field
    //   MUST be zero in a version 3 compound file. However, implementers should be aware that
    //   some older implementations did not initialize the most significant 32 bits of this field, and
    //   these bits might therefore be nonzero in files that are otherwise valid version 3 compound
    //   files. Although this document does not normatively specify parser behavior, it is recommended
    //   that parsers ignore the most significant 32 bits of this field in version 3 compound files,
    //   treating it as if its value were zero, unless there is a specific reason to do otherwise (for
    //   example, a parser whose purpose is to verify the correctness of a compound file).
    if (header_.major_version == 3 && entry.stream_size > 0x80000000)
    {
        // Keep the least significant 32 bits and discard only the (possibly uninitialized)
        // most significant 32 bits.
        // Note: the only allowed byte order is little-endian.
        entry.stream_size = entry.stream_size & static_cast<std::uint64_t>(0xFFFFFFFFu);
    }

    if (entry.type == compound_document_entry::entry_type::Unallocated)
    {
        check_unallocated_entry(entry, id, directory_sector);
    }
    else
    {
        check_non_unallocated_entry(entry, id, directory_sector);
    }
}
200✔
1953

1954
void compound_document::write_header()
15✔
1955
{
1956
    out_->seekp(0, std::ios::beg);
15✔
1957
    out_->write(reinterpret_cast<char *>(&header_), sizeof(compound_document_header));
15✔
1958
}
15✔
1959

1960
void compound_document::write_DIFAT()
11✔
1961
{
1962
    sector_id DIFAT_sector = header_.DIFAT_start_sector;
11✔
1963

1964
    for (std::uint32_t i = 0u; i < header_.num_FAT_sectors; ++i)
39✔
1965
    {
1966
        if (i < 109u)
28!
1967
        {
1968
            header_.DIFAT.at(i) = DIFAT_.at(i);
28✔
1969
        }
1970
        else
1971
        {
NEW
1972
            std::vector<sector_id> sector;
×
NEW
1973
            binary_writer<sector_id> sector_writer(sector);
×
1974

NEW
1975
            read_sector(DIFAT_sector, sector_writer);
×
1976

NEW
1977
            DIFAT_sector = last_elem(sector);
×
1978
            sector.pop_back();
×
1979

NEW
1980
            std::copy(sector.begin(), sector.end(), std::back_inserter(DIFAT_));
×
1981
        }
×
1982
    }
1983
}
11✔
1984

1985
void compound_document::write_FAT()
1,988✔
1986
{
1987
    binary_reader<sector_id> sector_reader(FAT_);
1,988✔
1988

1989
    for (sector_id FAT_sector : DIFAT_)
10,360✔
1990
    {
1991
        write_sector(sector_reader, FAT_sector);
8,372✔
1992
    }
1993
}
1,988✔
1994

NEW
1995
void compound_document::write_mini_FAT()
×
1996
{
NEW
1997
    binary_reader<sector_id> sector_reader(mini_FAT_);
×
1998

NEW
1999
    for (sector_id mini_FAT_sector : follow_chain(header_.mini_FAT_start_sector, FAT_))
×
2000
    {
NEW
2001
        write_sector(sector_reader, mini_FAT_sector);
×
UNCOV
2002
    }
×
UNCOV
2003
}
×
2004

2005
/// Writes the directory entry with the given ID from entries_ to its position in the output stream.
///
/// The position is determined by following the directory sector chain (starting at
/// header_.directory_start_sector) to the sector that contains the entry, and adding the
/// entry's offset inside that sector.
void compound_document::write_entry(directory_id id)
{
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
    const sector_id directory_sector = chain_sector_at_index(header_.directory_start_sector, FAT_, id / entries_per_sector);

    const std::uint64_t sector_begin = sector_data_start() + sector_size() * directory_sector;
    const std::uint64_t offset_in_sector = (id % entries_per_sector) * COMPOUND_DOCUMENT_ENTRY_SIZE;
    const std::uint64_t offset = sector_begin + offset_in_sector;

    out_->seekp(static_cast<std::streamoff>(offset), std::ios::beg);
    const compound_document_entry &entry = entries_.at(id);

    // Writes the raw bytes of a single field to the output stream.
    const auto write_field = [this](const void *field, std::size_t field_size) {
        out_->write(static_cast<const char *>(field), static_cast<std::streamsize>(field_size));
    };

    // The fields are written one by one instead of writing the struct as a whole,
    // because struct padding makes sizeof(entry) larger than the 128 bytes of an entry.
    write_field(entry.directory_entry_name.data(), sizeof(entry.directory_entry_name));
    write_field(&entry.directory_entry_name_length, sizeof(entry.directory_entry_name_length));
    write_field(&entry.type, sizeof(entry.type));
    write_field(&entry.color, sizeof(entry.color));
    write_field(&entry.left_sibling, sizeof(entry.left_sibling));
    write_field(&entry.right_sibling, sizeof(entry.right_sibling));
    write_field(&entry.child, sizeof(entry.child));
    write_field(entry.clsid.data(), sizeof(entry.clsid));
    write_field(&entry.state_bits, sizeof(entry.state_bits));
    write_field(&entry.creation_time, sizeof(entry.creation_time));
    write_field(&entry.modified_time, sizeof(entry.modified_time));
    write_field(&entry.start_sector, sizeof(entry.start_sector));
    write_field(&entry.stream_size, sizeof(entry.stream_size));
}
4,008✔
2029

2030
} // namespace detail
2031
} // namespace xlnt
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc