• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

xlnt-community / xlnt / c94c0fe5-6585-40c8-aa61-1f224e0e3e60

22 Feb 2026 07:38PM UTC coverage: 82.899% (-1.1%) from 83.961%
c94c0fe5-6585-40c8-aa61-1f224e0e3e60

Pull #147

circleci

doomlaur
Improved handling of compound document entries for start sectors. Fixed endianness values.
Pull Request #147: Compound document improvements

15383 of 20393 branches covered (75.43%)

274 of 485 new or added lines in 4 files covered. (56.49%)

34 existing lines in 2 files now uncovered.

12531 of 15116 relevant lines covered (82.9%)

12155.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

59.93
./source/detail/cryptography/compound_document.cpp
1
// Copyright (C) 2016-2022 Thomas Fussell
2
// Copyright (C) 2002-2007 Ariya Hidayat (ariya@kde.org).
3
// Copyright (c) 2024-2026 xlnt-community
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions
7
// are met:
8
//
9
// 1. Redistributions of source code must retain the above copyright
10
// notice, this list of conditions and the following disclaimer.
11
// 2. Redistributions in binary form must reproduce the above copyright
12
// notice, this list of conditions and the following disclaimer in the
13
// documentation and/or other materials provided with the distribution.
14
//
15
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25

26
#include <algorithm>
27
#include <array>
28
#include <cstring>
29
#include <iostream>
30
#include <locale>
31
#include <string>
32
#include <vector>
33

34
#include <xlnt/utils/exceptions.hpp>
35
#include <detail/binary.hpp>
36
#include <detail/cryptography/compound_document.hpp>
37
#include <detail/unicode.hpp>
38

39
#define FMT_HEADER_ONLY
40
#include <fmt/format.h>
41

42
// NOTE: compound files are not part of the OOXML specification (ECMA-376).
43
// This implementation is based on the "[MS-CFB]: Compound File Binary File Format" specification.
44
namespace {
45

46
using namespace xlnt::detail;
47

48
template <typename T>
NEW
49
std::string format_hex(T value)
×
50
{
51
    // Format example: 0x0000660F
NEW
52
    return fmt::format("0x{:08X}", value);
×
53
}
54

55
/// <summary>
/// Three-way, case-insensitive comparison of two keys. Both keys are lower-cased with the
/// std::ctype<char> facet of a default-constructed std::locale and then compared with
/// std::string::compare, so the result is negative, zero or positive.
/// </summary>
int compare_keys(const std::string &left, const std::string &right)
{
    const auto lowered = [](std::string text) -> std::string {
        if (!text.empty())
        {
            // Constructed once; a default-constructed locale is a copy of the global locale at that time.
            static const std::locale default_locale;
            const auto &facet = std::use_facet<std::ctype<char>>(default_locale);
            facet.tolower(&text[0], &text[0] + text.size());
        }

        return text;
    };

    const std::string lhs = lowered(left);
    const std::string rhs = lowered(right);

    return lhs.compare(rhs);
}
73

74
/// <summary>
/// Splits a path at every '/' character. Empty components are kept, so a leading
/// slash yields an empty first element and the result always has at least one element.
/// </summary>
std::vector<std::string> split_path(const std::string &path)
{
    std::vector<std::string> parts;
    std::size_t begin = 0;

    for (std::size_t slash = path.find('/'); slash != std::string::npos; slash = path.find('/', begin))
    {
        parts.emplace_back(path.substr(begin, slash - begin));
        begin = slash + 1;
    }

    // Whatever follows the last separator (possibly nothing) is the final component.
    parts.emplace_back(path.substr(begin));

    return parts;
}
×
91

92
/// <summary>
/// Concatenates path components, appending a '/' after every component
/// (including the last one). An empty list yields an empty string.
/// </summary>
std::string join_path(const std::vector<std::string> &path)
{
    std::string joined;

    for (auto component = path.begin(); component != path.end(); ++component)
    {
        joined += *component;
        joined += '/';
    }

    return joined;
}
×
104

105
/// <summary>
/// Returns a read-only reference to the last element of the vector.
/// Bounds-checked: an empty vector results in std::out_of_range from std::vector::at.
/// </summary>
template <typename T>
const T & last_elem(const std::vector<T> &vec)
{
    // For an empty vector this wraps around to the maximum index, which at() rejects.
    const std::size_t last_index = vec.size() - 1;
    return vec.at(last_index);
}
110

111
/// <summary>
/// Returns a mutable reference to the last element of the vector.
/// Bounds-checked: an empty vector results in std::out_of_range from std::vector::at.
/// </summary>
template <typename T>
T & last_elem(std::vector<T> &vec)
{
    // For an empty vector this wraps around to the maximum index, which at() rejects.
    const std::size_t last_index = vec.size() - 1;
    return vec.at(last_index);
}
116

117
} // namespace
118

119
namespace xlnt {
120
namespace detail {
121

NEW
122
/// <summary>
/// Returns true if the sector ID is the ENDOFCHAIN marker.
/// The ID is validated first via expect_valid_sector_or_chain_end, which throws
/// for any other special value.
/// </summary>
bool is_chain_end(sector_id sector)
{
    expect_valid_sector_or_chain_end(sector);

    const bool at_chain_end = (sector == ENDOFCHAIN);
    return at_chain_end;
}
127

128
/// <summary>
/// Returns true if the sector ID does not refer to a sector, i.e. it is
/// ENDOFCHAIN or FREESECT. The ID is validated first via
/// expect_valid_sector_or_chain_end_or_free, which throws for other special values.
/// </summary>
bool is_invalid_sector(sector_id sector)
{
    expect_valid_sector_or_chain_end_or_free(sector);

    if (sector == ENDOFCHAIN)
    {
        return true;
    }

    return sector == FREESECT;
}
133

134
bool has_invalid_start_sector(const compound_document_entry &entry)
1,005✔
135
{
136
    // Empty entries must use start sector 0, which is however invalid in this case.
137
    if (entry.type == compound_document_entry::entry_type::Empty && entry.start == 0)
1,005!
138
    {
NEW
139
        return true;
×
140
    }
141
    else
142
    {
143
        return is_invalid_sector(entry.start);
1,005✔
144
    }
145
}
146

147
/// <summary>
/// Returns true if the directory ID is the NOSTREAM marker.
/// The ID is validated first via expect_valid_entry_or_no_stream, which throws
/// for any other out-of-range value.
/// </summary>
bool is_invalid_entry(directory_id entry)
{
    expect_valid_entry_or_no_stream(entry);

    const bool refers_to_nothing = (entry == NOSTREAM);
    return refers_to_nothing;
}
152

NEW
153
/// <summary>
/// Throws xlnt::invalid_parameter unless the sector ID is a regular sector
/// (not greater than MAXREGSECT) or the ENDOFCHAIN marker.
/// </summary>
void expect_valid_sector_or_chain_end(sector_id sector)
{
    const bool acceptable = sector <= MAXREGSECT || sector == ENDOFCHAIN;

    if (acceptable)
    {
        return;
    }

    throw xlnt::invalid_parameter("expected valid sector (<= MAXREGSECT, which means <= 0xFFFFFFFA) or ENDOFCHAIN (0xFFFFFFFE)"
        ", but got " + format_hex(sector));
}
×
161

162
/// <summary>
/// Throws xlnt::invalid_parameter unless the sector ID is a regular sector
/// (not greater than MAXREGSECT), the ENDOFCHAIN marker or the FREESECT marker.
/// </summary>
void expect_valid_sector_or_chain_end_or_free(sector_id sector)
{
    const bool acceptable = sector <= MAXREGSECT || sector == ENDOFCHAIN || sector == FREESECT;

    if (acceptable)
    {
        return;
    }

    throw xlnt::invalid_parameter("expected valid sector (<= MAXREGSECT, which means <= 0xFFFFFFFA),"
        " or ENDOFCHAIN (0xFFFFFFFE), or FREESECT (0xFFFFFFFF), but got " + format_hex(sector));
}
248,710✔
170

171
/// <summary>
/// Throws xlnt::invalid_parameter unless the directory ID is a regular entry ID
/// (not greater than MAXREGSID) or the NOSTREAM marker.
/// </summary>
void expect_valid_entry_or_no_stream(directory_id entry)
{
    const bool acceptable = entry <= MAXREGSID || entry == NOSTREAM;

    if (acceptable)
    {
        return;
    }

    throw xlnt::invalid_parameter("expected valid entry (<= MAXREGSID, which means <= 0xFFFFFFFA) or NOSTREAM (0xFFFFFFFF)"
        ", but got " + format_hex(entry));
}
432✔
179

180
/// <summary>
181
/// Allows a std::vector to be read through a std::istream.
182
/// </summary>
183
class compound_document_istreambuf : public std::streambuf
184
{
185
    using int_type = std::streambuf::int_type;
186

187
public:
188
    compound_document_istreambuf(const compound_document_entry &entry, compound_document &document)
64✔
189
        : entry_(entry),
128✔
190
          document_(document),
64✔
191
          sector_writer_(current_sector_)
64✔
192
    {
193
    }
64✔
194

195
    compound_document_istreambuf(const compound_document_istreambuf &) = delete;
196
    compound_document_istreambuf &operator=(const compound_document_istreambuf &) = delete;
197

198
    ~compound_document_istreambuf() override = default;
128✔
199

200
private:
201
    std::streamsize xsgetn(char *c, std::streamsize count) override
779✔
202
    {
203
        std::streamsize bytes_read = 0;
779✔
204

205
        const sector_chain &sec_chain = short_stream() ? document_.ssat_ : document_.sat_;
779✔
206
        const sector_chain chain = document_.follow_chain(entry_, sec_chain);
779✔
207
        const std::uint64_t sector_size = short_stream() ? document_.short_sector_size() : document_.sector_size();
779✔
208
        sector_id current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
779✔
209
        std::uint64_t remaining = std::min(entry_.size - position_, static_cast<std::uint64_t>(count));
779✔
210

211
        while (remaining)
3,781✔
212
        {
213
            if (current_sector_.empty() || chain.at(static_cast<std::size_t>(position_ / sector_size)) != current_sector)
3,002✔
214
            {
215
                current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
2,272✔
216
                sector_writer_.reset();
2,272✔
217
                if (short_stream())
2,272✔
218
                {
219
                    document_.read_short_sector(current_sector, sector_writer_);
192✔
220
                }
221
                else
222
                {
223
                    document_.read_sector(current_sector, sector_writer_);
2,080✔
224
                }
225
            }
226

227
            const std::uint64_t available = std::min(entry_.size - position_, sector_size - position_ % sector_size);
3,002✔
228
            const std::uint64_t to_read = std::min(available, remaining);
3,002✔
229

230
            auto start = current_sector_.begin() + static_cast<std::ptrdiff_t>(position_ % sector_size);
3,002✔
231
            auto end = start + static_cast<std::ptrdiff_t>(to_read);
3,002✔
232

233
            for (auto i = start; i < end; ++i)
1,074,752✔
234
            {
235
                *(c++) = static_cast<char>(*i);
1,071,750✔
236
            }
237

238
            remaining -= to_read;
3,002✔
239
            position_ += to_read;
3,002✔
240
            bytes_read += to_read;
3,002✔
241
        }
242

243
        if (position_ < entry_.size && chain.at(static_cast<std::size_t>(position_ / sector_size)) != current_sector)
779✔
244
        {
245
            current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
26✔
246
            sector_writer_.reset();
26✔
247
            if (short_stream())
26!
248
            {
249
                document_.read_short_sector(current_sector, sector_writer_);
26✔
250
            }
251
            else
252
            {
253
                document_.read_sector(current_sector, sector_writer_);
×
254
            }
255
        }
256

257
        return bytes_read;
779✔
258
    }
779✔
259

260
    bool short_stream()
3,856✔
261
    {
262
        return entry_.size < document_.header_.threshold;
3,856✔
263
    }
264

265
    int_type underflow() override
×
266
    {
267
        if (position_ >= entry_.size)
×
268
        {
269
            return traits_type::eof();
×
270
        }
271

NEW
272
        std::uint64_t old_position = position_;
×
NEW
273
        char result = '\0';
×
274
        xsgetn(&result, 1);
×
275
        position_ = old_position;
×
276

277
        return result;
×
278
    }
279

280
    int_type uflow() override
×
281
    {
NEW
282
        int_type result = underflow();
×
283
        ++position_;
×
284

285
        return result;
×
286
    }
287

288
    std::streamsize showmanyc() override
×
289
    {
290
        if (position_ == entry_.size)
×
291
        {
292
            return static_cast<std::streamsize>(-1);
×
293
        }
294

295
        return static_cast<std::streamsize>(entry_.size - position_);
×
296
    }
297

298
    std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode) override
52✔
299
    {
300
        if (way == std::ios_base::beg)
52!
301
        {
302
            position_ = 0;
×
303
        }
304
        else if (way == std::ios_base::end)
52!
305
        {
306
            position_ = entry_.size;
×
307
        }
308

309
        if (off < 0)
52!
310
        {
NEW
311
            if (static_cast<std::uint64_t>(-off) > position_)
×
312
            {
313
                position_ = 0;
×
NEW
314
                return static_cast<std::streamoff>(-1);
×
315
            }
316
            else
317
            {
NEW
318
                position_ -= static_cast<std::uint64_t>(-off);
×
319
            }
320
        }
321
        else if (off > 0)
52!
322
        {
NEW
323
            if (static_cast<std::uint64_t>(off) + position_ > entry_.size)
×
324
            {
325
                position_ = entry_.size;
×
NEW
326
                return static_cast<std::streamoff>(-1);
×
327
            }
328
            else
329
            {
NEW
330
                position_ += static_cast<std::uint64_t>(off);
×
331
            }
332
        }
333

334
        return static_cast<std::streamoff>(position_);
52✔
335
    }
336

337
    std::streampos seekpos(std::streampos sp, std::ios_base::openmode) override
×
338
    {
339
        if (sp < 0)
×
340
        {
341
            position_ = 0;
×
342
        }
NEW
343
        else if (static_cast<std::uint64_t>(sp) > entry_.size)
×
344
        {
345
            position_ = entry_.size;
×
346
        }
347
        else
348
        {
NEW
349
            position_ = static_cast<std::uint64_t>(sp);
×
350
        }
351

NEW
352
        return static_cast<std::streamoff>(position_);
×
353
    }
354

355
private:
356
    const compound_document_entry &entry_;
357
    compound_document &document_;
358
    std::vector<byte> current_sector_;
359
    binary_writer<byte> sector_writer_;
360
    std::uint64_t position_ = 0;
361
};
362

363
/// <summary>
364
/// Allows a std::vector to be written through a std::ostream.
365
/// </summary>
366
class compound_document_ostreambuf : public std::streambuf
367
{
368
    using int_type = std::streambuf::int_type;
369

370
public:
371
    compound_document_ostreambuf(compound_document_entry &entry, compound_document &document)
8✔
372
        : entry_(entry),
16✔
373
          document_(document),
8✔
374
          current_sector_(document.header_.threshold),
8✔
375
          sector_reader_(current_sector_)
16✔
376
    {
377
        setp(reinterpret_cast<char *>(current_sector_.data()),
8✔
378
            reinterpret_cast<char *>(current_sector_.data() + current_sector_.size()));
8✔
379
    }
8✔
380

381
    compound_document_ostreambuf(const compound_document_ostreambuf &) = delete;
382
    compound_document_ostreambuf &operator=(const compound_document_ostreambuf &) = delete;
383

384
    ~compound_document_ostreambuf() override;
385

386
private:
387
    int sync() override
986✔
388
    {
389
        auto written = static_cast<std::uint64_t>(pptr() - pbase());
986✔
390

391
        if (written == 0)
986✔
392
        {
393
            return 0;
4✔
394
        }
395

396
        sector_reader_.reset();
982✔
397

398
        if (short_stream())
982✔
399
        {
400
            if (position_ + written >= document_.header_.threshold)
8✔
401
            {
402
                convert_to_long_stream();
4✔
403
            }
404
            else
405
            {
406
                if (has_invalid_start_sector(entry_))
4!
407
                {
408
                    std::size_t num_sectors = static_cast<std::size_t>(
NEW
409
                        (position_ + written + document_.short_sector_size() - 1) / document_.short_sector_size());
×
UNCOV
410
                    chain_ = document_.allocate_short_sectors(num_sectors);
×
NEW
411
                    entry_.start = chain_.at(0);
×
412
                }
413

414
                for (sector_id link : chain_)
4!
415
                {
UNCOV
416
                    document_.write_short_sector(sector_reader_, link);
×
UNCOV
417
                    sector_reader_.offset(sector_reader_.offset() + document_.short_sector_size());
×
418
                }
419
            }
420
        }
421
        else
422
        {
423
            const std::size_t sector_index = static_cast<std::size_t>(position_ / document_.sector_size());
974✔
424
            document_.write_sector(sector_reader_, chain_.at(sector_index));
974✔
425
        }
426

427
        position_ += written;
982✔
428
        entry_.size = std::max(entry_.size, position_);
982✔
429
        document_.write_directory();
982✔
430

431
        std::fill(current_sector_.begin(), current_sector_.end(), byte(0));
982✔
432
        setp(reinterpret_cast<char *>(current_sector_.data()),
982✔
433
            reinterpret_cast<char *>(current_sector_.data() + current_sector_.size()));
982✔
434

435
        return 0;
982✔
436
    }
437

438
    bool short_stream()
1,956✔
439
    {
440
        return entry_.size < document_.header_.threshold;
1,956✔
441
    }
442

443
    int_type overflow(int_type c = traits_type::eof()) override
974✔
444
    {
445
        sync();
974✔
446

447
        if (short_stream())
974!
448
        {
NEW
449
            sector_id next_sector = document_.allocate_short_sector();
×
NEW
450
            document_.ssat_.at(last_elem(chain_)) = next_sector;
×
451
            chain_.push_back(next_sector);
×
452
            document_.write_ssat();
×
453
        }
454
        else
455
        {
456
            sector_id next_sector = document_.allocate_sector();
974✔
457
            document_.sat_.at(last_elem(chain_)) = next_sector;
974✔
458
            chain_.push_back(next_sector);
974✔
459
            document_.write_sat();
974✔
460
        }
461

462
        auto value = static_cast<std::uint8_t>(c);
974✔
463

464
        if (c != traits_type::eof())
974!
465
        {
466
            std::size_t sector_index = static_cast<std::size_t>(position_ % current_sector_.size());
974✔
467
            current_sector_.at(sector_index) = value;
974✔
468
        }
469

470
        pbump(1);
974✔
471

472
        return traits_type::to_int_type(static_cast<char>(value));
974✔
473
    }
474

475
    void convert_to_long_stream()
4✔
476
    {
477
        sector_reader_.reset();
4✔
478

479
        std::size_t num_sectors = static_cast<std::size_t>(current_sector_.size() / document_.sector_size());
4✔
480
        sector_chain new_chain = document_.allocate_sectors(num_sectors);
4✔
481

482
        for (sector_id link : new_chain)
36✔
483
        {
484
            document_.write_sector(sector_reader_, link);
32✔
485
            sector_reader_.offset(sector_reader_.offset() + document_.short_sector_size());
32✔
486
        }
487

488
        current_sector_.resize(document_.sector_size(), 0);
4✔
489
        std::fill(current_sector_.begin(), current_sector_.end(), byte(0));
4✔
490

491
        if (has_invalid_start_sector(entry_))
4!
492
        {
493
            // TODO: deallocate short sectors here
UNCOV
494
            if (document_.header_.num_short_sectors == 0)
×
495
            {
NEW
496
                document_.entries_.at(0).start = ENDOFCHAIN;
×
497
            }
498
        }
499

500
        chain_ = new_chain;
4✔
501
        entry_.start = chain_.at(0);
4✔
502
        document_.write_directory();
4✔
503
    }
4✔
504

505
    std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode) override
×
506
    {
507
        if (way == std::ios_base::beg)
×
508
        {
509
            position_ = 0;
×
510
        }
511
        else if (way == std::ios_base::end)
×
512
        {
513
            position_ = entry_.size;
×
514
        }
515

516
        if (off < 0)
×
517
        {
NEW
518
            if (static_cast<std::uint64_t>(-off) > position_)
×
519
            {
520
                position_ = 0;
×
NEW
521
                return static_cast<std::streamoff>(-1);
×
522
            }
523
            else
524
            {
NEW
525
                position_ -= static_cast<std::uint64_t>(-off);
×
526
            }
527
        }
528
        else if (off > 0)
×
529
        {
NEW
530
            if (static_cast<std::uint64_t>(off) + position_ > entry_.size)
×
531
            {
532
                position_ = entry_.size;
×
NEW
533
                return static_cast<std::streamoff>(-1);
×
534
            }
535
            else
536
            {
NEW
537
                position_ += static_cast<std::uint64_t>(off);
×
538
            }
539
        }
540

NEW
541
        return static_cast<std::streamoff>(position_);
×
542
    }
543

544
    std::streampos seekpos(std::streampos sp, std::ios_base::openmode) override
×
545
    {
546
        if (sp < 0)
×
547
        {
548
            position_ = 0;
×
549
        }
NEW
550
        else if (static_cast<std::uint64_t>(sp) > entry_.size)
×
551
        {
552
            position_ = entry_.size;
×
553
        }
554
        else
555
        {
NEW
556
            position_ = static_cast<std::uint64_t>(sp);
×
557
        }
558

NEW
559
        return static_cast<std::streamoff>(position_);
×
560
    }
561

562
private:
563
    compound_document_entry &entry_;
564
    compound_document &document_;
565
    std::vector<byte> current_sector_;
566
    binary_reader<byte> sector_reader_;
567
    std::uint64_t position_ = 0;
568
    sector_chain chain_;
569
};
570

571
/// <summary>
/// Hands any bytes still pending in the put area to the document before the buffer goes away.
/// </summary>
compound_document_ostreambuf::~compound_document_ostreambuf()
{
    // sync() returns immediately when nothing is pending.
    this->sync();
}
16✔
575

576
/// <summary>
/// Creates a new compound document that is written to the given output stream:
/// writes the header and inserts the root storage entry.
/// </summary>
compound_document::compound_document(std::ostream &out)
    : out_(&out),
      stream_in_(nullptr),
      stream_out_(nullptr)
{
    write_header();

    const compound_document_entry::entry_type root_type = compound_document_entry::entry_type::RootStorage;
    insert_entry("/Root Entry", root_type);
}
4✔
584

585
/// <summary>
/// Opens an existing compound document from the given input stream by reading, in order,
/// the header, the MSAT, the SAT, the SSAT and the directory.
/// </summary>
compound_document::compound_document(std::istream &in)
    : in_(&in),
      stream_in_(nullptr),
      stream_out_(nullptr)
{
    // Header first, then the allocation tables.
    read_header();
    read_msat();
    read_sat();
    read_ssat();

    // The directory comes last.
    read_directory();
}
32✔
596

597
/// <summary>
/// Closes the document on destruction (see close()).
/// </summary>
compound_document::~compound_document()
{
    this->close();
}
36✔
601

602
void compound_document::close()
36✔
603
{
604
    stream_out_buffer_.reset(nullptr);
36✔
605
}
36✔
606

607
std::uint64_t compound_document::sector_size()
40,768✔
608
{
609
    return static_cast<std::uint64_t>(1) << header_.sector_size_power;
40,768✔
610
}
611

612
std::uint64_t compound_document::short_sector_size()
960✔
613
{
614
    return static_cast<std::uint64_t>(1) << header_.short_sector_size_power;
960✔
615
}
616

617
std::istream &compound_document::open_read_stream(const std::string &name)
64✔
618
{
619
    if (!contains_entry(name, compound_document_entry::entry_type::UserStream))
64!
620
    {
NEW
621
        throw xlnt::invalid_file("compound document entry of type UserStream not found at path: " + name);
×
622
    }
623

624
    const directory_id entry_id = find_entry(name, compound_document_entry::entry_type::UserStream);
64✔
625
    const compound_document_entry &entry = entries_.at(entry_id);
64✔
626

627
    stream_in_buffer_.reset(new compound_document_istreambuf(entry, *this));
64!
628
    stream_in_.rdbuf(stream_in_buffer_.get());
64✔
629

630
    return stream_in_;
64✔
631
}
632

633
std::ostream &compound_document::open_write_stream(const std::string &name)
8✔
634
{
635
    directory_id entry_id = contains_entry(name, compound_document_entry::entry_type::UserStream)
8✔
636
        ? find_entry(name, compound_document_entry::entry_type::UserStream)
8!
637
        : insert_entry(name, compound_document_entry::entry_type::UserStream);
8✔
638
    compound_document_entry &entry = entries_.at(entry_id);
8✔
639

640
    stream_out_buffer_.reset(new compound_document_ostreambuf(entry, *this));
8!
641
    stream_out_.rdbuf(stream_out_buffer_.get());
8✔
642

643
    return stream_out_;
8✔
644
}
645

646
template <typename T>
647
void compound_document::write_sector(binary_reader<T> &reader, sector_id id)
10,399✔
648
{
649
    out_->seekp(static_cast<std::streampos>(sector_data_start() + sector_size() * id));
10,399✔
650
    out_->write(reinterpret_cast<const char *>(reader.data() + reader.offset()),
10,399✔
651
        static_cast<std::streamsize>(std::min(sector_size(), static_cast<std::uint64_t>(reader.bytes() - reader.offset()))));
10,399✔
652
}
10,399✔
653

654
template <typename T>
UNCOV
655
void compound_document::write_short_sector(binary_reader<T> &reader, sector_id id)
×
656
{
NEW
657
    sector_chain chain = follow_chain(entries_.at(0), sat_);
×
NEW
658
    sector_id sector_id = chain.at(static_cast<std::size_t>(id / (sector_size() / short_sector_size())));
×
NEW
659
    std::uint64_t sector_offset = id % (sector_size() / short_sector_size()) * short_sector_size();
×
NEW
660
    out_->seekp(static_cast<std::streampos>(sector_data_start() + sector_size() * sector_id + sector_offset));
×
UNCOV
661
    out_->write(reinterpret_cast<const char *>(reader.data() + reader.offset()),
×
NEW
662
        static_cast<std::streamsize>(std::min(short_sector_size(), static_cast<std::uint64_t>(reader.bytes() - reader.offset()))));
×
UNCOV
663
}
×
664

665
template <typename T>
666
void compound_document::read_sector(sector_id id, binary_writer<T> &writer)
2,743✔
667
{
668
    in_->seekg(static_cast<std::streampos>(sector_data_start() + sector_size() * id));
2,743✔
669
    std::vector<byte> sector(sector_size(), 0);
2,743✔
670
    in_->read(reinterpret_cast<char *>(sector.data()), static_cast<std::streamsize>(sector_size()));
2,743✔
671
    writer.append(sector);
2,743✔
672
}
2,743✔
673

674
template <typename T>
675
void compound_document::read_sector_chain(sector_id start, binary_writer<T> &writer)
676
{
677
    for (sector_id link : follow_chain(start, sat_))
678
    {
679
        read_sector(link, writer);
680
    }
681
}
682

683
template <typename T>
684
void compound_document::read_sector_chain(sector_id start, binary_writer<T> &writer, sector_id offset, std::size_t count)
685
{
686
    sector_chain chain = follow_chain(start, sat_);
687

688
    for (std::size_t i = 0; i < count; ++i)
689
    {
690
        read_sector(chain.at(offset + i), writer);
691
    }
692
}
693

694
template <typename T>
695
void compound_document::read_short_sector(sector_id id, binary_writer<T> &writer)
218✔
696
{
697
    const sector_chain container_chain = follow_chain(entries_.at(0), sat_);
218✔
698
    std::vector<byte> container;
218✔
699
    binary_writer<byte> container_writer(container);
218✔
700

701
    for (sector_id sector : container_chain)
790✔
702
    {
703
        read_sector(sector, container_writer);
572✔
704
    }
705

706
    binary_reader<byte> container_reader(container);
218✔
707
    container_reader.offset(static_cast<std::size_t>(id * short_sector_size()));
218✔
708

709
    writer.append(container_reader, short_sector_size());
218✔
710
}
218✔
711

712
template <typename T>
713
void compound_document::read_short_sector_chain(sector_id start, binary_writer<T> &writer)
714
{
715
    for (sector_id link : follow_chain(start, ssat_))
716
    {
717
        read_short_sector(link, writer);
718
    }
719
}
720

721
template <typename T>
722
void compound_document::read_short_sector_chain(sector_id start, binary_writer<T> &writer, sector_id offset, std::size_t count)
723
{
724
    sector_chain chain = follow_chain(start, ssat_);
725

726
    for (std::size_t i = 0; i < count; ++i)
727
    {
728
        read_short_sector(chain.at(offset + i), writer);
729
    }
730
}
731

732
/// <summary>
/// Allocates one regular sector: picks the first FREESECT slot of the SAT (growing the SAT
/// by one sector if no slot is free), marks it ENDOFCHAIN, writes the SAT, zero-fills the
/// sector in the output and returns its ID.
/// </summary>
sector_id compound_document::allocate_sector()
{
    const auto ids_per_sector = static_cast<std::size_t>(sector_size() / sizeof(sector_id));
    auto free_slot = std::find(sat_.begin(), sat_.end(), FREESECT);

    if (free_slot == sat_.end())
    {
        // No free slot left: add a SAT sector. The new SAT sector takes the first slot of
        // the block it describes.
        const std::uint32_t sat_sector_index = header_.num_msat_sectors;
        const auto table_sector = static_cast<sector_id>(sat_.size());

        msat_.push_back(table_sector);
        write_msat();

        header_.msat.at(msat_.size() - 1) = table_sector;
        ++header_.num_msat_sectors;
        write_header();

        sat_.resize(sat_.size() + ids_per_sector, FREESECT);
        sat_.at(table_sector) = FATSECT;

        binary_reader<sector_id> sat_reader(sat_);
        sat_reader.offset(sat_sector_index * ids_per_sector);
        write_sector(sat_reader, table_sector);

        // The resize invalidated the iterator, so search again.
        free_slot = std::find(sat_.begin(), sat_.end(), FREESECT);
    }

    const auto allocated = static_cast<sector_id>(free_slot - sat_.begin());
    sat_.at(allocated) = ENDOFCHAIN;

    write_sat();

    std::vector<byte> zeroes(sector_size());
    binary_reader<byte> zero_reader(zeroes);
    write_sector(zero_reader, allocated);

    return allocated;
}
1,010✔
770

771
/// <summary>
/// Allocates count regular sectors, links them into one chain in the SAT, writes the SAT
/// and returns the chain in order. A count of 0 returns an empty chain without side effects.
/// </summary>
sector_chain compound_document::allocate_sectors(std::size_t count)
{
    sector_chain chain;

    if (count == 0)
    {
        return chain;
    }

    chain.reserve(count);

    sector_id tail = allocate_sector();
    chain.push_back(tail);

    while (chain.size() < count)
    {
        // Allocate first, then link the previous tail to the new sector.
        const sector_id fresh = allocate_sector();
        sat_.at(tail) = fresh;
        chain.push_back(fresh);
        tail = fresh;
    }

    write_sat();

    return chain;
}
4✔
792

793
/// <summary>
/// Collects the IDs of all sectors of the chain that begins at start by repeatedly
/// looking up the successor in the given allocation table until ENDOFCHAIN or FREESECT
/// is reached.
/// Throws xlnt::invalid_parameter for unexpected special IDs, std::out_of_range for IDs
/// outside the table, and xlnt::invalid_file if the chain contains a cycle.
/// </summary>
sector_chain compound_document::follow_chain(sector_id start, const sector_chain &table)
{
    sector_chain chain;
    sector_id current = start;

    while (!is_invalid_sector(current))
    {
        chain.push_back(current);
        current = table.at(current);

        // Every element was a valid index into the table, so a chain longer than the table
        // must visit some sector twice. Without this check a malformed file would make the
        // loop run until memory is exhausted.
        if (chain.size() > table.size())
        {
            throw xlnt::invalid_file("compound document contains a cyclic sector chain");
        }
    }

    return chain;
}
×
806

807
/// <summary>
/// Collects the sector chain of a directory entry using the given allocation table.
/// An entry without a usable start sector yields an empty chain.
/// </summary>
sector_chain compound_document::follow_chain(const compound_document_entry &entry, const sector_chain &table)
{
    sector_chain chain;

    if (!has_invalid_start_sector(entry))
    {
        chain = follow_chain(entry.start, table);
    }

    return chain;
}
818

UNCOV
819
/// <summary>
/// Allocates count short sectors, links them into one chain in the SSAT, writes the SSAT
/// and returns the chain in order. A count of 0 returns an empty chain without side effects.
/// </summary>
sector_chain compound_document::allocate_short_sectors(std::size_t count)
{
    sector_chain chain;

    if (count == 0)
    {
        return chain;
    }

    chain.reserve(count);

    sector_id tail = allocate_short_sector();
    chain.push_back(tail);

    while (chain.size() < count)
    {
        // Allocate first, then link the previous tail to the new short sector.
        const sector_id fresh = allocate_short_sector();
        ssat_.at(tail) = fresh;
        chain.push_back(fresh);
        tail = fresh;
    }

    write_ssat();

    return chain;
}
×
840

UNCOV
841
/// <summary>
/// Allocates one short sector: picks the first FREESECT slot of the SSAT (growing the SSAT
/// by one regular sector if no slot is free), marks it ENDOFCHAIN, writes header and SSAT,
/// makes sure the container chain starting at directory entry 0 is long enough to hold the
/// short sector, and returns the short sector's ID.
/// </summary>
sector_id compound_document::allocate_short_sector()
{
    const auto sectors_per_sector = static_cast<std::size_t>(sector_size() / sizeof(sector_id));
    auto next_free_iter = std::find(ssat_.begin(), ssat_.end(), FREESECT);

    if (next_free_iter == ssat_.end())
    {
        // No free slot left: back the SSAT with one more regular sector and append that
        // sector to the SSAT's own chain.
        const sector_id new_ssat_sector_id = allocate_sector();

        if (is_invalid_sector(header_.ssat_start))
        {
            header_.ssat_start = new_ssat_sector_id;
        }
        else
        {
            const sector_chain ssat_chain = follow_chain(header_.ssat_start, sat_);
            sat_.at(last_elem(ssat_chain)) = new_ssat_sector_id;
            write_sat();
        }

        write_header();

        const std::size_t old_size = ssat_.size();
        ssat_.resize(old_size + sectors_per_sector, FREESECT);

        // The reader offset counts table elements, so the new block begins at old_size
        // (allocate_sector() positions its reader the same way).
        binary_reader<sector_id> ssat_reader(ssat_);
        ssat_reader.offset(old_size);
        write_sector(ssat_reader, new_ssat_sector_id);

        // The resize invalidated the iterator, so search again.
        next_free_iter = std::find(ssat_.begin(), ssat_.end(), FREESECT);
    }

    ++header_.num_short_sectors;
    write_header();

    const auto next_free = static_cast<sector_id>(next_free_iter - ssat_.begin());
    ssat_.at(next_free) = ENDOFCHAIN;

    write_ssat();

    // Number of container sectors needed so that short sector next_free exists (always >= 1).
    const std::uint64_t short_sectors_per_sector = sector_size() / short_sector_size();
    const std::uint64_t required_container_sectors = next_free / short_sectors_per_sector + 1;

    if (has_invalid_start_sector(entries_.at(0)))
    {
        entries_.at(0).start = allocate_sector();
        write_entry(0);
    }

    const sector_chain container_chain = follow_chain(entries_.at(0), sat_);

    if (required_container_sectors > container_chain.size())
    {
        // Allocate before indexing sat_: allocate_sector() may grow sat_, and before C++17
        // the order in which the two sides of an assignment are evaluated is unspecified.
        const sector_id extra_sector = allocate_sector();
        sat_.at(last_elem(container_chain)) = extra_sector;
        write_sat();
    }

    return next_free;
}
903

904
/// <summary>
/// Returns the ID of the first directory entry of type Empty. If there is none, the
/// directory is extended by one sector worth of new entries (each written out immediately)
/// and the ID of the first new entry is returned.
/// </summary>
directory_id compound_document::next_empty_entry()
{
    directory_id entry_id = 0;

    for (; entry_id < entries_.size(); ++entry_id)
    {
        if (entries_.at(entry_id).type == compound_document_entry::entry_type::Empty)
        {
            return entry_id;
        }
    }

    // entry_id is now equal to entries_.size()

    if (is_invalid_sector(header_.directory_start))
    {
        header_.directory_start = allocate_sector();
        // Persist the new directory start, as allocate_short_sector() does for ssat_start.
        write_header();
    }
    else
    {
        const sector_chain directory_chain = follow_chain(header_.directory_start, sat_);
        // Allocate before indexing sat_: allocate_sector() may grow sat_, and before C++17
        // the order in which the two sides of an assignment are evaluated is unspecified.
        const sector_id new_directory_sector = allocate_sector();
        sat_.at(last_elem(directory_chain)) = new_directory_sector;
        write_sat();
    }

    const auto entries_per_sector = static_cast<std::size_t>(sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE);

    entries_.reserve(entries_.size() + entries_per_sector);
    for (std::size_t i = 0; i < entries_per_sector; ++i)
    {
        entries_.emplace_back();
        write_entry(entry_id + static_cast<directory_id>(i));
    }

    return entry_id;
}
940

941
/// <summary>
/// Inserts a new directory entry of the given type at the given path and returns its ID.
/// The entry is placed in the first empty directory slot, named after the last path
/// component, linked into the directory tree and the directory is written out.
/// Throws xlnt::key_not_found if the path names a parent storage that does not exist.
/// </summary>
directory_id compound_document::insert_entry(
    const std::string &name,
    compound_document_entry::entry_type type)
{
    directory_id entry_id = next_empty_entry();
    compound_document_entry &entry = entries_.at(entry_id);

    directory_id parent_id = 0;
    std::vector<std::string> split = split_path(name);
    std::string filename = last_elem(split);
    split.pop_back();

    // With more than the leading (empty) component left, the entry lives in a sub-storage.
    if (split.size() > 1)
    {
        std::string joined_path = join_path(split);
        parent_id = find_entry(joined_path, compound_document_entry::entry_type::UserStorage);

        if (is_invalid_entry(parent_id))
        {
            throw xlnt::key_not_found("parent compound document entry of type UserStorage not found at path \"" + joined_path + "\", "
                "necessary to insert entry \"" + name + "\" of type " + std::to_string(static_cast<int>(type)));
        }

        parent_storage_[entry_id] = parent_id;
    }

    entry.name(filename);
    entry.type = type;

    // A freshly inserted root storage or stream owns no sectors yet. Say so explicitly:
    // a start value left over from the empty slot (0) would otherwise be taken for a real
    // sector by has_invalid_start_sector(), and no sectors would ever be allocated for it.
    // NOTE(review): assumes unused entries carry start == 0 - confirm against compound_document_entry.
    if (type == compound_document_entry::entry_type::RootStorage
        || type == compound_document_entry::entry_type::UserStream)
    {
        entry.start = ENDOFCHAIN;
    }

    tree_insert(entry_id, parent_id);
    write_directory();

    return entry_id;
}
12✔
975

976
std::uint64_t compound_document::sector_data_start()
17,350✔
977
{
978
    return sizeof(compound_document_header);
17,350✔
979
}
980

981
bool compound_document::contains_entry(const std::string &path,
72✔
982
    compound_document_entry::entry_type type)
983
{
984
    return !is_invalid_entry(find_entry(path, type));
72✔
985
}
986

987
/// Looks for the first directory entry whose type equals `type` and whose
/// tree_path() equals `name`.
///
/// A RootStorage lookup for "/" or "/Root Entry" is answered with id 0 directly.
/// Returns the id of the matching entry, or NOSTREAM if there is none.
directory_id compound_document::find_entry(const std::string &name,
    compound_document_entry::entry_type type)
{
    const bool wants_root = (type == compound_document_entry::entry_type::RootStorage);

    if (wants_root && (name == "/" || name == "/Root Entry"))
    {
        return 0;
    }

    for (std::size_t index = 0; index < entries_.size(); ++index)
    {
        // The path is only computed for entries of the requested type.
        if (entries_.at(index).type != type)
        {
            continue;
        }

        const directory_id candidate_id = static_cast<directory_id>(index);

        if (tree_path(candidate_id) == name)
        {
            return candidate_id;
        }
    }

    return NOSTREAM;
}
1007

1008
void compound_document::print_directory()
×
1009
{
NEW
1010
    directory_id entry_id = 0;
×
1011

NEW
1012
    for (const compound_document_entry &entry : entries_)
×
1013
    {
1014
        if (entry.type == compound_document_entry::entry_type::UserStream)
×
1015
        {
1016
            std::cout << tree_path(entry_id) << std::endl;
×
1017
        }
1018

1019
        ++entry_id;
×
1020
    }
1021
}
×
1022

1023
void compound_document::write_directory()
998✔
1024
{
1025
    for (std::size_t entry_id = 0; entry_id < entries_.size(); ++entry_id)
4,990✔
1026
    {
1027
        write_entry(static_cast<directory_id>(entry_id));
3,992✔
1028
    }
1029
}
998✔
1030

1031
void compound_document::read_directory()
32✔
1032
{
1033
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
32✔
1034
    const std::size_t num_entries = static_cast<std::size_t>(
1035
        follow_chain(header_.directory_start, sat_).size() * entries_per_sector);
32✔
1036

1037
    entries_.reserve(entries_.size() + num_entries);
32✔
1038
    for (std::size_t entry_id = 0; entry_id < num_entries; ++entry_id)
232✔
1039
    {
1040
        entries_.emplace_back();
200✔
1041
        read_entry(static_cast<directory_id>(entry_id));
200✔
1042
    }
1043

1044
    std::vector<directory_id> stack;
32✔
1045
    std::vector<directory_id> storage_siblings;
32✔
1046
    std::vector<directory_id> stream_siblings;
32✔
1047

1048
    std::vector<directory_id> directory_stack;
32✔
1049
    directory_stack.push_back(0u);
32✔
1050

1051
    while (!directory_stack.empty())
100✔
1052
    {
1053
        directory_id current_storage_id = directory_stack.back();
68✔
1054
        directory_stack.pop_back();
68✔
1055

1056
        if (is_invalid_entry(tree_child(current_storage_id))) continue;
68!
1057

1058
        std::vector<directory_id> storage_stack;
68✔
1059
        directory_id storage_root_id = tree_child(current_storage_id);
68✔
1060
        parent_[storage_root_id] = NOSTREAM;
68✔
1061
        storage_stack.push_back(storage_root_id);
68✔
1062

1063
        while (!storage_stack.empty())
204✔
1064
        {
1065
            directory_id current_entry_id = storage_stack.back();
136✔
1066
            const compound_document_entry &current_entry = entries_.at(current_entry_id);
136✔
1067
            storage_stack.pop_back();
136✔
1068

1069
            parent_storage_[current_entry_id] = current_storage_id;
136✔
1070

1071
            if (current_entry.type == compound_document_entry::entry_type::UserStorage)
136✔
1072
            {
1073
                directory_stack.push_back(current_entry_id);
36✔
1074
            }
1075

1076
            if (!is_invalid_entry(tree_left(current_entry_id)))
136✔
1077
            {
1078
                storage_stack.push_back(tree_left(current_entry_id));
18✔
1079
                tree_parent(tree_left(current_entry_id)) = current_entry_id;
18✔
1080
            }
1081

1082
            if (!is_invalid_entry(tree_right(current_entry_id)))
136✔
1083
            {
1084
                storage_stack.push_back(tree_right(current_entry_id));
50✔
1085
                tree_parent(tree_right(current_entry_id)) = current_entry_id;
50✔
1086
            }
1087
        }
1088
    }
68✔
1089
}
32✔
1090

1091
/// Inserts entry `new_id` into the sibling tree of storage `storage_id`.
///
/// The entry is registered as belonging to `storage_id` and its sibling links are
/// reset. If the storage has no tree root yet, the entry becomes a black root
/// (entry 0 is not stored as its own child). Otherwise an ordinary binary search
/// tree insertion ordered by compare_keys() is performed and the tree is then
/// rebalanced by tree_insert_fixup().
void compound_document::tree_insert(directory_id new_id, directory_id storage_id)
{
    using entry_color = compound_document_entry::entry_color;

    parent_storage_[new_id] = storage_id;
    tree_left(new_id) = NOSTREAM;
    tree_right(new_id) = NOSTREAM;

    const directory_id existing_root = tree_root(new_id);

    if (is_invalid_entry(existing_root))
    {
        // First entry of this storage.
        if (new_id != 0)
        {
            tree_root(new_id) = new_id;
        }

        tree_color(new_id) = entry_color::Black;
        tree_parent(new_id) = NOSTREAM;

        return;
    }

    // Plain binary search tree descent; the tree is rebalanced afterwards.
    directory_id cursor = existing_root;
    directory_id attach_point = NOSTREAM;

    while (!is_invalid_entry(cursor))
    {
        attach_point = cursor;

        const bool descend_right = compare_keys(tree_key(new_id), tree_key(cursor)) > 0;
        cursor = descend_right ? tree_right(cursor) : tree_left(cursor);
    }

    tree_parent(new_id) = attach_point;

    const bool attach_right = compare_keys(tree_key(new_id), tree_key(attach_point)) > 0;

    if (!attach_right)
    {
        tree_left(attach_point) = new_id;
    }
    else
    {
        tree_right(attach_point) = new_id;
    }

    tree_insert_fixup(new_id);
}
1145

1146
std::string compound_document::tree_path(directory_id id)
244✔
1147
{
1148
    directory_id storage_id = parent_storage_.at(id);
244✔
1149
    std::vector<std::string> result;
244✔
1150

1151
    while (storage_id > 0)
328✔
1152
    {
1153
        storage_id = parent_storage_.at(storage_id);
84✔
1154
        result.emplace_back(entries_.at(storage_id).name());
84✔
1155
    }
1156

1157
    return "/" + join_path(result) + entries_.at(id).name();
488✔
1158
}
244✔
1159

1160
/// Left rotation around `x`: the right child of `x` takes the place of `x`, and
/// `x` becomes that node's left child. The former left subtree of the right
/// child becomes the right subtree of `x`.
void compound_document::tree_rotate_left(directory_id x)
{
    const directory_id rising = tree_right(x);
    const directory_id moved_subtree = tree_left(rising);

    // The left subtree of the rising node is handed over to x.
    tree_right(x) = moved_subtree;

    if (!is_invalid_entry(moved_subtree))
    {
        tree_parent(moved_subtree) = x;
    }

    // The rising node takes over x's position below x's former parent.
    const directory_id former_parent = tree_parent(x);
    tree_parent(rising) = former_parent;

    if (is_invalid_entry(former_parent))
    {
        tree_root(x) = rising;
    }
    else if (x == tree_left(former_parent))
    {
        tree_left(former_parent) = rising;
    }
    else
    {
        tree_right(former_parent) = rising;
    }

    // Finally x is hooked in as left child of the rising node.
    tree_left(rising) = x;
    tree_parent(x) = rising;
}
×
1192

1193
/// Right rotation around `y`: the left child of `y` takes the place of `y`, and
/// `y` becomes that node's right child. The former right subtree of the left
/// child becomes the left subtree of `y`.
void compound_document::tree_rotate_right(directory_id y)
{
    const directory_id rising = tree_left(y);
    const directory_id moved_subtree = tree_right(rising);

    // The right subtree of the rising node is handed over to y.
    tree_left(y) = moved_subtree;

    if (!is_invalid_entry(moved_subtree))
    {
        tree_parent(moved_subtree) = y;
    }

    // The rising node takes over y's position below y's former parent.
    const directory_id former_parent = tree_parent(y);
    tree_parent(rising) = former_parent;

    if (is_invalid_entry(former_parent))
    {
        tree_root(y) = rising;
    }
    else if (y == tree_left(former_parent))
    {
        tree_left(former_parent) = rising;
    }
    else
    {
        tree_right(former_parent) = rising;
    }

    // Finally y is hooked in as right child of the rising node.
    tree_right(rising) = y;
    tree_parent(y) = rising;
}
×
1225

1226
/// Recolors and rotates the tree after `x` has been inserted by tree_insert().
///
/// `x` is colored red. While `x` is not the tree root and its parent is red:
/// - case 1 (the parent's sibling is red): parent and sibling become black, the
///   grandparent becomes red and the loop continues at the grandparent;
/// - case 2 (`x` is an "inner" child): rotate at the parent so `x` becomes an
///   outer child, then fall through to case 3;
/// - case 3: the parent becomes black, the grandparent red, followed by a rotation
///   at the grandparent.
/// Finally the tree root is colored black.
void compound_document::tree_insert_fixup(directory_id x)
{
    using entry_color = compound_document_entry::entry_color;

    tree_color(x) = entry_color::Red;

    while (x != tree_root(x) && tree_color(tree_parent(x)) == entry_color::Red)
    {
        const directory_id parent_id = tree_parent(x);
        const directory_id grandparent_id = tree_parent(parent_id);
        const bool parent_is_left_child = (parent_id == tree_left(grandparent_id));

        // The sibling of the parent sits on the opposite side of the grandparent.
        const directory_id uncle_id = parent_is_left_child
            ? tree_right(grandparent_id)
            : tree_left(grandparent_id);

        if (!is_invalid_entry(uncle_id) && tree_color(uncle_id) == entry_color::Red)
        {
            // case 1 (identical for both sides)
            tree_color(parent_id) = entry_color::Black;
            tree_color(uncle_id) = entry_color::Black;
            tree_color(grandparent_id) = entry_color::Red;
            x = grandparent_id;
            continue;
        }

        if (parent_is_left_child)
        {
            if (x == tree_right(parent_id))
            {
                // case 2
                x = parent_id;
                tree_rotate_left(x);
            }

            // case 3 (parent links are re-read because case 2 may have changed them)
            tree_color(tree_parent(x)) = entry_color::Black;
            tree_color(tree_parent(tree_parent(x))) = entry_color::Red;
            tree_rotate_right(tree_parent(tree_parent(x)));
        }
        else // mirror image of the branch above
        {
            if (x == tree_left(parent_id))
            {
                // case 2
                x = parent_id;
                tree_rotate_right(x);
            }

            // case 3 (parent links are re-read because case 2 may have changed them)
            tree_color(tree_parent(x)) = entry_color::Black;
            tree_color(tree_parent(tree_parent(x))) = entry_color::Red;
            tree_rotate_left(tree_parent(tree_parent(x)));
        }
    }

    tree_color(tree_root(x)) = entry_color::Black;
}
4✔
1292

1293
/// Returns a mutable reference to the `prev` link of entry `id`, which the tree
/// code uses as the left child. Throws std::out_of_range for an unknown id.
directory_id &compound_document::tree_left(directory_id id)
{
    compound_document_entry &entry = entries_.at(id);
    return entry.prev;
}
1297

1298
/// Returns a mutable reference to the `next` link of entry `id`, which the tree
/// code uses as the right child. Throws std::out_of_range for an unknown id.
directory_id &compound_document::tree_right(directory_id id)
{
    compound_document_entry &entry = entries_.at(id);
    return entry.next;
}
1302

1303
/// Returns a mutable reference to the tree parent recorded for entry `id`.
directory_id &compound_document::tree_parent(directory_id id)
{
    // Note: a missing record is created on access. This is intended.
    directory_id &parent_slot = parent_[id];
    return parent_slot;
}
1308

1309
/// Returns a mutable reference to the root of the sibling tree that entry `id`
/// belongs to, i.e. the `child` link of the storage registered for `id` in
/// parent_storage_. Throws std::out_of_range if `id` has no registered storage.
directory_id &compound_document::tree_root(directory_id id)
{
    const directory_id owning_storage_id = parent_storage_.at(id);
    return tree_child(owning_storage_id);
}
1313

1314
/// Returns a mutable reference to the `child` link of entry `id`.
/// Throws std::out_of_range for an unknown id.
directory_id &compound_document::tree_child(directory_id id)
{
    compound_document_entry &entry = entries_.at(id);
    return entry.child;
}
1318

1319
std::string compound_document::tree_key(directory_id id)
16✔
1320
{
1321
    return entries_.at(id).name();
16✔
1322
}
1323

1324
/// Returns a mutable reference to the red/black color flag of entry `id`.
/// Throws std::out_of_range for an unknown id.
compound_document_entry::entry_color &compound_document::tree_color(directory_id id)
{
    compound_document_entry &entry = entries_.at(id);
    return entry.color;
}
1328

1329
void compound_document::read_header()
32✔
1330
{
1331
    in_->seekg(0, std::ios::beg);
32✔
1332
    in_->read(reinterpret_cast<char *>(&header_), sizeof(compound_document_header));
32✔
1333

1334
    // Header Signature (8 bytes): Identification signature for the compound file structure, and MUST be
1335
    // set to the value 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1.
1336
    if (header_.header_signature != 0xE11AB1A1E011CFD0)
32!
1337
    {
NEW
1338
        throw xlnt::invalid_file("invalid header signature, expected 0xE11AB1A1E011CFD0 but got " + format_hex(header_.header_signature));
×
1339
    }
1340

1341
    // Header CLSID (16 bytes): Reserved and unused class ID that MUST be set to all zeroes (CLSID_NULL).
1342
    if (std::any_of(header_.header_clsid.begin(), header_.header_clsid.end(), [](std::uint8_t i) { return i != 0; }))
544!
1343
    {
NEW
1344
        std::string exception_str = "invalid header CLSID, expected only zeros but got: ";
×
NEW
1345
        for (std::uint8_t val : header_.header_clsid)
×
1346
        {
NEW
1347
            exception_str += fmt::format("{:02x} ", val);
×
1348
        }
NEW
1349
        throw xlnt::invalid_file(exception_str);
×
NEW
1350
    }
×
1351

1352
    // Major Version (2 bytes): Version number for breaking changes. This field MUST be set to either
1353
    // 0x0003 (version 3) or 0x0004 (version 4).
1354
    if (header_.major_version != 3 && header_.major_version != 4)
32!
1355
    {
NEW
1356
        throw xlnt::invalid_file("invalid major version, expected 3 or 4 but got " + std::to_string(header_.major_version));
×
1357
    }
1358

1359
    // Byte Order (2 bytes): This field MUST be set to 0xFFFE. This field is a byte order mark for all integer
1360
    // fields, specifying little-endian byte order.
1361
    if (header_.byte_order != compound_document_header::byte_order_type::little_endian)
32!
1362
    {
NEW
1363
        throw xlnt::invalid_file("invalid byte order, expected 0xFFFE (little-endian) but got " +
×
NEW
1364
            fmt::format("0x{:04X}", static_cast<std::uint16_t>(header_.byte_order)));
×
1365
    }
1366

1367
    // Sector Shift (2 bytes): This field MUST be set to 0x0009, or 0x000c, depending on the Major
1368
    // Version field. This field specifies the sector size of the compound file as a power of 2.
1369
    // - If Major Version is 3, the Sector Shift MUST be 0x0009, specifying a sector size of 512 bytes.
1370
    // - If Major Version is 4, the Sector Shift MUST be 0x000C, specifying a sector size of 4096 bytes.
1371
    if (!((header_.major_version == 3 && header_.sector_size_power == 0x0009) ||
32!
NEW
1372
        (header_.major_version == 4 && header_.sector_size_power == 0x000C)))
×
1373
    {
NEW
1374
        throw xlnt::invalid_file("invalid combination of sector size power and major version, got sector_size_power = " +
×
NEW
1375
            fmt::format("0x{:04X}", header_.sector_size_power) + "; major_version = " + std::to_string(header_.major_version));
×
1376
    }
1377

1378
    // Mini Sector Shift (2 bytes): This field MUST be set to 0x0006. This field specifies the sector size of
1379
    // the Mini Stream as a power of 2. The sector size of the Mini Stream MUST be 64 bytes.
1380
    if (header_.short_sector_size_power != 0x0006)
32!
1381
    {
NEW
1382
        throw xlnt::invalid_file("invalid short sector size power, expected 0x0006 but got " + fmt::format("0x{:04X}", header_.short_sector_size_power));
×
1383
    }
1384

1385
    // Reserved (6 bytes): This field MUST be set to all zeroes.
1386
    if (std::any_of(header_.reserved.begin(), header_.reserved.end(), [](std::uint8_t i) { return i != 0; }))
224!
1387
    {
NEW
1388
        std::string exception_str = "invalid reserved field, expected only zeros but got: ";
×
NEW
1389
        for (std::uint8_t val : header_.reserved)
×
1390
        {
NEW
1391
            exception_str += fmt::format("{:02x} ", val);
×
1392
        }
NEW
1393
        throw xlnt::invalid_file(exception_str);
×
NEW
1394
    }
×
1395

1396
    // Number of Directory Sectors (4 bytes): This integer field contains the count of the number of
1397
    // directory sectors in the compound file.
1398
    // - If Major Version is 3, the Number of Directory Sectors MUST be zero. This field is not
1399
    //   supported for version 3 compound files.
1400
    if (header_.major_version == 3 && header_.num_directory_sectors != 0)
32!
1401
    {
NEW
1402
        throw xlnt::invalid_file("invalid number of directory sectors for major version 3: expected 0 directory sectors but got " +
×
NEW
1403
            std::to_string(header_.num_directory_sectors));
×
1404
    }
1405

1406
    // Mini Stream Cutoff Size (4 bytes): This integer field MUST be set to 0x00001000. This field
1407
    // specifies the maximum size of a user-defined data stream that is allocated from the mini FAT
1408
    // and mini stream, and that cutoff is 4,096 bytes. Any user-defined data stream that is greater than
1409
    // or equal to this cutoff size must be allocated as normal sectors from the FAT.
1410
    if (header_.threshold != 0x00001000)
32!
1411
    {
NEW
1412
        throw xlnt::invalid_file("invalid mini stream cutoff size, expected 0x00001000 but got " + format_hex(header_.threshold));
×
1413
    }
1414

1415
    // DIFAT (436 bytes): This array of 32-bit integer fields contains the first 109 FAT sector locations of
1416
    // the compound file.
1417
    // - For version 4 compound files, the header size (512 bytes) is less than the sector size (4,096
1418
    //   bytes), so the remaining part of the header (3,584 bytes) MUST be filled with all zeroes.
1419
    if (header_.major_version == 4)
32!
1420
    {
NEW
1421
        std::array<std::uint8_t, 3584> remaining {{ 0 }};
×
NEW
1422
        in_->read(reinterpret_cast<char *>(remaining.data()), sizeof(remaining));
×
1423

NEW
1424
        if (std::any_of(remaining.begin(), remaining.end(), [](std::uint8_t i) { return i != 0; }))
×
1425
        {
NEW
1426
            std::string exception_str = "invalid remaining bytes in header (major version 4), expected only zeros but got: ";
×
NEW
1427
            for (std::uint8_t val : remaining)
×
1428
            {
NEW
1429
                exception_str += fmt::format("{:02x} ", val);
×
1430
            }
NEW
1431
            throw xlnt::invalid_file(exception_str);
×
NEW
1432
        }
×
1433
    }
1434
}
32✔
1435

1436
void compound_document::read_msat()
32✔
1437
{
1438
    msat_.clear();
32✔
1439

1440
    sector_id msat_sector = header_.extra_msat_start;
32✔
1441
    binary_writer<sector_id> msat_writer(msat_);
32✔
1442

1443
    for (std::uint32_t i = 0u; i < header_.num_msat_sectors; ++i)
91✔
1444
    {
1445
        if (i < 109u)
59!
1446
        {
1447
            msat_writer.write(header_.msat.at(i));
59✔
1448
        }
1449
        else
1450
        {
1451
            read_sector(msat_sector, msat_writer);
×
1452

NEW
1453
            msat_sector = last_elem(msat_);
×
1454
            msat_.pop_back();
×
1455
        }
1456
    }
1457
}
32✔
1458

1459
void compound_document::read_sat()
32✔
1460
{
1461
    sat_.clear();
32✔
1462
    binary_writer<sector_id> sat_writer(sat_);
32✔
1463

1464
    for (sector_id msat_sector : msat_)
91✔
1465
    {
1466
        read_sector(msat_sector, sat_writer);
59✔
1467
    }
1468
}
32✔
1469

1470
void compound_document::read_ssat()
32✔
1471
{
1472
    ssat_.clear();
32✔
1473
    binary_writer<sector_id> ssat_writer(ssat_);
32✔
1474

1475
    for (sector_id ssat_sector : follow_chain(header_.ssat_start, sat_))
64✔
1476
    {
1477
        read_sector(ssat_sector, ssat_writer);
32✔
1478
    }
32✔
1479
}
32✔
1480

NEW
1481
std::string compound_document_entry::format_info(
×
1482
    directory_id entry_id,
1483
    sector_id sector_id,
1484
    /// IMPORTANT: only show the name after the name and its length have been validated!
1485
    bool show_entry_name) const
1486
{
1487
    // The formatted IDs should be as short as possible to keep the exception message readable - so we do not add leading zeros.
NEW
1488
    std::string message = "(entry " + fmt::format("0x{:X}", entry_id);
×
NEW
1489
    if (show_entry_name)
×
1490
    {
NEW
1491
        message += " with name \"";
×
1492
        // Only add the name if the conversion does not throw an exception itself!
1493
        try
1494
        {
NEW
1495
            message += name();
×
1496
        }
NEW
1497
        catch (const std::exception &ex)
×
1498
        {
NEW
1499
            message += "INVALID (";
×
NEW
1500
            message += ex.what();
×
NEW
1501
            message.push_back(')');
×
NEW
1502
        }
×
NEW
1503
        message.push_back('"');
×
1504
    }
NEW
1505
    message += " of type " + std::to_string(static_cast<int>(type)) +
×
NEW
1506
        " in sector " + fmt::format("0x{:X}", sector_id) + ")";
×
NEW
1507
    return message;
×
NEW
1508
}
×
1509

1510
void check_empty_entry(
32✔
1511
    const compound_document_entry &entry,
1512
    directory_id id,
1513
    sector_id directory_sector)
1514
{
1515
    if (entry.type != compound_document_entry::entry_type::Empty)
32!
1516
    {
NEW
1517
        throw xlnt::invalid_parameter("invalid entry type " +
×
NEW
1518
            entry.format_info(id, directory_sector, false) +
×
NEW
1519
            ", expected 0 (Unallocated) but got " + std::to_string(static_cast<int>(entry.type)));
×
1520
    }
1521

1522
    // Free (unused) directory entries are marked with Object Type 0x0 (unknown or unallocated). The
1523
    // entire directory entry must consist of all zeroes except for the child, right sibling, and left sibling
1524
    // pointers, which must be initialized to NOSTREAM (0xFFFFFFFF).
1525

1526
    // NOTE: Some implementations seem to not initialize this buffer at all, so we cannot check it for correctness.
1527
    /*if (std::any_of(entry.name_array.begin(), entry.name_array.end(), [](char16_t i) { return i != 0; }))
1528
    {
1529
        std::string exception_str = "invalid entry name " +
1530
            entry.format_info(id, directory_sector, false) +
1531
            ", expected all zeros but got: ";
1532
        for (char16_t val : entry.name_array)
1533
        {
1534
            exception_str += fmt::format("{:04x} ", static_cast<std::uint16_t>(val));
1535
        }
1536
        throw xlnt::invalid_file(exception_str);
1537
    }*/
1538

1539
    if (entry.name_length != 0)
32!
1540
    {
NEW
1541
        throw xlnt::invalid_file("invalid entry name length " + entry.format_info(id, directory_sector, false) +
×
NEW
1542
            ", expected 0 but got " + std::to_string(entry.name_length));
×
1543
    }
1544

1545
    if (entry.color != compound_document_entry::entry_color::Red)
32!
1546
    {
NEW
1547
        throw xlnt::invalid_file("invalid entry color " + entry.format_info(id, directory_sector, false) +
×
NEW
1548
            ", expected 0 (Red) but got " + std::to_string(static_cast<int>(entry.color)));
×
1549
    }
1550

1551
    if (entry.prev != NOSTREAM || entry.next != NOSTREAM || entry.child != NOSTREAM)
32!
1552
    {
NEW
1553
        throw xlnt::invalid_file("empty entry contains invalid child or sibling " +
×
NEW
1554
            entry.format_info(id, directory_sector, false) +
×
NEW
1555
            "; prev = " + fmt::format("0x{:08X}", (entry.prev)) +
×
NEW
1556
            "; next = " + fmt::format("0x{:08X}", (entry.next)) +
×
NEW
1557
            "; child = " + fmt::format("0x{:08X}", (entry.child)));
×
1558
    }
1559

1560
    if (std::any_of(entry.clsid.begin(), entry.clsid.end(), [](std::uint8_t i) { return i != 0; }))
544!
1561
    {
NEW
1562
        std::string exception_str = "invalid entry CLSID " + entry.format_info(id, directory_sector, false) +
×
NEW
1563
            ", expected all zeros but got: ";
×
NEW
1564
        for (std::uint8_t val : entry.clsid)
×
1565
        {
NEW
1566
            exception_str += fmt::format("{:02x} ", val);
×
1567
        }
NEW
1568
        throw xlnt::invalid_file(exception_str);
×
NEW
1569
    }
×
1570

1571
    if (entry.state_bits != 0)
32!
1572
    {
NEW
1573
        throw xlnt::invalid_file("invalid entry state bits " + entry.format_info(id, directory_sector, false) +
×
NEW
1574
            ", expected 0 but got " + std::to_string(entry.state_bits));
×
1575
    }
1576

1577
    // NOTE: some implementations seem to use the timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC.
1578
    if (entry.creation_time != 0 && entry.creation_time != 116444736000000000)
32!
1579
    {
NEW
1580
        throw xlnt::invalid_file("invalid entry creation time " + entry.format_info(id, directory_sector, false) +
×
NEW
1581
            ", expected 0 or 116444736000000000, but got " + std::to_string(entry.creation_time));
×
1582
    }
1583

1584
    // NOTE: some implementations seem to use the timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC.
1585
    if (entry.modified_time != 0 && entry.modified_time != 116444736000000000)
32!
1586
    {
NEW
1587
        throw xlnt::invalid_file("invalid entry modification time " + entry.format_info(id, directory_sector, false) +
×
NEW
1588
            ", expected 0 or 116444736000000000, but got " + std::to_string(entry.modified_time));
×
1589
    }
1590

1591
    // According to the specification (see above), it must be 0, but it seems that some immplementations
1592
    // initialize it with ENDOFCHAIN or FREESECT, which is honestly not wrong either. So let's accept that.
1593
    if (entry.start != 0 && entry.start != ENDOFCHAIN && entry.start != FREESECT)
32!
1594
    {
NEW
1595
        throw xlnt::invalid_file("invalid entry start sector location " + entry.format_info(id, directory_sector, false) +
×
NEW
1596
            ", expected 0 or ENDOFCHAIN (0xFFFFFFFE) or FREESECT (0xFFFFFFFF), but got " + format_hex(entry.start));
×
1597
    }
1598

1599
    if (entry.size != 0)
32!
1600
    {
NEW
1601
        throw xlnt::invalid_file("invalid entry stream size " + entry.format_info(id, directory_sector, false) +
×
NEW
1602
            ", expected 0 but got " + std::to_string(entry.size));
×
1603
    }
1604
}
32✔
1605

1606
void check_non_empty_entry(
168✔
1607
    const compound_document_entry &entry,
1608
    directory_id id,
1609
    sector_id directory_sector)
1610
{
1611
    if (entry.type == compound_document_entry::entry_type::Empty)
168!
1612
    {
NEW
1613
        throw xlnt::invalid_parameter("invalid entry type " +
×
NEW
1614
            entry.format_info(id, directory_sector, false) +
×
NEW
1615
            ", expected different than Empty but got Empty");
×
1616
    }
1617

1618
    // First check the length, as we'll need this for the string itself.
1619
    // Directory Entry Name Length (2 bytes): This field MUST match the length of the Directory Entry
1620
    // Name Unicode string in bytes. The length MUST be a multiple of 2 and include the terminating null
1621
    // character in the count. This length MUST NOT exceed 64, the maximum size of the Directory Entry
1622
    // Name field.
1623
    if (entry.name_length < 2 || entry.name_length > 64)
168!
1624
    {
NEW
1625
        throw xlnt::invalid_file("invalid entry name length " +
×
NEW
1626
            entry.format_info(id, directory_sector, false) +
×
NEW
1627
            ", expected >= 2 and <= 64, but got " + std::to_string(entry.name_length));
×
1628
    }
1629
    else if (entry.name_length % 2 != 0)
168!
1630
    {
NEW
1631
        throw xlnt::invalid_file("invalid entry name length " +
×
NEW
1632
            entry.format_info(id, directory_sector, false) +
×
NEW
1633
            ", which must be a multiple of 2, but got " + std::to_string(entry.name_length));
×
1634
    }
1635

1636
    // Directory Entry Name (64 bytes): This field MUST contain a Unicode string for the storage or
1637
    // stream name encoded in UTF-16. The name MUST be terminated with a UTF-16 terminating null
1638
    // character. Thus, storage and stream names are limited to 32 UTF-16 code points, including the
1639
    // terminating null character. When locating an object in the compound file except for the root
1640
    // storage, the directory entry name is compared by using a special case-insensitive uppercase
1641
    // mapping, described in Red-Black Tree. The following characters are illegal and MUST NOT be part
1642
    // of the name: '/', '\', ':', '!'.
1643
    std::uint16_t name_length_characters = (entry.name_length / 2) - 1; // does NOT include \0 at the end
168✔
1644
    if (entry.name_array.at(name_length_characters) != u'\0')
168!
1645
    {
NEW
1646
        std::string exception_str = "invalid entry name " +
×
NEW
1647
            entry.format_info(id, directory_sector, false) +
×
NEW
1648
            ", which must be terminated with \\0 but is terminated with " +
×
NEW
1649
            fmt::format("0x{:04X}", static_cast<std::uint16_t>(entry.name_array.at(name_length_characters))) +
×
NEW
1650
            "\nString has a length of " + std::to_string(name_length_characters) + " characters (" +
×
NEW
1651
            std::to_string(entry.name_length) + " bytes including \\0). Full buffer contents:\n";
×
NEW
1652
        for (char16_t val : entry.name_array)
×
1653
        {
NEW
1654
            exception_str += fmt::format("{:04x} ", static_cast<std::uint16_t>(val));
×
1655
        }
1656

NEW
1657
        throw xlnt::invalid_file(exception_str);
×
NEW
1658
    }
×
1659

1660
    for (std::uint16_t n = 0; n < name_length_characters; ++n)
2,471✔
1661
    {
1662
        char16_t curr = entry.name_array.at(n);
2,303✔
1663
        if (curr == u'/' || curr == u'\\' || curr == u':' || curr == u'!')
2,303!
1664
        {
NEW
1665
            throw xlnt::invalid_file("invalid entry name " + entry.format_info(id, directory_sector, true) +
×
NEW
1666
                ", which contains invalid character " +
×
NEW
1667
                fmt::format("0x{:04X}", static_cast<std::uint16_t>(curr)) + " at position " + std::to_string(n));
×
1668
        }
1669
    }
1670

1671
    // Object Type (1 byte): This field MUST be 0x00, 0x01, 0x02, or 0x05, depending on the actual type
1672
    // of object. All other values are not valid.
1673
    // --------------------------------
1674
    // NOTE: the empty type is handled in check_empty_entry().
1675
    if (entry.type != compound_document_entry::entry_type::UserStorage &&
168✔
1676
        entry.type != compound_document_entry::entry_type::UserStream &&
132✔
1677
        entry.type != compound_document_entry::entry_type::RootStorage)
32!
1678
    {
NEW
1679
        throw xlnt::invalid_file("invalid entry object type " + entry.format_info(id, directory_sector, true) +
×
NEW
1680
            ", expected 0 (Unallocated), 1 (Storage), 2 (Stream) or 5 (RootStorage) but got " + std::to_string(static_cast<int>(entry.type)));
×
1681
    }
1682

1683
    // Color Flag (1 byte): This field MUST be 0x00 (red) or 0x01 (black). All other values are not valid.
1684
    if (entry.color != compound_document_entry::entry_color::Red && entry.color != compound_document_entry::entry_color::Black)
168!
1685
    {
NEW
1686
        throw xlnt::invalid_file("invalid entry color " + entry.format_info(id, directory_sector, true) +
×
NEW
1687
            ", expected 0 (Red) or 1 (Black), but got " + std::to_string(static_cast<int>(entry.color)));
×
1688
    }
1689

1690
    // CLSID (16 bytes): This field contains an object class GUID, if this entry is for a storage object or
1691
    // root storage object. For a stream object, this field MUST be set to all zeroes. A value containing all
1692
    // zeroes in a storage or root storage directory entry is valid, and indicates that no object class is
1693
    // associated with the storage. If an implementation of the file format enables applications to create
1694
    // storage objects without explicitly setting an object class GUID, it MUST write all zeroes by default.
1695
    // If this value is not all zeroes, the object class GUID can be used as a parameter to start
1696
    // applications.
1697
    if (entry.type == compound_document_entry::entry_type::UserStream &&
268!
1698
        std::any_of(entry.clsid.begin(), entry.clsid.end(), [](std::uint8_t i) { return i != 0; }))
1,700!
1699
    {
NEW
1700
        std::string exception_str = "invalid entry CLSID " + entry.format_info(id, directory_sector, true) +
×
NEW
1701
            " for UserStream type, expected all zeros but got: ";
×
NEW
1702
        for (std::uint8_t val : entry.clsid)
×
1703
        {
NEW
1704
            exception_str += fmt::format("{:02x} ", val);
×
1705
        }
NEW
1706
        throw xlnt::invalid_file(exception_str);
×
NEW
1707
    }
×
1708

1709
    // Creation Time (8 bytes): This field contains the creation time for a storage object, or all zeroes to
1710
    // indicate that the creation time of the storage object was not recorded. The Windows FILETIME
1711
    // structure is used to represent this field in UTC. For a stream object, this field MUST be all zeroes.
1712
    // For a root storage object, this field MUST be all zeroes, and the creation time is retrieved or set on
1713
    // the compound file itself.
1714
    // --------------------------------
1715
    // NOTE: unfortunately cannot be enforced, as some files:
1716
    // - have a root entry with timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC
1717
    // - have a stream with an actual timestamp
1718
    /*if ((entry.type == compound_document_entry::entry_type::UserStream ||
1719
        entry.type == compound_document_entry::entry_type::RootStorage) &&
1720
        entry.creation_time != 0)
1721
    {
1722
        throw xlnt::invalid_file("invalid entry creation time " + entry.format_info(id, directory_sector, true) +
1723
            " for type " + std::to_string(static_cast<int>(entry.type)) +
1724
            ", expected 0 but got " + std::to_string(entry.creation_time));
1725
    }*/
1726

1727
    // Modified Time (8 bytes): This field contains the modification time for a storage object, or all
1728
    // zeroes to indicate that the modified time of the storage object was not recorded. The Windows
1729
    // FILETIME structure is used to represent this field in UTC. For a stream object, this field MUST be
1730
    // all zeroes. For a root storage object, this field MAY<2> be set to all zeroes, and the modified time
1731
    // is retrieved or set on the compound file itself.
1732
    // --------------------------------
1733
    // NOTE: unfortunately cannot be enforced, as some files have a stream with an actual timestamp.
1734
    /*if (entry.type == compound_document_entry::entry_type::UserStream &&
1735
        entry.modified_time != 0)
1736
    {
1737
        throw xlnt::invalid_file("invalid entry modification time " + entry.format_info(id, directory_sector, true) +
1738
            " for type UserStream, expected 0 but got " + std::to_string(entry.modified_time));
1739
    }*/
1740

1741
    // Starting Sector Location (4 bytes): This field contains the first sector location if this is a stream
1742
    // object. For a root storage object, this field MUST contain the first sector of the mini stream, if the
1743
    // mini stream exists. For a storage object, this field MUST be set to all zeroes.
1744
    // --------------------------------
1745
    // It seems that some implementations initialize it with FREESECT,
1746
    // which is honestly not wrong either. So let's accept that.
1747
    if (entry.type == compound_document_entry::entry_type::UserStorage &&
168✔
1748
        !(entry.start == 0 || entry.start == FREESECT))
36!
1749
    {
NEW
1750
        throw xlnt::invalid_file("invalid entry start sector location " + entry.format_info(id, directory_sector, true) +
×
NEW
1751
            " for type UserStorage, expected 0 or FREESECT (0xFFFFFFFF), but got " + format_hex(entry.start));
×
1752
    }
1753

1754
    // Stream Size (8 bytes): This 64-bit integer field contains the size of the user-defined data if this is
1755
    // a stream object. For a root storage object, this field contains the size of the mini stream. For a
1756
    // storage object, this field MUST be set to all zeroes.
1757
    if (entry.type == compound_document_entry::entry_type::UserStorage &&
168✔
1758
        entry.size != 0)
36!
1759
    {
NEW
1760
        throw xlnt::invalid_file("invalid entry stream size " + entry.format_info(id, directory_sector, true) +
×
NEW
1761
            " for type UserStorage, expected 0 but got " + std::to_string(entry.size));
×
1762
    }
1763
}
168✔
1764

1765
// Reads the directory entry with the given id from the input stream into entries_.at(id)
// and validates it.
//
// The entry is located by following the directory sector chain that starts at
// header_.directory_start, then read field by field in on-disk order. After the
// version 3 stream size fix-up, the entry is passed to check_empty_entry() for
// entries of type Empty and to check_non_empty_entry() for all other types.
void compound_document::read_entry(directory_id id)
{
    const sector_chain directory_chain = follow_chain(header_.directory_start, sat_);
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
    // Sector of the directory chain that contains the entry.
    const sector_id directory_sector = directory_chain.at(static_cast<std::size_t>(id / entries_per_sector));
    // Byte offset of the entry, relative to the start of the sector data.
    const std::uint64_t offset = sector_size() * directory_sector + ((id % entries_per_sector) * COMPOUND_DOCUMENT_ENTRY_SIZE);

    in_->seekg(static_cast<std::streamoff>(sector_data_start() + offset), std::ios::beg);
    compound_document_entry &entry = entries_.at(id);
    // Read the fields manually due to struct padding (larger sizeof than 128 bytes).
    in_->read(reinterpret_cast<char *>(entry.name_array.data()), sizeof(entry.name_array));
    in_->read(reinterpret_cast<char *>(&entry.name_length), sizeof(entry.name_length));
    in_->read(reinterpret_cast<char *>(&entry.type), sizeof(entry.type));
    in_->read(reinterpret_cast<char *>(&entry.color), sizeof(entry.color));
    in_->read(reinterpret_cast<char *>(&entry.prev), sizeof(entry.prev));
    in_->read(reinterpret_cast<char *>(&entry.next), sizeof(entry.next));
    in_->read(reinterpret_cast<char *>(&entry.child), sizeof(entry.child));
    in_->read(reinterpret_cast<char *>(entry.clsid.data()), sizeof(entry.clsid));
    in_->read(reinterpret_cast<char *>(&entry.state_bits), sizeof(entry.state_bits));
    in_->read(reinterpret_cast<char *>(&entry.creation_time), sizeof(entry.creation_time));
    in_->read(reinterpret_cast<char *>(&entry.modified_time), sizeof(entry.modified_time));
    in_->read(reinterpret_cast<char *>(&entry.start), sizeof(entry.start));
    in_->read(reinterpret_cast<char *>(&entry.size), sizeof(entry.size));

    // Stream Size (8 bytes): ... (see below for the rest)
    // - For a version 3 compound file 512-byte sector size, the value of this field MUST be less than
    //   or equal to 0x80000000. (Equivalently, this requirement can be stated: the size of a stream or
    //   of the mini stream in a version 3 compound file MUST be less than or equal to 2 gigabytes
    //   (GB).) Note that as a consequence of this requirement, the most significant 32 bits of this field
    //   MUST be zero in a version 3 compound file. However, implementers should be aware that
    //   some older implementations did not initialize the most significant 32 bits of this field, and
    //   these bits might therefore be nonzero in files that are otherwise valid version 3 compound
    //   files. Although this document does not normatively specify parser behavior, it is recommended
    //   that parsers ignore the most significant 32 bits of this field in version 3 compound files,
    //   treating it as if its value were zero, unless there is a specific reason to do otherwise (for
    //   example, a parser whose purpose is to verify the correctness of a compound file).
    if (header_.major_version == 3 && entry.size > 0x80000000)
    {
        // Note: the only allowed byte order is little-endian.
        // Keep the least significant 32 bits and discard the most significant 32 bits.
        // The mask must be 0xFFFFFFFF: a 16-bit mask (0x0000FFFF) would also throw away
        // valid size bits and truncate every stream of 64 KiB or more.
        entry.size = entry.size & 0xFFFFFFFFu;
    }

    if (entry.type == compound_document_entry::entry_type::Empty)
    {
        check_empty_entry(entry, id, directory_sector);
    }
    else
    {
        check_non_empty_entry(entry, id, directory_sector);
    }
}
200✔
1816

1817
void compound_document::write_header()
15✔
1818
{
1819
    out_->seekp(0, std::ios::beg);
15✔
1820
    out_->write(reinterpret_cast<char *>(&header_), sizeof(compound_document_header));
15✔
1821
}
15✔
1822

1823
void compound_document::write_msat()
11✔
1824
{
1825
    sector_id msat_sector = header_.extra_msat_start;
11✔
1826

1827
    for (std::uint32_t i = 0u; i < header_.num_msat_sectors; ++i)
39✔
1828
    {
1829
        if (i < 109u)
28!
1830
        {
1831
            header_.msat.at(i) = msat_.at(i);
28✔
1832
        }
1833
        else
1834
        {
NEW
1835
            std::vector<sector_id> sector;
×
NEW
1836
            binary_writer<sector_id> sector_writer(sector);
×
1837

1838
            read_sector(msat_sector, sector_writer);
×
1839

NEW
1840
            msat_sector = last_elem(sector);
×
1841
            sector.pop_back();
×
1842

1843
            std::copy(sector.begin(), sector.end(), std::back_inserter(msat_));
×
1844
        }
×
1845
    }
1846
}
11✔
1847

1848
void compound_document::write_sat()
1,988✔
1849
{
1850
    binary_reader<sector_id> sector_reader(sat_);
1,988✔
1851

1852
    for (sector_id sat_sector : msat_)
10,360✔
1853
    {
1854
        write_sector(sector_reader, sat_sector);
8,372✔
1855
    }
1856
}
1,988✔
1857

UNCOV
1858
void compound_document::write_ssat()
×
1859
{
NEW
1860
    binary_reader<sector_id> sector_reader(ssat_);
×
1861

NEW
1862
    for (sector_id ssat_sector : follow_chain(header_.ssat_start, sat_))
×
1863
    {
UNCOV
1864
        write_sector(sector_reader, ssat_sector);
×
UNCOV
1865
    }
×
UNCOV
1866
}
×
1867

1868
// Writes the directory entry with the given id from entries_.at(id) to the output stream.
//
// The entry is located by following the directory sector chain that starts at
// header_.directory_start. Its fields are emitted one by one, in the same order in
// which read_entry() reads them.
void compound_document::write_entry(directory_id id)
{
    const sector_chain directory_chain = follow_chain(header_.directory_start, sat_);
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
    // Sector of the directory chain that contains the entry.
    const sector_id directory_sector = directory_chain.at(static_cast<std::size_t>(id / entries_per_sector));
    // Absolute position of the sector, then of the entry slot inside that sector.
    const auto sector_position = sector_data_start() + sector_size() * directory_sector;
    const std::uint64_t offset = sector_position
        + ((id % entries_per_sector) * COMPOUND_DOCUMENT_ENTRY_SIZE);

    out_->seekp(static_cast<std::streamoff>(offset), std::ios::beg);
    const compound_document_entry &entry = entries_.at(id);

    // Emits the raw bytes of a single field.
    const auto put = [this](const void *field, std::size_t byte_count) {
        out_->write(static_cast<const char *>(field), static_cast<std::streamsize>(byte_count));
    };

    // Write the fields manually due to struct padding (larger sizeof than 128 bytes).
    put(entry.name_array.data(), sizeof(entry.name_array));
    put(&entry.name_length, sizeof(entry.name_length));
    put(&entry.type, sizeof(entry.type));
    put(&entry.color, sizeof(entry.color));
    put(&entry.prev, sizeof(entry.prev));
    put(&entry.next, sizeof(entry.next));
    put(&entry.child, sizeof(entry.child));
    put(entry.clsid.data(), sizeof(entry.clsid));
    put(&entry.state_bits, sizeof(entry.state_bits));
    put(&entry.creation_time, sizeof(entry.creation_time));
    put(&entry.modified_time, sizeof(entry.modified_time));
    put(&entry.start, sizeof(entry.start));
    put(&entry.size, sizeof(entry.size));
}
4,008✔
1893

1894
} // namespace detail
1895
} // namespace xlnt
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc