• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

xlnt-community / xlnt / d844fb84-9fd5-4f21-b704-14232ed162fb

16 Feb 2026 09:30PM UTC coverage: 82.903% (-1.1%) from 83.961%
d844fb84-9fd5-4f21-b704-14232ed162fb

Pull #147

circleci

doomlaur
Major refactoring of compound_document. Unit tests now pass too.
Pull Request #147: Compound document improvements

15384 of 20387 branches covered (75.46%)

268 of 477 new or added lines in 4 files covered. (56.18%)

34 existing lines in 2 files now uncovered.

12525 of 15108 relevant lines covered (82.9%)

12161.34 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

59.79
./source/detail/cryptography/compound_document.cpp
1
// Copyright (C) 2016-2022 Thomas Fussell
2
// Copyright (C) 2002-2007 Ariya Hidayat (ariya@kde.org).
3
// Copyright (c) 2024-2026 xlnt-community
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions
7
// are met:
8
//
9
// 1. Redistributions of source code must retain the above copyright
10
// notice, this list of conditions and the following disclaimer.
11
// 2. Redistributions in binary form must reproduce the above copyright
12
// notice, this list of conditions and the following disclaimer in the
13
// documentation and/or other materials provided with the distribution.
14
//
15
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25

26
#include <algorithm>
27
#include <array>
28
#include <cstring>
29
#include <iostream>
30
#include <locale>
31
#include <string>
32
#include <vector>
33

34
#include <xlnt/utils/exceptions.hpp>
35
#include <detail/binary.hpp>
36
#include <detail/cryptography/compound_document.hpp>
37
#include <detail/unicode.hpp>
38

39
#define FMT_HEADER_ONLY
40
#include <fmt/format.h>
41

42
// NOTE: compound files are not part of the OOXML specification (ECMA-376).
43
// This implementation is based on the "[MS-CFB]: Compound File Binary File Format" specification.
44
namespace {
45

46
using namespace xlnt::detail;
47

48
template <typename T>
NEW
49
std::string format_hex(T value)
×
50
{
51
    // Format example: 0x0000660F
NEW
52
    return fmt::format("0x{:08X}", value);
×
53
}
54

55
/// Compares two keys without regard to case.
/// Both keys are lower-cased through the std::ctype<char> facet of a
/// default-constructed std::locale and then compared with std::string::compare.
/// Returns a negative value, zero, or a positive value accordingly.
int compare_keys(const std::string &left, const std::string &right)
{
    // Works on a private copy of the key; an empty key is handed back untouched.
    const auto lowered = [](std::string key) -> std::string {
        if (!key.empty())
        {
            static const std::locale locale;
            const std::ctype<char> &facet = std::use_facet<std::ctype<char>>(locale);
            char *const first = &key[0];
            facet.tolower(first, first + key.size());
        }

        return key;
    };

    const std::string lowered_left = lowered(left);
    const std::string lowered_right = lowered(right);

    return lowered_left.compare(lowered_right);
}
73

74
/// Splits a path at every '/' character.
/// Empty components are kept, so "/a" yields {"", "a"} and "" yields {""}.
std::vector<std::string> split_path(const std::string &path)
{
    std::vector<std::string> parts;
    std::size_t begin = 0;

    for (std::size_t slash = path.find('/'); slash != std::string::npos; slash = path.find('/', begin))
    {
        parts.emplace_back(path.substr(begin, slash - begin));
        begin = slash + 1;
    }

    // Whatever follows the last separator (possibly nothing) is the final component.
    parts.emplace_back(path.substr(begin));

    return parts;
}
×
91

92
/// Concatenates the given components, appending a '/' after every component
/// (including the last one). An empty list yields an empty string.
std::string join_path(const std::vector<std::string> &path)
{
    std::string result;

    for (std::size_t index = 0; index < path.size(); ++index)
    {
        result += path[index];
        result += '/';
    }

    return result;
}
×
104

105
/// Returns a read-only reference to the last element of the vector.
/// Access is bounds-checked through std::vector::at, so an empty vector
/// results in std::out_of_range.
template <typename T>
const T & last_elem(const std::vector<T> &vec)
{
    const typename std::vector<T>::size_type last_index = vec.size() - 1;
    return vec.at(last_index);
}
110

111
/// Returns a mutable reference to the last element of the vector.
/// Access is bounds-checked through std::vector::at, so an empty vector
/// results in std::out_of_range.
template <typename T>
T & last_elem(std::vector<T> &vec)
{
    const typename std::vector<T>::size_type last_index = vec.size() - 1;
    return vec.at(last_index);
}
116

117
} // namespace
118

119
namespace xlnt {
120
namespace detail {
121

NEW
122
/// Tells whether the given sector id is the ENDOFCHAIN marker.
/// The id is validated first: anything that is neither a regular sector id
/// nor ENDOFCHAIN makes expect_valid_sector_or_chain_end throw.
bool is_chain_end(sector_id sector)
{
    expect_valid_sector_or_chain_end(sector);

    const bool at_end = (ENDOFCHAIN == sector);
    return at_end;
}
127

128
/// Tells whether the given sector id is one of the markers ENDOFCHAIN or FREESECT,
/// i.e. does not designate a sector that can be followed.
/// The id is validated first by expect_valid_sector_or_chain_end_or_free, which throws
/// for any other value above MAXREGSECT.
bool is_invalid_sector(sector_id sector)
{
    expect_valid_sector_or_chain_end_or_free(sector);

    if (sector == ENDOFCHAIN)
    {
        return true;
    }

    return sector == FREESECT;
}
133

134
/// Tells whether the given directory id is the NOSTREAM marker.
/// The id is validated first by expect_valid_entry_or_no_stream, which throws for
/// any other value above MAXREGSID.
bool is_invalid_entry(directory_id entry)
{
    expect_valid_entry_or_no_stream(entry);

    const bool no_stream = (NOSTREAM == entry);
    return no_stream;
}
139

NEW
140
/// Validates that the id is either a regular sector id (<= MAXREGSECT) or ENDOFCHAIN.
/// Throws xlnt::invalid_parameter, including the offending value in hexadecimal, otherwise.
void expect_valid_sector_or_chain_end(sector_id sector)
{
    const bool acceptable = sector <= MAXREGSECT || sector == ENDOFCHAIN;

    if (acceptable)
    {
        return;
    }

    throw xlnt::invalid_parameter("expected valid sector (<= MAXREGSECT, which means <= 0xFFFFFFFA) or ENDOFCHAIN (0xFFFFFFFE)"
        ", but got " + format_hex(sector));
}
×
148

149
/// Validates that the id is a regular sector id (<= MAXREGSECT), ENDOFCHAIN, or FREESECT.
/// Throws xlnt::invalid_parameter, including the offending value in hexadecimal, otherwise.
void expect_valid_sector_or_chain_end_or_free(sector_id sector)
{
    const bool acceptable = sector <= MAXREGSECT || sector == ENDOFCHAIN || sector == FREESECT;

    if (acceptable)
    {
        return;
    }

    throw xlnt::invalid_parameter("expected valid sector (<= MAXREGSECT, which means <= 0xFFFFFFFA),"
        " or ENDOFCHAIN (0xFFFFFFFE), or FREESECT (0xFFFFFFFF), but got " + format_hex(sector));
}
248,125✔
157

158
/// Validates that the id is either a regular directory id (<= MAXREGSID) or NOSTREAM.
/// Throws xlnt::invalid_parameter, including the offending value in hexadecimal, otherwise.
void expect_valid_entry_or_no_stream(directory_id entry)
{
    const bool acceptable = entry <= MAXREGSID || entry == NOSTREAM;

    if (acceptable)
    {
        return;
    }

    throw xlnt::invalid_parameter("expected valid entry (<= MAXREGSID, which means <= 0xFFFFFFFA) or NOSTREAM (0xFFFFFFFF)"
        ", but got " + format_hex(entry));
}
20✔
166

167
/// <summary>
168
/// Allows a std::vector to be read through a std::istream.
169
/// </summary>
170
class compound_document_istreambuf : public std::streambuf
171
{
172
    using int_type = std::streambuf::int_type;
173

174
public:
175
    compound_document_istreambuf(const compound_document_entry &entry, compound_document &document)
64✔
176
        : entry_(entry),
128✔
177
          document_(document),
64✔
178
          sector_writer_(current_sector_)
64✔
179
    {
180
    }
64✔
181

182
    compound_document_istreambuf(const compound_document_istreambuf &) = delete;
183
    compound_document_istreambuf &operator=(const compound_document_istreambuf &) = delete;
184

185
    ~compound_document_istreambuf() override = default;
128✔
186

187
private:
188
    std::streamsize xsgetn(char *c, std::streamsize count) override
779✔
189
    {
190
        std::streamsize bytes_read = 0;
779✔
191

192
        const sector_chain &sec_chain = short_stream() ? document_.ssat_ : document_.sat_;
779✔
193
        const sector_chain chain = document_.follow_chain(entry_.start, sec_chain);
779✔
194
        const std::uint64_t sector_size = short_stream() ? document_.short_sector_size() : document_.sector_size();
779✔
195
        sector_id current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
779✔
196
        std::uint64_t remaining = std::min(entry_.size - position_, static_cast<std::uint64_t>(count));
779✔
197

198
        while (remaining)
3,781✔
199
        {
200
            if (current_sector_.empty() || chain.at(static_cast<std::size_t>(position_ / sector_size)) != current_sector)
3,002✔
201
            {
202
                current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
2,272✔
203
                sector_writer_.reset();
2,272✔
204
                if (short_stream())
2,272✔
205
                {
206
                    document_.read_short_sector(current_sector, sector_writer_);
192✔
207
                }
208
                else
209
                {
210
                    document_.read_sector(current_sector, sector_writer_);
2,080✔
211
                }
212
            }
213

214
            const std::uint64_t available = std::min(entry_.size - position_, sector_size - position_ % sector_size);
3,002✔
215
            const std::uint64_t to_read = std::min(available, remaining);
3,002✔
216

217
            auto start = current_sector_.begin() + static_cast<std::ptrdiff_t>(position_ % sector_size);
3,002✔
218
            auto end = start + static_cast<std::ptrdiff_t>(to_read);
3,002✔
219

220
            for (auto i = start; i < end; ++i)
1,074,752✔
221
            {
222
                *(c++) = static_cast<char>(*i);
1,071,750✔
223
            }
224

225
            remaining -= to_read;
3,002✔
226
            position_ += to_read;
3,002✔
227
            bytes_read += to_read;
3,002✔
228
        }
229

230
        if (position_ < entry_.size && chain.at(static_cast<std::size_t>(position_ / sector_size)) != current_sector)
779✔
231
        {
232
            current_sector = chain.at(static_cast<std::size_t>(position_ / sector_size));
26✔
233
            sector_writer_.reset();
26✔
234
            if (short_stream())
26!
235
            {
236
                document_.read_short_sector(current_sector, sector_writer_);
26✔
237
            }
238
            else
239
            {
240
                document_.read_sector(current_sector, sector_writer_);
×
241
            }
242
        }
243

244
        return bytes_read;
779✔
245
    }
779✔
246

247
    bool short_stream()
3,856✔
248
    {
249
        return entry_.size < document_.header_.threshold;
3,856✔
250
    }
251

252
    int_type underflow() override
×
253
    {
254
        if (position_ >= entry_.size)
×
255
        {
256
            return traits_type::eof();
×
257
        }
258

NEW
259
        std::uint64_t old_position = position_;
×
NEW
260
        char result = '\0';
×
261
        xsgetn(&result, 1);
×
262
        position_ = old_position;
×
263

264
        return result;
×
265
    }
266

267
    int_type uflow() override
×
268
    {
NEW
269
        int_type result = underflow();
×
270
        ++position_;
×
271

272
        return result;
×
273
    }
274

275
    std::streamsize showmanyc() override
×
276
    {
277
        if (position_ == entry_.size)
×
278
        {
279
            return static_cast<std::streamsize>(-1);
×
280
        }
281

282
        return static_cast<std::streamsize>(entry_.size - position_);
×
283
    }
284

285
    std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode) override
52✔
286
    {
287
        if (way == std::ios_base::beg)
52!
288
        {
289
            position_ = 0;
×
290
        }
291
        else if (way == std::ios_base::end)
52!
292
        {
293
            position_ = entry_.size;
×
294
        }
295

296
        if (off < 0)
52!
297
        {
NEW
298
            if (static_cast<std::uint64_t>(-off) > position_)
×
299
            {
300
                position_ = 0;
×
NEW
301
                return static_cast<std::streamoff>(-1);
×
302
            }
303
            else
304
            {
NEW
305
                position_ -= static_cast<std::uint64_t>(-off);
×
306
            }
307
        }
308
        else if (off > 0)
52!
309
        {
NEW
310
            if (static_cast<std::uint64_t>(off) + position_ > entry_.size)
×
311
            {
312
                position_ = entry_.size;
×
NEW
313
                return static_cast<std::streamoff>(-1);
×
314
            }
315
            else
316
            {
NEW
317
                position_ += static_cast<std::uint64_t>(off);
×
318
            }
319
        }
320

321
        return static_cast<std::streamoff>(position_);
52✔
322
    }
323

324
    std::streampos seekpos(std::streampos sp, std::ios_base::openmode) override
×
325
    {
326
        if (sp < 0)
×
327
        {
328
            position_ = 0;
×
329
        }
NEW
330
        else if (static_cast<std::uint64_t>(sp) > entry_.size)
×
331
        {
332
            position_ = entry_.size;
×
333
        }
334
        else
335
        {
NEW
336
            position_ = static_cast<std::uint64_t>(sp);
×
337
        }
338

NEW
339
        return static_cast<std::streamoff>(position_);
×
340
    }
341

342
private:
343
    const compound_document_entry &entry_;
344
    compound_document &document_;
345
    std::vector<byte> current_sector_;
346
    binary_writer<byte> sector_writer_;
347
    std::uint64_t position_ = 0;
348
};
349

350
/// <summary>
351
/// Allows a std::vector to be written through a std::ostream.
352
/// </summary>
353
class compound_document_ostreambuf : public std::streambuf
354
{
355
    using int_type = std::streambuf::int_type;
356

357
public:
358
    compound_document_ostreambuf(compound_document_entry &entry, compound_document &document)
8✔
359
        : entry_(entry),
16✔
360
          document_(document),
8✔
361
          current_sector_(document.header_.threshold),
8✔
362
          sector_reader_(current_sector_)
16✔
363
    {
364
        setp(reinterpret_cast<char *>(current_sector_.data()),
8✔
365
            reinterpret_cast<char *>(current_sector_.data() + current_sector_.size()));
8✔
366
    }
8✔
367

368
    compound_document_ostreambuf(const compound_document_ostreambuf &) = delete;
369
    compound_document_ostreambuf &operator=(const compound_document_ostreambuf &) = delete;
370

371
    ~compound_document_ostreambuf() override;
372

373
private:
374
    int sync() override
986✔
375
    {
376
        auto written = static_cast<std::uint64_t>(pptr() - pbase());
986✔
377

378
        if (written == 0)
986✔
379
        {
380
            return 0;
4✔
381
        }
382

383
        sector_reader_.reset();
982✔
384

385
        if (short_stream())
982✔
386
        {
387
            if (position_ + written >= document_.header_.threshold)
8✔
388
            {
389
                convert_to_long_stream();
4✔
390
            }
391
            else
392
            {
393
                if (is_invalid_sector(entry_.start))
4!
394
                {
395
                    std::size_t num_sectors = static_cast<std::size_t>(
NEW
396
                        (position_ + written + document_.short_sector_size() - 1) / document_.short_sector_size());
×
UNCOV
397
                    chain_ = document_.allocate_short_sectors(num_sectors);
×
NEW
398
                    entry_.start = chain_.at(0);
×
399
                }
400

401
                for (sector_id link : chain_)
4!
402
                {
UNCOV
403
                    document_.write_short_sector(sector_reader_, link);
×
UNCOV
404
                    sector_reader_.offset(sector_reader_.offset() + document_.short_sector_size());
×
405
                }
406
            }
407
        }
408
        else
409
        {
410
            const std::size_t sector_index = static_cast<std::size_t>(position_ / document_.sector_size());
974✔
411
            document_.write_sector(sector_reader_, chain_.at(sector_index));
974✔
412
        }
413

414
        position_ += written;
982✔
415
        entry_.size = std::max(entry_.size, position_);
982✔
416
        document_.write_directory();
982✔
417

418
        std::fill(current_sector_.begin(), current_sector_.end(), byte(0));
982✔
419
        setp(reinterpret_cast<char *>(current_sector_.data()),
982✔
420
            reinterpret_cast<char *>(current_sector_.data() + current_sector_.size()));
982✔
421

422
        return 0;
982✔
423
    }
424

425
    bool short_stream()
1,956✔
426
    {
427
        return entry_.size < document_.header_.threshold;
1,956✔
428
    }
429

430
    int_type overflow(int_type c = traits_type::eof()) override
974✔
431
    {
432
        sync();
974✔
433

434
        if (short_stream())
974!
435
        {
NEW
436
            sector_id next_sector = document_.allocate_short_sector();
×
NEW
437
            document_.ssat_.at(last_elem(chain_)) = next_sector;
×
438
            chain_.push_back(next_sector);
×
439
            document_.write_ssat();
×
440
        }
441
        else
442
        {
443
            sector_id next_sector = document_.allocate_sector();
974✔
444
            document_.sat_.at(last_elem(chain_)) = next_sector;
974✔
445
            chain_.push_back(next_sector);
974✔
446
            document_.write_sat();
974✔
447
        }
448

449
        auto value = static_cast<std::uint8_t>(c);
974✔
450

451
        if (c != traits_type::eof())
974!
452
        {
453
            std::size_t sector_index = static_cast<std::size_t>(position_ % current_sector_.size());
974✔
454
            current_sector_.at(sector_index) = value;
974✔
455
        }
456

457
        pbump(1);
974✔
458

459
        return traits_type::to_int_type(static_cast<char>(value));
974✔
460
    }
461

462
    void convert_to_long_stream()
4✔
463
    {
464
        sector_reader_.reset();
4✔
465

466
        std::size_t num_sectors = static_cast<std::size_t>(current_sector_.size() / document_.sector_size());
4✔
467
        sector_chain new_chain = document_.allocate_sectors(num_sectors);
4✔
468

469
        for (sector_id link : new_chain)
36✔
470
        {
471
            document_.write_sector(sector_reader_, link);
32✔
472
            sector_reader_.offset(sector_reader_.offset() + document_.short_sector_size());
32✔
473
        }
474

475
        current_sector_.resize(document_.sector_size(), 0);
4✔
476
        std::fill(current_sector_.begin(), current_sector_.end(), byte(0));
4✔
477

478
        if (is_invalid_sector(entry_.start))
4!
479
        {
480
            // TODO: deallocate short sectors here
UNCOV
481
            if (document_.header_.num_short_sectors == 0)
×
482
            {
NEW
483
                document_.entries_.at(0).start = ENDOFCHAIN;
×
484
            }
485
        }
486

487
        chain_ = new_chain;
4✔
488
        entry_.start = chain_.at(0);
4✔
489
        document_.write_directory();
4✔
490
    }
4✔
491

492
    std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode) override
×
493
    {
494
        if (way == std::ios_base::beg)
×
495
        {
496
            position_ = 0;
×
497
        }
498
        else if (way == std::ios_base::end)
×
499
        {
500
            position_ = entry_.size;
×
501
        }
502

503
        if (off < 0)
×
504
        {
NEW
505
            if (static_cast<std::uint64_t>(-off) > position_)
×
506
            {
507
                position_ = 0;
×
NEW
508
                return static_cast<std::streamoff>(-1);
×
509
            }
510
            else
511
            {
NEW
512
                position_ -= static_cast<std::uint64_t>(-off);
×
513
            }
514
        }
515
        else if (off > 0)
×
516
        {
NEW
517
            if (static_cast<std::uint64_t>(off) + position_ > entry_.size)
×
518
            {
519
                position_ = entry_.size;
×
NEW
520
                return static_cast<std::streamoff>(-1);
×
521
            }
522
            else
523
            {
NEW
524
                position_ += static_cast<std::uint64_t>(off);
×
525
            }
526
        }
527

NEW
528
        return static_cast<std::streamoff>(position_);
×
529
    }
530

531
    std::streampos seekpos(std::streampos sp, std::ios_base::openmode) override
×
532
    {
533
        if (sp < 0)
×
534
        {
535
            position_ = 0;
×
536
        }
NEW
537
        else if (static_cast<std::uint64_t>(sp) > entry_.size)
×
538
        {
539
            position_ = entry_.size;
×
540
        }
541
        else
542
        {
NEW
543
            position_ = static_cast<std::uint64_t>(sp);
×
544
        }
545

NEW
546
        return static_cast<std::streamoff>(position_);
×
547
    }
548

549
private:
550
    compound_document_entry &entry_;
551
    compound_document &document_;
552
    std::vector<byte> current_sector_;
553
    binary_reader<byte> sector_reader_;
554
    std::uint64_t position_ = 0;
555
    sector_chain chain_;
556
};
557

558
/// Flushes any bytes still held in the put area to the document before the buffer goes away.
compound_document_ostreambuf::~compound_document_ostreambuf()
{
    // The result of sync() carries no information here, so it is discarded explicitly.
    (void)sync();
}
16✔
562

563
/// Creates a compound document that writes to the given output stream.
/// The header is written immediately and the root storage entry is inserted.
compound_document::compound_document(std::ostream &out)
    : out_(&out),
      stream_in_(nullptr),
      stream_out_(nullptr)
{
    using entry_type = compound_document_entry::entry_type;

    write_header();
    insert_entry("/Root Entry", entry_type::RootStorage);
}
4✔
571

572
/// Creates a compound document that reads from the given input stream.
/// The header, the MSAT, the SAT, the SSAT and the directory are read immediately, in that order.
compound_document::compound_document(std::istream &in)
    : in_(&in),
      stream_in_(nullptr),
      stream_out_(nullptr)
{
    // Each step relies on what the previous ones have read, so the order is fixed.
    read_header();
    read_msat();
    read_sat();
    read_ssat();
    read_directory();
}
32✔
583

584
/// Closes the document (see close()) on destruction.
compound_document::~compound_document()
{
    this->close();
}
36✔
588

589
void compound_document::close()
36✔
590
{
591
    stream_out_buffer_.reset(nullptr);
36✔
592
}
36✔
593

594
std::uint64_t compound_document::sector_size()
40,768✔
595
{
596
    return static_cast<std::uint64_t>(1) << header_.sector_size_power;
40,768✔
597
}
598

599
std::uint64_t compound_document::short_sector_size()
960✔
600
{
601
    return static_cast<std::uint64_t>(1) << header_.short_sector_size_power;
960✔
602
}
603

604
std::istream &compound_document::open_read_stream(const std::string &name)
64✔
605
{
606
    if (!contains_entry(name, compound_document_entry::entry_type::UserStream))
64!
607
    {
NEW
608
        throw xlnt::invalid_file("compound document entry of type UserStream not found at path: " + name);
×
609
    }
610

611
    const directory_id entry_id = find_entry(name, compound_document_entry::entry_type::UserStream);
64✔
612
    const compound_document_entry &entry = entries_.at(entry_id);
64✔
613

614
    stream_in_buffer_.reset(new compound_document_istreambuf(entry, *this));
64!
615
    stream_in_.rdbuf(stream_in_buffer_.get());
64✔
616

617
    return stream_in_;
64✔
618
}
619

620
std::ostream &compound_document::open_write_stream(const std::string &name)
8✔
621
{
622
    directory_id entry_id = contains_entry(name, compound_document_entry::entry_type::UserStream)
8✔
623
        ? find_entry(name, compound_document_entry::entry_type::UserStream)
8!
624
        : insert_entry(name, compound_document_entry::entry_type::UserStream);
8✔
625
    compound_document_entry &entry = entries_.at(entry_id);
8✔
626

627
    stream_out_buffer_.reset(new compound_document_ostreambuf(entry, *this));
8!
628
    stream_out_.rdbuf(stream_out_buffer_.get());
8✔
629

630
    return stream_out_;
8✔
631
}
632

633
template <typename T>
634
void compound_document::write_sector(binary_reader<T> &reader, sector_id id)
10,399✔
635
{
636
    out_->seekp(static_cast<std::streampos>(sector_data_start() + sector_size() * id));
10,399✔
637
    out_->write(reinterpret_cast<const char *>(reader.data() + reader.offset()),
10,399✔
638
        static_cast<std::streamsize>(std::min(sector_size(), static_cast<std::uint64_t>(reader.bytes() - reader.offset()))));
10,399✔
639
}
10,399✔
640

641
template <typename T>
UNCOV
642
void compound_document::write_short_sector(binary_reader<T> &reader, sector_id id)
×
643
{
NEW
644
    sector_chain chain = follow_chain(entries_.at(0).start, sat_);
×
NEW
645
    sector_id sector_id = chain.at(static_cast<std::size_t>(id / (sector_size() / short_sector_size())));
×
NEW
646
    std::uint64_t sector_offset = id % (sector_size() / short_sector_size()) * short_sector_size();
×
NEW
647
    out_->seekp(static_cast<std::streampos>(sector_data_start() + sector_size() * sector_id + sector_offset));
×
UNCOV
648
    out_->write(reinterpret_cast<const char *>(reader.data() + reader.offset()),
×
NEW
649
        static_cast<std::streamsize>(std::min(short_sector_size(), static_cast<std::uint64_t>(reader.bytes() - reader.offset()))));
×
UNCOV
650
}
×
651

652
template <typename T>
653
void compound_document::read_sector(sector_id id, binary_writer<T> &writer)
2,743✔
654
{
655
    in_->seekg(static_cast<std::streampos>(sector_data_start() + sector_size() * id));
2,743✔
656
    std::vector<byte> sector(sector_size(), 0);
2,743✔
657
    in_->read(reinterpret_cast<char *>(sector.data()), static_cast<std::streamsize>(sector_size()));
2,743✔
658
    writer.append(sector);
2,743✔
659
}
2,743✔
660

661
template <typename T>
662
void compound_document::read_sector_chain(sector_id start, binary_writer<T> &writer)
663
{
664
    for (sector_id link : follow_chain(start, sat_))
665
    {
666
        read_sector(link, writer);
667
    }
668
}
669

670
template <typename T>
671
void compound_document::read_sector_chain(sector_id start, binary_writer<T> &writer, sector_id offset, std::size_t count)
672
{
673
    sector_chain chain = follow_chain(start, sat_);
674

675
    for (std::size_t i = 0; i < count; ++i)
676
    {
677
        read_sector(chain.at(offset + i), writer);
678
    }
679
}
680

681
template <typename T>
682
void compound_document::read_short_sector(sector_id id, binary_writer<T> &writer)
218✔
683
{
684
    const sector_chain container_chain = follow_chain(entries_.at(0).start, sat_);
218✔
685
    std::vector<byte> container;
218✔
686
    binary_writer<byte> container_writer(container);
218✔
687

688
    for (sector_id sector : container_chain)
790✔
689
    {
690
        read_sector(sector, container_writer);
572✔
691
    }
692

693
    binary_reader<byte> container_reader(container);
218✔
694
    container_reader.offset(static_cast<std::size_t>(id * short_sector_size()));
218✔
695

696
    writer.append(container_reader, short_sector_size());
218✔
697
}
218✔
698

699
template <typename T>
700
void compound_document::read_short_sector_chain(sector_id start, binary_writer<T> &writer)
701
{
702
    for (sector_id link : follow_chain(start, ssat_))
703
    {
704
        read_short_sector(link, writer);
705
    }
706
}
707

708
template <typename T>
709
void compound_document::read_short_sector_chain(sector_id start, binary_writer<T> &writer, sector_id offset, std::size_t count)
710
{
711
    sector_chain chain = follow_chain(start, ssat_);
712

713
    for (std::size_t i = 0; i < count; ++i)
714
    {
715
        read_short_sector(chain.at(offset + i), writer);
716
    }
717
}
718

719
/// Allocates one regular sector and returns its id.
/// The first FREESECT slot of the SAT is marked ENDOFCHAIN, the SAT is written, and the
/// sector itself is written as zero bytes. If the SAT has no free slot, it is first extended
/// by one sector's worth of ids; that new SAT sector is registered in the MSAT and the header.
sector_id compound_document::allocate_sector()
{
    const std::size_t ids_per_sector = static_cast<std::size_t>(sector_size() / sizeof(sector_id));
    auto free_slot = std::find(sat_.begin(), sat_.end(), FREESECT);

    if (free_slot == sat_.end())
    {
        const std::uint32_t msat_index = header_.num_msat_sectors;
        // The new SAT sector occupies the first id beyond the current table.
        const sector_id fresh_sat_sector = static_cast<sector_id>(sat_.size());

        msat_.push_back(fresh_sat_sector);
        write_msat();

        header_.msat.at(msat_.size() - 1) = fresh_sat_sector;
        ++header_.num_msat_sectors;
        write_header();

        sat_.resize(sat_.size() + ids_per_sector, FREESECT);
        sat_.at(fresh_sat_sector) = FATSECT;

        // Write the newly added part of the table to its sector.
        binary_reader<sector_id> sat_reader(sat_);
        sat_reader.offset(msat_index * ids_per_sector);
        write_sector(sat_reader, fresh_sat_sector);

        free_slot = std::find(sat_.begin(), sat_.end(), FREESECT);
    }

    const sector_id allocated = static_cast<sector_id>(free_slot - sat_.begin());
    sat_.at(allocated) = ENDOFCHAIN;

    write_sat();

    // Zero the sector on disk.
    std::vector<byte> zeroes(sector_size());
    binary_reader<byte> zero_reader(zeroes);
    write_sector(zero_reader, allocated);

    return allocated;
}
1,010✔
757

758
/// Allocates count regular sectors, links them into one chain in the SAT, writes the SAT,
/// and returns the chain in order. A count of 0 yields an empty chain and allocates nothing.
sector_chain compound_document::allocate_sectors(std::size_t count)
{
    if (count == 0) return {};

    sector_chain chain;
    chain.reserve(count);
    chain.push_back(allocate_sector());

    while (chain.size() < count)
    {
        const sector_id next = allocate_sector();
        // Link the previous tail to the new sector; the new one stays ENDOFCHAIN.
        sat_.at(chain.back()) = next;
        chain.push_back(next);
    }

    write_sat();

    return chain;
}
4✔
779

780
/// Collects the ids of a sector chain: beginning at start, each id is looked up in table to
/// obtain its successor until ENDOFCHAIN or FREESECT is reached.
/// Throws xlnt::invalid_file if the chain loops back on itself, std::out_of_range if an id
/// lies outside the table, and xlnt::invalid_parameter (from is_invalid_sector) for
/// reserved ids other than ENDOFCHAIN and FREESECT.
sector_chain compound_document::follow_chain(sector_id start, const sector_chain &table)
{
    sector_chain chain;
    sector_id current = start;

    while (!is_invalid_sector(current))
    {
        // Every id that is followed is a distinct index into the table unless the chain
        // contains a cycle, so a chain can never be longer than the table. Without this
        // check a corrupt file would make the loop grow the chain until memory runs out.
        if (chain.size() >= table.size())
        {
            throw xlnt::invalid_file("compound document sector chain starting at " + format_hex(start)
                + " is longer than its allocation table, which indicates a cycle");
        }

        chain.push_back(current);
        current = table.at(current);
    }

    return chain;
}
×
793

UNCOV
794
/// Allocates count short sectors, links them into one chain in the SSAT, writes the SSAT,
/// and returns the chain in order. A count of 0 yields an empty chain and allocates nothing.
sector_chain compound_document::allocate_short_sectors(std::size_t count)
{
    if (count == 0) return {};

    sector_chain chain;
    chain.reserve(count);
    chain.push_back(allocate_short_sector());

    while (chain.size() < count)
    {
        const sector_id next = allocate_short_sector();
        // Link the previous tail to the new short sector; the new one stays ENDOFCHAIN.
        ssat_.at(chain.back()) = next;
        chain.push_back(next);
    }

    write_ssat();

    return chain;
}
×
815

UNCOV
816
/// Allocates one short sector and returns its id.
/// The first FREESECT slot of the SSAT is marked ENDOFCHAIN and the SSAT is written. If the
/// SSAT has no free slot, it is first extended by one regular sector's worth of ids, and that
/// new SSAT sector is appended to the SSAT's own chain in the SAT (or becomes its start).
/// Finally the container chain that starts at the first directory entry is created or
/// extended by one regular sector if it is too short to hold the new short sector.
sector_id compound_document::allocate_short_sector()
{
    const auto sectors_per_sector = static_cast<std::size_t>(sector_size() / sizeof(sector_id));
    auto next_free_iter = std::find(ssat_.begin(), ssat_.end(), FREESECT);

    if (next_free_iter == ssat_.end())
    {
        sector_id new_ssat_sector_id = allocate_sector();

        if (is_invalid_sector(header_.ssat_start))
        {
            header_.ssat_start = new_ssat_sector_id;
        }
        else
        {
            sector_chain ssat_chain = follow_chain(header_.ssat_start, sat_);
            sat_.at(last_elem(ssat_chain)) = new_ssat_sector_id;
            write_sat();
        }

        write_header();

        std::size_t old_size = ssat_.size();
        ssat_.resize(old_size + sectors_per_sector, FREESECT);

        // The ids added by the resize start at element old_size, so that is where the
        // reader has to point when the new SSAT sector is written (the same convention
        // allocate_sector uses for the SAT: sector index times ids per sector).
        binary_reader<sector_id> ssat_reader(ssat_);
        ssat_reader.offset(old_size);
        write_sector(ssat_reader, new_ssat_sector_id);

        next_free_iter = std::find(ssat_.begin(), ssat_.end(), FREESECT);
    }

    ++header_.num_short_sectors;
    write_header();

    auto next_free = static_cast<sector_id>(next_free_iter - ssat_.begin());
    ssat_.at(next_free) = ENDOFCHAIN;

    write_ssat();

    // Number of regular sectors the container needs so that it includes short sector next_free.
    const std::uint64_t short_sectors_per_sector = sector_size() / short_sector_size();
    const std::uint64_t required_container_sectors = next_free / short_sectors_per_sector + 1;

    if (is_invalid_sector(entries_.at(0).start))
    {
        entries_.at(0).start = allocate_sector();
        write_entry(0);
    }

    sector_chain container_chain = follow_chain(entries_.at(0).start, sat_);

    if (required_container_sectors > container_chain.size())
    {
        // allocate_sector() may grow sat_, so it must complete before an element of sat_
        // is referenced; before C++17 the two sides of an assignment are not ordered.
        const sector_id new_container_sector = allocate_sector();
        sat_.at(last_elem(container_chain)) = new_container_sector;
        write_sat();
    }

    return next_free;
}
878

879
/// Returns the id of a directory entry whose type is Empty.
/// If no such entry exists, the directory is extended by one regular sector: the sector is
/// allocated and appended to the directory chain (or becomes its start), one sector's worth
/// of default-constructed entries is appended and written, and the id of the first new entry
/// is returned.
directory_id compound_document::next_empty_entry()
{
    directory_id entry_id = 0;

    for (; entry_id < entries_.size(); ++entry_id)
    {
        if (entries_.at(entry_id).type == compound_document_entry::entry_type::Empty)
        {
            return entry_id;
        }
    }

    // entry_id is now equal to entries_.size()

    if (is_invalid_sector(header_.directory_start))
    {
        header_.directory_start = allocate_sector();
    }
    else
    {
        sector_chain directory_chain = follow_chain(header_.directory_start, sat_);
        // allocate_sector() may grow sat_, so it must complete before an element of sat_
        // is referenced; before C++17 the two sides of an assignment are not ordered.
        const sector_id new_directory_sector = allocate_sector();
        sat_.at(last_elem(directory_chain)) = new_directory_sector;
        write_sat();
    }

    const auto entries_per_sector = static_cast<std::size_t>(sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE);

    entries_.reserve(entries_.size() + entries_per_sector);
    for (std::size_t i = 0; i < entries_per_sector; ++i)
    {
        entries_.emplace_back();
        write_entry(entry_id + static_cast<directory_id>(i));
    }

    return entry_id;
}
915

916
/// Inserts a new directory entry of the given type at the given path and returns its id.
/// The last path component becomes the entry's name. If more than one component precedes
/// it, the joined preceding components must name an existing UserStorage entry, which
/// becomes the parent; otherwise the parent is entry 0.
/// Throws xlnt::key_not_found if the required parent storage does not exist.
directory_id compound_document::insert_entry(
    const std::string &name,
    compound_document_entry::entry_type type)
{
    directory_id entry_id = next_empty_entry();
    compound_document_entry &entry = entries_.at(entry_id);

    directory_id parent_id = 0;
    std::vector<std::string> split = split_path(name);
    std::string filename = last_elem(split);
    split.pop_back();

    if (split.size() > 1)
    {
        std::string joined_path = join_path(split);
        parent_id = find_entry(joined_path, compound_document_entry::entry_type::UserStorage);

        // find_entry yields a directory id (or NOSTREAM), so it is checked with the
        // directory-id validator rather than the sector one.
        if (is_invalid_entry(parent_id))
        {
            throw xlnt::key_not_found("parent compound document entry of type UserStorage not found at path \"" + joined_path + "\", "
                "necessary to insert entry \"" + name + "\" of type " + std::to_string(static_cast<int>(type)));
        }

        parent_storage_[entry_id] = parent_id;
    }

    entry.name(filename);
    entry.type = type;

    tree_insert(entry_id, parent_id);
    write_directory();

    return entry_id;
}
12✔
950

951
std::uint64_t compound_document::sector_data_start()
17,350✔
952
{
953
    return sizeof(compound_document_header);
17,350✔
954
}
955

956
bool compound_document::contains_entry(const std::string &path,
72✔
957
    compound_document_entry::entry_type type)
958
{
959
    return !is_invalid_sector(find_entry(path, type));
72✔
960
}
961

962
/// Searches the directory for an entry of type `type` whose tree_path() equals `name`.
///
/// The root storage is special-cased: "/" and "/Root Entry" both resolve to ID 0.
/// Returns the ID of the first matching entry, or NOSTREAM if there is none.
directory_id compound_document::find_entry(const std::string &name,
    compound_document_entry::entry_type type)
{
    if (type == compound_document_entry::entry_type::RootStorage)
    {
        if (name == "/" || name == "/Root Entry") return 0;
    }

    for (std::size_t index = 0; index < entries_.size(); ++index)
    {
        // Cheap type comparison first; the path is only built for candidates of the right type.
        if (entries_[index].type != type) continue;

        const directory_id candidate_id = static_cast<directory_id>(index);

        if (tree_path(candidate_id) == name)
        {
            return candidate_id;
        }
    }

    return NOSTREAM;
}
982

983
void compound_document::print_directory()
×
984
{
NEW
985
    directory_id entry_id = 0;
×
986

NEW
987
    for (const compound_document_entry &entry : entries_)
×
988
    {
989
        if (entry.type == compound_document_entry::entry_type::UserStream)
×
990
        {
991
            std::cout << tree_path(entry_id) << std::endl;
×
992
        }
993

994
        ++entry_id;
×
995
    }
996
}
×
997

998
void compound_document::write_directory()
998✔
999
{
1000
    for (std::size_t entry_id = 0; entry_id < entries_.size(); ++entry_id)
4,990✔
1001
    {
1002
        write_entry(static_cast<directory_id>(entry_id));
3,992✔
1003
    }
1004
}
998✔
1005

1006
void compound_document::read_directory()
32✔
1007
{
1008
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
32✔
1009
    const std::size_t num_entries = static_cast<std::size_t>(
1010
        follow_chain(header_.directory_start, sat_).size() * entries_per_sector);
32✔
1011

1012
    entries_.reserve(entries_.size() + num_entries);
32✔
1013
    for (std::size_t entry_id = 0; entry_id < num_entries; ++entry_id)
232✔
1014
    {
1015
        entries_.emplace_back();
200✔
1016
        read_entry(static_cast<directory_id>(entry_id));
200✔
1017
    }
1018

1019
    std::vector<directory_id> stack;
32✔
1020
    std::vector<directory_id> storage_siblings;
32✔
1021
    std::vector<directory_id> stream_siblings;
32✔
1022

1023
    std::vector<directory_id> directory_stack;
32✔
1024
    directory_stack.push_back(0u);
32✔
1025

1026
    while (!directory_stack.empty())
100✔
1027
    {
1028
        directory_id current_storage_id = directory_stack.back();
68✔
1029
        directory_stack.pop_back();
68✔
1030

1031
        if (is_invalid_sector(tree_child(current_storage_id))) continue;
68!
1032

1033
        std::vector<directory_id> storage_stack;
68✔
1034
        directory_id storage_root_id = tree_child(current_storage_id);
68✔
1035
        parent_[storage_root_id] = NOSTREAM;
68✔
1036
        storage_stack.push_back(storage_root_id);
68✔
1037

1038
        while (!storage_stack.empty())
204✔
1039
        {
1040
            directory_id current_entry_id = storage_stack.back();
136✔
1041
            const compound_document_entry &current_entry = entries_.at(current_entry_id);
136✔
1042
            storage_stack.pop_back();
136✔
1043

1044
            parent_storage_[current_entry_id] = current_storage_id;
136✔
1045

1046
            if (current_entry.type == compound_document_entry::entry_type::UserStorage)
136✔
1047
            {
1048
                directory_stack.push_back(current_entry_id);
36✔
1049
            }
1050

1051
            if (!is_invalid_sector(tree_left(current_entry_id)))
136✔
1052
            {
1053
                storage_stack.push_back(tree_left(current_entry_id));
18✔
1054
                tree_parent(tree_left(current_entry_id)) = current_entry_id;
18✔
1055
            }
1056

1057
            if (!is_invalid_sector(tree_right(current_entry_id)))
136✔
1058
            {
1059
                storage_stack.push_back(tree_right(current_entry_id));
50✔
1060
                tree_parent(tree_right(current_entry_id)) = current_entry_id;
50✔
1061
            }
1062
        }
1063
    }
68✔
1064
}
32✔
1065

1066
/// Inserts entry `new_id` into the sibling tree of storage `storage_id`.
///
/// If the storage has no tree yet, the new entry becomes its (black) root.
/// Otherwise the entry is attached as a leaf at the position given by
/// compare_keys() and tree_insert_fixup() is called for it.
void compound_document::tree_insert(directory_id new_id, directory_id storage_id)
{
    using entry_color = compound_document_entry::entry_color;

    parent_storage_[new_id] = storage_id;

    // A freshly inserted node never has children.
    tree_left(new_id) = NOSTREAM;
    tree_right(new_id) = NOSTREAM;

    const bool storage_is_empty = is_invalid_entry(tree_root(new_id));

    if (storage_is_empty)
    {
        // Entry 0 is never registered as the root of a sibling tree.
        if (new_id != 0)
        {
            tree_root(new_id) = new_id;
        }

        tree_color(new_id) = entry_color::Black;
        tree_parent(new_id) = NOSTREAM;

        return;
    }

    // Plain binary-search-tree descent; the colours are repaired afterwards.
    const std::string new_key = tree_key(new_id);
    directory_id attach_to = NOSTREAM;
    directory_id cursor = tree_root(new_id);

    while (!is_invalid_entry(cursor))
    {
        attach_to = cursor;
        cursor = compare_keys(new_key, tree_key(attach_to)) > 0
            ? tree_right(attach_to)
            : tree_left(attach_to);
    }

    tree_parent(new_id) = attach_to;

    if (compare_keys(new_key, tree_key(attach_to)) > 0)
    {
        tree_right(attach_to) = new_id;
    }
    else
    {
        tree_left(attach_to) = new_id;
    }

    tree_insert_fixup(new_id);
}
1120

1121
std::string compound_document::tree_path(directory_id id)
244✔
1122
{
1123
    directory_id storage_id = parent_storage_.at(id);
244✔
1124
    std::vector<std::string> result;
244✔
1125

1126
    while (storage_id > 0)
328✔
1127
    {
1128
        storage_id = parent_storage_.at(storage_id);
84✔
1129
        result.emplace_back(entries_.at(storage_id).name());
84✔
1130
    }
1131

1132
    return "/" + join_path(result) + entries_.at(id).name();
488✔
1133
}
244✔
1134

1135
/// Rotates the sibling tree to the left around `x`: the right child of `x`
/// takes the place of `x`, and `x` becomes that node's left child.
void compound_document::tree_rotate_left(directory_id x)
{
    const directory_id pivot = tree_right(x);
    const directory_id inner_subtree = tree_left(pivot);

    // The pivot's left subtree is handed over to x as its right subtree.
    tree_right(x) = inner_subtree;

    if (!is_invalid_entry(inner_subtree))
    {
        tree_parent(inner_subtree) = x;
    }

    // The pivot takes over x's position below x's former parent.
    const directory_id former_parent = tree_parent(x);
    tree_parent(pivot) = former_parent;

    if (is_invalid_entry(former_parent))
    {
        // x was the root of its sibling tree.
        tree_root(x) = pivot;
    }
    else if (x == tree_left(former_parent))
    {
        tree_left(former_parent) = pivot;
    }
    else
    {
        tree_right(former_parent) = pivot;
    }

    // Finally, x hangs below the pivot on the left.
    tree_left(pivot) = x;
    tree_parent(x) = pivot;
}
×
1167

1168
/// Rotates the sibling tree to the right around `y`: the left child of `y`
/// takes the place of `y`, and `y` becomes that node's right child.
void compound_document::tree_rotate_right(directory_id y)
{
    const directory_id pivot = tree_left(y);
    const directory_id inner_subtree = tree_right(pivot);

    // The pivot's right subtree is handed over to y as its left subtree.
    tree_left(y) = inner_subtree;

    if (!is_invalid_entry(inner_subtree))
    {
        tree_parent(inner_subtree) = y;
    }

    // The pivot takes over y's position below y's former parent.
    const directory_id former_parent = tree_parent(y);
    tree_parent(pivot) = former_parent;

    if (is_invalid_entry(former_parent))
    {
        // y was the root of its sibling tree.
        tree_root(y) = pivot;
    }
    else if (y == tree_left(former_parent))
    {
        tree_left(former_parent) = pivot;
    }
    else
    {
        tree_right(former_parent) = pivot;
    }

    // Finally, y hangs below the pivot on the right.
    tree_right(pivot) = y;
    tree_parent(y) = pivot;
}
×
1200

1201
/// Restores the red-black colouring after `x` has been attached as a leaf.
///
/// `x` is coloured red; while its parent is red as well, the usual three cases
/// are applied. The "parent is a left child" and "parent is a right child"
/// situations are mirror images and are handled by the same code with the
/// directions swapped. The root of the tree is always coloured black at the end.
void compound_document::tree_insert_fixup(directory_id x)
{
    using entry_color = compound_document_entry::entry_color;

    tree_color(x) = entry_color::Red;

    while (x != tree_root(x) && tree_color(tree_parent(x)) == entry_color::Red)
    {
        const directory_id parent = tree_parent(x);
        const directory_id grandparent = tree_parent(parent);
        const bool parent_is_left_child = (parent == tree_left(grandparent));

        // The uncle is the grandparent's other child.
        const directory_id uncle = parent_is_left_child
            ? tree_right(grandparent)
            : tree_left(grandparent);

        if (!is_invalid_sector(uncle) && tree_color(uncle) == entry_color::Red)
        {
            // case 1: red uncle - recolour and continue from the grandparent
            tree_color(parent) = entry_color::Black;
            tree_color(uncle) = entry_color::Black;
            tree_color(grandparent) = entry_color::Red;
            x = grandparent;
            continue;
        }

        // case 2: x is an "inner" grandchild - rotate it to the outside first
        if (parent_is_left_child)
        {
            if (x == tree_right(parent))
            {
                x = parent;
                tree_rotate_left(x);
            }
        }
        else
        {
            if (x == tree_left(parent))
            {
                x = parent;
                tree_rotate_right(x);
            }
        }

        // case 3: recolour and rotate around the grandparent.
        // The links must be read again, as case 2 may have changed them.
        const directory_id current_parent = tree_parent(x);
        const directory_id current_grandparent = tree_parent(current_parent);

        tree_color(current_parent) = entry_color::Black;
        tree_color(current_grandparent) = entry_color::Red;

        if (parent_is_left_child)
        {
            tree_rotate_right(current_grandparent);
        }
        else
        {
            tree_rotate_left(current_grandparent);
        }
    }

    tree_color(tree_root(x)) = entry_color::Black;
}
4✔
1267

1268
/// Returns a writable reference to the left sibling link (`prev`) of entry `id`.
directory_id &compound_document::tree_left(directory_id id)
{
    compound_document_entry &node = entries_.at(id);
    return node.prev;
}
1272

1273
/// Returns a writable reference to the right sibling link (`next`) of entry `id`.
directory_id &compound_document::tree_right(directory_id id)
{
    compound_document_entry &node = entries_.at(id);
    return node.next;
}
1277

1278
/// Returns a writable reference to the recorded tree parent of entry `id`.
directory_id &compound_document::tree_parent(directory_id id)
{
    // operator[] creates the slot on first access; this is intended.
    directory_id &parent_slot = parent_[id];
    return parent_slot;
}
1283

1284
/// Returns a writable reference to the root of the sibling tree that contains
/// entry `id`, i.e. the child link of the storage recorded for `id`.
directory_id &compound_document::tree_root(directory_id id)
{
    const directory_id containing_storage = parent_storage_.at(id);
    return tree_child(containing_storage);
}
1288

1289
/// Returns a writable reference to the child link of entry `id`.
directory_id &compound_document::tree_child(directory_id id)
{
    compound_document_entry &node = entries_.at(id);
    return node.child;
}
1293

1294
std::string compound_document::tree_key(directory_id id)
16✔
1295
{
1296
    return entries_.at(id).name();
16✔
1297
}
1298

1299
/// Returns a writable reference to the red-black colour flag of entry `id`.
compound_document_entry::entry_color &compound_document::tree_color(directory_id id)
{
    compound_document_entry &node = entries_.at(id);
    return node.color;
}
1303

1304
void compound_document::read_header()
32✔
1305
{
1306
    in_->seekg(0, std::ios::beg);
32✔
1307
    in_->read(reinterpret_cast<char *>(&header_), sizeof(compound_document_header));
32✔
1308

1309
    // Header Signature (8 bytes): Identification signature for the compound file structure, and MUST be
1310
    // set to the value 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1.
1311
    if (header_.header_signature != 0xE11AB1A1E011CFD0)
32!
1312
    {
NEW
1313
        throw xlnt::invalid_file("invalid header signature, expected 0xE11AB1A1E011CFD0 but got " + format_hex(header_.header_signature));
×
1314
    }
1315

1316
    // Header CLSID (16 bytes): Reserved and unused class ID that MUST be set to all zeroes (CLSID_NULL).
1317
    if (std::any_of(header_.header_clsid.begin(), header_.header_clsid.end(), [](std::uint8_t i) { return i != 0; }))
544!
1318
    {
NEW
1319
        std::string exception_str = "invalid header CLSID, expected only zeros but got: ";
×
NEW
1320
        for (std::uint8_t val : header_.header_clsid)
×
1321
        {
NEW
1322
            exception_str += fmt::format("{:02x} ", val);
×
1323
        }
NEW
1324
        throw xlnt::invalid_file(exception_str);
×
NEW
1325
    }
×
1326

1327
    // Major Version (2 bytes): Version number for breaking changes. This field MUST be set to either
1328
    // 0x0003 (version 3) or 0x0004 (version 4).
1329
    if (header_.major_version != 3 && header_.major_version != 4)
32!
1330
    {
NEW
1331
        throw xlnt::invalid_file("invalid major version, expected 3 or 4 but got " + std::to_string(header_.major_version));
×
1332
    }
1333

1334
    // Byte Order (2 bytes): This field MUST be set to 0xFFFE. This field is a byte order mark for all integer
1335
    // fields, specifying little-endian byte order.
1336
    if (static_cast<std::uint16_t>(header_.byte_order) != 0xFFFE)
32!
1337
    {
NEW
1338
        throw xlnt::invalid_file("invalid byte order, expected 0xFFFE (little-endian) but got " +
×
NEW
1339
            fmt::format("0x{:04X}", static_cast<std::uint16_t>(header_.byte_order)));
×
1340
    }
1341

1342
    // Sector Shift (2 bytes): This field MUST be set to 0x0009, or 0x000c, depending on the Major
1343
    // Version field. This field specifies the sector size of the compound file as a power of 2.
1344
    // - If Major Version is 3, the Sector Shift MUST be 0x0009, specifying a sector size of 512 bytes.
1345
    // - If Major Version is 4, the Sector Shift MUST be 0x000C, specifying a sector size of 4096 bytes.
1346
    if (!((header_.major_version == 3 && header_.sector_size_power == 0x0009) ||
32!
NEW
1347
        (header_.major_version == 4 && header_.sector_size_power == 0x000C)))
×
1348
    {
NEW
1349
        throw xlnt::invalid_file("invalid combination of sector size power and major version, got sector_size_power = " +
×
NEW
1350
            fmt::format("0x{:04X}", header_.sector_size_power) + "; major_version = " + std::to_string(header_.major_version));
×
1351
    }
1352

1353
    // Mini Sector Shift (2 bytes): This field MUST be set to 0x0006. This field specifies the sector size of
1354
    // the Mini Stream as a power of 2. The sector size of the Mini Stream MUST be 64 bytes.
1355
    if (header_.short_sector_size_power != 0x0006)
32!
1356
    {
NEW
1357
        throw xlnt::invalid_file("invalid short sector size power, expected 0x0006 but got " + fmt::format("0x{:04X}", header_.short_sector_size_power));
×
1358
    }
1359

1360
    // Reserved (6 bytes): This field MUST be set to all zeroes.
1361
    if (std::any_of(header_.reserved.begin(), header_.reserved.end(), [](std::uint8_t i) { return i != 0; }))
224!
1362
    {
NEW
1363
        std::string exception_str = "invalid reserved field, expected only zeros but got: ";
×
NEW
1364
        for (std::uint8_t val : header_.reserved)
×
1365
        {
NEW
1366
            exception_str += fmt::format("{:02x} ", val);
×
1367
        }
NEW
1368
        throw xlnt::invalid_file(exception_str);
×
NEW
1369
    }
×
1370

1371
    // Number of Directory Sectors (4 bytes): This integer field contains the count of the number of
1372
    // directory sectors in the compound file.
1373
    // - If Major Version is 3, the Number of Directory Sectors MUST be zero. This field is not
1374
    //   supported for version 3 compound files.
1375
    if (header_.major_version == 3 && header_.num_directory_sectors != 0)
32!
1376
    {
NEW
1377
        throw xlnt::invalid_file("invalid number of directory sectors for major version 3: expected 0 directory sectors but got " +
×
NEW
1378
            std::to_string(header_.num_directory_sectors));
×
1379
    }
1380

1381
    // Mini Stream Cutoff Size (4 bytes): This integer field MUST be set to 0x00001000. This field
1382
    // specifies the maximum size of a user-defined data stream that is allocated from the mini FAT
1383
    // and mini stream, and that cutoff is 4,096 bytes. Any user-defined data stream that is greater than
1384
    // or equal to this cutoff size must be allocated as normal sectors from the FAT.
1385
    if (header_.threshold != 0x00001000)
32!
1386
    {
NEW
1387
        throw xlnt::invalid_file("invalid mini stream cutoff size, expected 0x00001000 but got " + format_hex(header_.threshold));
×
1388
    }
1389

1390
    // DIFAT (436 bytes): This array of 32-bit integer fields contains the first 109 FAT sector locations of
1391
    // the compound file.
1392
    // - For version 4 compound files, the header size (512 bytes) is less than the sector size (4,096
1393
    //   bytes), so the remaining part of the header (3,584 bytes) MUST be filled with all zeroes.
1394
    if (header_.major_version == 4)
32!
1395
    {
NEW
1396
        std::array<std::uint8_t, 3584> remaining {{ 0 }};
×
NEW
1397
        in_->read(reinterpret_cast<char *>(remaining.data()), sizeof(remaining));
×
1398

NEW
1399
        if (std::any_of(remaining.begin(), remaining.end(), [](std::uint8_t i) { return i != 0; }))
×
1400
        {
NEW
1401
            std::string exception_str = "invalid remaining bytes in header (major version 4), expected only zeros but got: ";
×
NEW
1402
            for (std::uint8_t val : remaining)
×
1403
            {
NEW
1404
                exception_str += fmt::format("{:02x} ", val);
×
1405
            }
NEW
1406
            throw xlnt::invalid_file(exception_str);
×
NEW
1407
        }
×
1408
    }
1409
}
32✔
1410

1411
void compound_document::read_msat()
32✔
1412
{
1413
    msat_.clear();
32✔
1414

1415
    sector_id msat_sector = header_.extra_msat_start;
32✔
1416
    binary_writer<sector_id> msat_writer(msat_);
32✔
1417

1418
    for (std::uint32_t i = 0u; i < header_.num_msat_sectors; ++i)
91✔
1419
    {
1420
        if (i < 109u)
59!
1421
        {
1422
            msat_writer.write(header_.msat.at(i));
59✔
1423
        }
1424
        else
1425
        {
1426
            read_sector(msat_sector, msat_writer);
×
1427

NEW
1428
            msat_sector = last_elem(msat_);
×
1429
            msat_.pop_back();
×
1430
        }
1431
    }
1432
}
32✔
1433

1434
void compound_document::read_sat()
32✔
1435
{
1436
    sat_.clear();
32✔
1437
    binary_writer<sector_id> sat_writer(sat_);
32✔
1438

1439
    for (sector_id msat_sector : msat_)
91✔
1440
    {
1441
        read_sector(msat_sector, sat_writer);
59✔
1442
    }
1443
}
32✔
1444

1445
void compound_document::read_ssat()
32✔
1446
{
1447
    ssat_.clear();
32✔
1448
    binary_writer<sector_id> ssat_writer(ssat_);
32✔
1449

1450
    for (sector_id ssat_sector : follow_chain(header_.ssat_start, sat_))
64✔
1451
    {
1452
        read_sector(ssat_sector, ssat_writer);
32✔
1453
    }
32✔
1454
}
32✔
1455

NEW
1456
std::string compound_document_entry::format_info(
×
1457
    directory_id entry_id,
1458
    sector_id sector_id,
1459
    /// IMPORTANT: only show the name after the name and its length have been validated!
1460
    bool show_entry_name) const
1461
{
1462
    // The formatted IDs should be as short as possible to keep the exception message readable - so we do not add leading zeros.
NEW
1463
    std::string message = "(entry " + fmt::format("0x{:X}", entry_id);
×
NEW
1464
    if (show_entry_name)
×
1465
    {
NEW
1466
        message += " with name \"";
×
1467
        // Only add the name if the conversion does not throw an exception itself!
1468
        try
1469
        {
NEW
1470
            message += name();
×
1471
        }
NEW
1472
        catch (const std::exception &ex)
×
1473
        {
NEW
1474
            message += "INVALID (";
×
NEW
1475
            message += ex.what();
×
NEW
1476
            message.push_back(')');
×
NEW
1477
        }
×
NEW
1478
        message.push_back('"');
×
1479
    }
NEW
1480
    message += " of type " + std::to_string(static_cast<int>(type)) +
×
NEW
1481
        " in sector " + fmt::format("0x{:X}", sector_id) + ")";
×
NEW
1482
    return message;
×
NEW
1483
}
×
1484

1485
void check_empty_entry(
32✔
1486
    const compound_document_entry &entry,
1487
    directory_id id,
1488
    sector_id directory_sector)
1489
{
1490
    if (entry.type != compound_document_entry::entry_type::Empty)
32!
1491
    {
NEW
1492
        throw xlnt::invalid_parameter("invalid entry type " +
×
NEW
1493
            entry.format_info(id, directory_sector, false) +
×
NEW
1494
            ", expected Empty but got " + std::to_string(static_cast<int>(entry.type)));
×
1495
    }
1496

1497
    // Free (unused) directory entries are marked with Object Type 0x0 (unknown or unallocated). The
1498
    // entire directory entry must consist of all zeroes except for the child, right sibling, and left sibling
1499
    // pointers, which must be initialized to NOSTREAM (0xFFFFFFFF).
1500

1501
    // NOTE: Some implementations seem to not initialize this buffer at all, so we cannot check it for correctness.
1502
    /*if (std::any_of(entry.name_array.begin(), entry.name_array.end(), [](char16_t i) { return i != 0; }))
1503
    {
1504
        std::string exception_str = "invalid entry name " +
1505
            entry.format_info(id, directory_sector, false) +
1506
            ", expected all zeros but got: ";
1507
        for (char16_t val : entry.name_array)
1508
        {
1509
            exception_str += fmt::format("{:04x} ", static_cast<std::uint16_t>(val));
1510
        }
1511
        throw xlnt::invalid_file(exception_str);
1512
    }*/
1513

1514
    if (entry.name_length != 0)
32!
1515
    {
NEW
1516
        throw xlnt::invalid_file("invalid entry name length " + entry.format_info(id, directory_sector, false) +
×
NEW
1517
            ", expected 0 but got " + std::to_string(entry.name_length));
×
1518
    }
1519

1520
    if (static_cast<std::uint8_t>(entry.color) != 0)
32!
1521
    {
NEW
1522
        throw xlnt::invalid_file("invalid entry color " + entry.format_info(id, directory_sector, false) +
×
NEW
1523
            ", expected 0 but got " + std::to_string(static_cast<int>(entry.color)));
×
1524
    }
1525

1526
    if (entry.prev != NOSTREAM || entry.next != NOSTREAM || entry.child != NOSTREAM)
32!
1527
    {
NEW
1528
        throw xlnt::invalid_file("empty entry contains invalid child or sibling " +
×
NEW
1529
            entry.format_info(id, directory_sector, false) +
×
NEW
1530
            "; prev = " + fmt::format("0x{:08X}", (entry.prev)) +
×
NEW
1531
            "; next = " + fmt::format("0x{:08X}", (entry.next)) +
×
NEW
1532
            "; child = " + fmt::format("0x{:08X}", (entry.child)));
×
1533
    }
1534

1535
    if (std::any_of(entry.clsid.begin(), entry.clsid.end(), [](std::uint8_t i) { return i != 0; }))
544!
1536
    {
NEW
1537
        std::string exception_str = "invalid entry CLSID " + entry.format_info(id, directory_sector, false) +
×
NEW
1538
            ", expected all zeros but got: ";
×
NEW
1539
        for (std::uint8_t val : entry.clsid)
×
1540
        {
NEW
1541
            exception_str += fmt::format("{:02x} ", val);
×
1542
        }
NEW
1543
        throw xlnt::invalid_file(exception_str);
×
NEW
1544
    }
×
1545

1546
    if (entry.state_bits != 0)
32!
1547
    {
NEW
1548
        throw xlnt::invalid_file("invalid entry state bits " + entry.format_info(id, directory_sector, false) +
×
NEW
1549
            ", expected 0 but got " + std::to_string(entry.state_bits));
×
1550
    }
1551

1552
    // NOTE: some implementations seem to use the timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC.
1553
    if (entry.creation_time != 0 && entry.creation_time != 116444736000000000)
32!
1554
    {
NEW
1555
        throw xlnt::invalid_file("invalid entry creation time " + entry.format_info(id, directory_sector, false) +
×
NEW
1556
            ", expected 0 or 116444736000000000, but got " + std::to_string(entry.creation_time));
×
1557
    }
1558

1559
    // NOTE: some implementations seem to use the timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC.
1560
    if (entry.modified_time != 0 && entry.modified_time != 116444736000000000)
32!
1561
    {
NEW
1562
        throw xlnt::invalid_file("invalid entry modification time " + entry.format_info(id, directory_sector, false) +
×
NEW
1563
            ", expected 0 or 116444736000000000, but got " + std::to_string(entry.modified_time));
×
1564
    }
1565

1566
    // According to the specification (see above), it must be 0, but it seems that some immplementations
1567
    // initialize it with ENDOFCHAIN or FREESECT, which is honestly not wrong either. So let's accept that.
1568
    if (entry.start != 0 && entry.start != ENDOFCHAIN && entry.start != FREESECT)
32!
1569
    {
NEW
1570
        throw xlnt::invalid_file("invalid entry start sector location " + entry.format_info(id, directory_sector, false) +
×
NEW
1571
            ", expected 0 or ENDOFCHAIN (0xFFFFFFFE) or FREESECT (0xFFFFFFFF), but got " + format_hex(entry.start));
×
1572
    }
1573

1574
    if (entry.size != 0)
32!
1575
    {
NEW
1576
        throw xlnt::invalid_file("invalid entry stream size " + entry.format_info(id, directory_sector, false) +
×
NEW
1577
            ", expected 0 but got " + std::to_string(entry.size));
×
1578
    }
1579
}
32✔
1580

1581
void check_non_empty_entry(
168✔
1582
    const compound_document_entry &entry,
1583
    directory_id id,
1584
    sector_id directory_sector)
1585
{
1586
    if (entry.type == compound_document_entry::entry_type::Empty)
168!
1587
    {
NEW
1588
        throw xlnt::invalid_parameter("invalid entry type " +
×
NEW
1589
            entry.format_info(id, directory_sector, false) +
×
NEW
1590
            ", expected different than Empty but got Empty");
×
1591
    }
1592

1593
    // First check the length, as we'll need this for the string itself.
1594
    // Directory Entry Name Length (2 bytes): This field MUST match the length of the Directory Entry
1595
    // Name Unicode string in bytes. The length MUST be a multiple of 2 and include the terminating null
1596
    // character in the count. This length MUST NOT exceed 64, the maximum size of the Directory Entry
1597
    // Name field.
1598
    if (entry.name_length < 2 || entry.name_length > 64)
168!
1599
    {
NEW
1600
        throw xlnt::invalid_file("invalid entry name length " +
×
NEW
1601
            entry.format_info(id, directory_sector, false) +
×
NEW
1602
            ", expected >= 2 and <= 64, but got " + std::to_string(entry.name_length));
×
1603
    }
1604
    else if (entry.name_length % 2 != 0)
168!
1605
    {
NEW
1606
        throw xlnt::invalid_file("invalid entry name length " +
×
NEW
1607
            entry.format_info(id, directory_sector, false) +
×
NEW
1608
            ", which must be a multiple of 2, but got " + std::to_string(entry.name_length));
×
1609
    }
1610

1611
    // Directory Entry Name (64 bytes): This field MUST contain a Unicode string for the storage or
1612
    // stream name encoded in UTF-16. The name MUST be terminated with a UTF-16 terminating null
1613
    // character. Thus, storage and stream names are limited to 32 UTF-16 code points, including the
1614
    // terminating null character. When locating an object in the compound file except for the root
1615
    // storage, the directory entry name is compared by using a special case-insensitive uppercase
1616
    // mapping, described in Red-Black Tree. The following characters are illegal and MUST NOT be part
1617
    // of the name: '/', '\', ':', '!'.
1618
    std::uint16_t name_length_characters = (entry.name_length / 2) - 1; // does NOT include \0 at the end
168✔
1619
    if (entry.name_array.at(name_length_characters) != u'\0')
168!
1620
    {
NEW
1621
        std::string exception_str = "invalid entry name " +
×
NEW
1622
            entry.format_info(id, directory_sector, false) +
×
NEW
1623
            ", which must be terminated with \\0 but is terminated with " +
×
NEW
1624
            fmt::format("0x{:04X}", static_cast<std::uint16_t>(entry.name_array.at(name_length_characters))) +
×
NEW
1625
            "\nString has a length of " + std::to_string(name_length_characters) + " characters (" +
×
NEW
1626
            std::to_string(entry.name_length) + " bytes including \\0). Full buffer contents:\n";
×
NEW
1627
        for (char16_t val : entry.name_array)
×
1628
        {
NEW
1629
            exception_str += fmt::format("{:04x} ", static_cast<std::uint16_t>(val));
×
1630
        }
1631

NEW
1632
        throw xlnt::invalid_file(exception_str);
×
NEW
1633
    }
×
1634

1635
    for (std::uint16_t n = 0; n < name_length_characters; ++n)
2,471✔
1636
    {
1637
        char16_t curr = entry.name_array.at(n);
2,303✔
1638
        if (curr == u'/' || curr == u'\\' || curr == u':' || curr == u'!')
2,303!
1639
        {
NEW
1640
            throw xlnt::invalid_file("invalid entry name " + entry.format_info(id, directory_sector, true) +
×
NEW
1641
                ", which contains invalid character " +
×
NEW
1642
                fmt::format("0x{:04X}", static_cast<std::uint16_t>(curr)) + " at position " + std::to_string(n));
×
1643
        }
1644
    }
1645

1646
    // Object Type (1 byte): This field MUST be 0x00, 0x01, 0x02, or 0x05, depending on the actual type
1647
    // of object. All other values are not valid.
1648
    // --------------------------------
1649
    // NOTE: the empty type is handled in check_empty_entry().
1650
    if (static_cast<std::uint8_t>(entry.type) != 0x01 && // UserStorage
168✔
1651
        static_cast<std::uint8_t>(entry.type) != 0x02 && // UserStream
132✔
1652
        static_cast<std::uint8_t>(entry.type) != 0x05) // RootStorage
32!
1653
    {
NEW
1654
        throw xlnt::invalid_file("invalid entry object type " + entry.format_info(id, directory_sector, true) +
×
NEW
1655
            ", expected 0, 1, 2 or 5 but got " + std::to_string(static_cast<int>(entry.type)));
×
1656
    }
1657

1658
    // Color Flag (1 byte): This field MUST be 0x00 (red) or 0x01 (black). All other values are not valid.
1659
    if (static_cast<std::uint8_t>(entry.color) != 0 && static_cast<std::uint8_t>(entry.color) != 1)
168!
1660
    {
NEW
1661
        throw xlnt::invalid_file("invalid entry color " + entry.format_info(id, directory_sector, true) +
×
NEW
1662
            ", expected 0 or 1, but got " + std::to_string(static_cast<int>(entry.color)));
×
1663
    }
1664

1665
    // CLSID (16 bytes): This field contains an object class GUID, if this entry is for a storage object or
1666
    // root storage object. For a stream object, this field MUST be set to all zeroes. A value containing all
1667
    // zeroes in a storage or root storage directory entry is valid, and indicates that no object class is
1668
    // associated with the storage. If an implementation of the file format enables applications to create
1669
    // storage objects without explicitly setting an object class GUID, it MUST write all zeroes by default.
1670
    // If this value is not all zeroes, the object class GUID can be used as a parameter to start
1671
    // applications.
1672
    if (entry.type == compound_document_entry::entry_type::UserStream &&
268!
1673
        std::any_of(entry.clsid.begin(), entry.clsid.end(), [](std::uint8_t i) { return i != 0; }))
1,700!
1674
    {
NEW
1675
        std::string exception_str = "invalid entry CLSID " + entry.format_info(id, directory_sector, true) +
×
NEW
1676
            " for UserStream type, expected all zeros but got: ";
×
NEW
1677
        for (std::uint8_t val : entry.clsid)
×
1678
        {
NEW
1679
            exception_str += fmt::format("{:02x} ", val);
×
1680
        }
NEW
1681
        throw xlnt::invalid_file(exception_str);
×
NEW
1682
    }
×
1683

1684
    // Creation Time (8 bytes): This field contains the creation time for a storage object, or all zeroes to
1685
    // indicate that the creation time of the storage object was not recorded. The Windows FILETIME
1686
    // structure is used to represent this field in UTC. For a stream object, this field MUST be all zeroes.
1687
    // For a root storage object, this field MUST be all zeroes, and the creation time is retrieved or set on
1688
    // the compound file itself.
1689
    // --------------------------------
1690
    // NOTE: unfortunately cannot be enforced, as some files:
1691
    // - have a root entry with timestamp 116444736000000000, which is 1970-01-01 00:00:00 UTC
1692
    // - have a stream with an actual timestamp
1693
    /*if ((entry.type == compound_document_entry::entry_type::UserStream ||
1694
        entry.type == compound_document_entry::entry_type::RootStorage) &&
1695
        entry.creation_time != 0)
1696
    {
1697
        throw xlnt::invalid_file("invalid entry creation time " + entry.format_info(id, directory_sector, true) +
1698
            " for type " + std::to_string(static_cast<int>(entry.type)) +
1699
            ", expected 0 but got " + std::to_string(entry.creation_time));
1700
    }*/
1701

1702
    // Modified Time (8 bytes): This field contains the modification time for a storage object, or all
1703
    // zeroes to indicate that the modified time of the storage object was not recorded. The Windows
1704
    // FILETIME structure is used to represent this field in UTC. For a stream object, this field MUST be
1705
    // all zeroes. For a root storage object, this field MAY<2> be set to all zeroes, and the modified time
1706
    // is retrieved or set on the compound file itself.
1707
    // --------------------------------
1708
    // NOTE: unfortunately cannot be enforced, as some files have a stream with an actual timestamp.
1709
    /*if (entry.type == compound_document_entry::entry_type::UserStream &&
1710
        entry.modified_time != 0)
1711
    {
1712
        throw xlnt::invalid_file("invalid entry modification time " + entry.format_info(id, directory_sector, true) +
1713
            " for type UserStream, expected 0 but got " + std::to_string(entry.modified_time));
1714
    }*/
1715

1716
    // Starting Sector Location (4 bytes): This field contains the first sector location if this is a stream
1717
    // object. For a root storage object, this field MUST contain the first sector of the mini stream, if the
1718
    // mini stream exists. For a storage object, this field MUST be set to all zeroes.
1719
    // --------------------------------
1720
    // It seems that some implementations initialize it with FREESECT,
1721
    // which is honestly not wrong either. So let's accept that.
1722
    if (entry.type == compound_document_entry::entry_type::UserStorage &&
168✔
1723
        !(entry.start == 0 || entry.start == FREESECT))
36!
1724
    {
NEW
1725
        throw xlnt::invalid_file("invalid entry start sector location " + entry.format_info(id, directory_sector, true) +
×
NEW
1726
            " for type UserStorage, expected 0 or FREESECT (0xFFFFFFFF), but got " + format_hex(entry.start));
×
1727
    }
1728

1729
    // Stream Size (8 bytes): This 64-bit integer field contains the size of the user-defined data if this is
1730
    // a stream object. For a root storage object, this field contains the size of the mini stream. For a
1731
    // storage object, this field MUST be set to all zeroes.
1732
    if (entry.type == compound_document_entry::entry_type::UserStorage &&
168✔
1733
        entry.size != 0)
36!
1734
    {
NEW
1735
        throw xlnt::invalid_file("invalid entry stream size " + entry.format_info(id, directory_sector, true) +
×
NEW
1736
            " for type UserStorage, expected 0 but got " + std::to_string(entry.size));
×
1737
    }
1738
}
168✔
1739

1740
// Reads the directory entry with the given id from the input stream into entries_.at(id)
// and validates it.
//
// The entry is located by following the directory sector chain that starts at
// header_.directory_start: the id selects a sector of that chain and a slot inside the sector.
// The fields are then read one by one, in the order in which they are stored in the file.
// Entries of type Empty are validated by check_empty_entry, all other entries by
// check_non_empty_entry.
void compound_document::read_entry(directory_id id)
{
    const sector_chain directory_chain = follow_chain(header_.directory_start, sat_);
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
    const sector_id directory_sector = directory_chain.at(static_cast<std::size_t>(id / entries_per_sector));
    const std::uint64_t offset = sector_size() * directory_sector + ((id % entries_per_sector) * COMPOUND_DOCUMENT_ENTRY_SIZE);

    in_->seekg(static_cast<std::streamoff>(sector_data_start() + offset), std::ios::beg);
    compound_document_entry &entry = entries_.at(id);
    // Read the fields manually due to struct padding (larger sizeof than 128 bytes).
    in_->read(reinterpret_cast<char *>(entry.name_array.data()), sizeof(entry.name_array));
    in_->read(reinterpret_cast<char *>(&entry.name_length), sizeof(entry.name_length));
    in_->read(reinterpret_cast<char *>(&entry.type), sizeof(entry.type));
    in_->read(reinterpret_cast<char *>(&entry.color), sizeof(entry.color));
    in_->read(reinterpret_cast<char *>(&entry.prev), sizeof(entry.prev));
    in_->read(reinterpret_cast<char *>(&entry.next), sizeof(entry.next));
    in_->read(reinterpret_cast<char *>(&entry.child), sizeof(entry.child));
    in_->read(reinterpret_cast<char *>(entry.clsid.data()), sizeof(entry.clsid));
    in_->read(reinterpret_cast<char *>(&entry.state_bits), sizeof(entry.state_bits));
    in_->read(reinterpret_cast<char *>(&entry.creation_time), sizeof(entry.creation_time));
    in_->read(reinterpret_cast<char *>(&entry.modified_time), sizeof(entry.modified_time));
    in_->read(reinterpret_cast<char *>(&entry.start), sizeof(entry.start));
    in_->read(reinterpret_cast<char *>(&entry.size), sizeof(entry.size));

    // Stream Size (8 bytes): ... (see below for the rest)
    // - For a version 3 compound file 512-byte sector size, the value of this field MUST be less than
    //   or equal to 0x80000000. (Equivalently, this requirement can be stated: the size of a stream or
    //   of the mini stream in a version 3 compound file MUST be less than or equal to 2 gigabytes
    //   (GB).) Note that as a consequence of this requirement, the most significant 32 bits of this field
    //   MUST be zero in a version 3 compound file. However, implementers should be aware that
    //   some older implementations did not initialize the most significant 32 bits of this field, and
    //   these bits might therefore be nonzero in files that are otherwise valid version 3 compound
    //   files. Although this document does not normatively specify parser behavior, it is recommended
    //   that parsers ignore the most significant 32 bits of this field in version 3 compound files,
    //   treating it as if its value were zero, unless there is a specific reason to do otherwise (for
    //   example, a parser whose purpose is to verify the correctness of a compound file).
    if (header_.major_version == 3 && entry.size > 0x80000000)
    {
        // Note: the only allowed byte order is little-endian.
        // Keep the least significant 32 bits and drop the (possibly uninitialized) most significant
        // 32 bits. A 16-bit mask would silently truncate every size above 65535 bytes.
        entry.size = entry.size & 0x00000000FFFFFFFFull;
    }

    if (entry.type == compound_document_entry::entry_type::Empty)
    {
        check_empty_entry(entry, id, directory_sector);
    }
    else
    {
        check_non_empty_entry(entry, id, directory_sector);
    }
}
200✔
1791

1792
void compound_document::write_header()
15✔
1793
{
1794
    out_->seekp(0, std::ios::beg);
15✔
1795
    out_->write(reinterpret_cast<char *>(&header_), sizeof(compound_document_header));
15✔
1796
}
15✔
1797

1798
void compound_document::write_msat()
11✔
1799
{
1800
    sector_id msat_sector = header_.extra_msat_start;
11✔
1801

1802
    for (std::uint32_t i = 0u; i < header_.num_msat_sectors; ++i)
39✔
1803
    {
1804
        if (i < 109u)
28!
1805
        {
1806
            header_.msat.at(i) = msat_.at(i);
28✔
1807
        }
1808
        else
1809
        {
NEW
1810
            std::vector<sector_id> sector;
×
NEW
1811
            binary_writer<sector_id> sector_writer(sector);
×
1812

1813
            read_sector(msat_sector, sector_writer);
×
1814

NEW
1815
            msat_sector = last_elem(sector);
×
1816
            sector.pop_back();
×
1817

1818
            std::copy(sector.begin(), sector.end(), std::back_inserter(msat_));
×
1819
        }
×
1820
    }
1821
}
11✔
1822

1823
void compound_document::write_sat()
1,988✔
1824
{
1825
    binary_reader<sector_id> sector_reader(sat_);
1,988✔
1826

1827
    for (sector_id sat_sector : msat_)
10,360✔
1828
    {
1829
        write_sector(sector_reader, sat_sector);
8,372✔
1830
    }
1831
}
1,988✔
1832

UNCOV
1833
void compound_document::write_ssat()
×
1834
{
NEW
1835
    binary_reader<sector_id> sector_reader(ssat_);
×
1836

NEW
1837
    for (sector_id ssat_sector : follow_chain(header_.ssat_start, sat_))
×
1838
    {
UNCOV
1839
        write_sector(sector_reader, ssat_sector);
×
UNCOV
1840
    }
×
UNCOV
1841
}
×
1842

1843
// Writes the directory entry entries_.at(id) to its slot in the output stream.
//
// The slot is located by following the directory sector chain that starts at
// header_.directory_start: the id selects a sector of that chain and a position inside it.
// The fields are written one by one, in the order in which they are stored in the file.
void compound_document::write_entry(directory_id id)
{
    const sector_chain directory_chain = follow_chain(header_.directory_start, sat_);
    const std::uint64_t entries_per_sector = sector_size() / COMPOUND_DOCUMENT_ENTRY_SIZE;
    const sector_id directory_sector = directory_chain.at(static_cast<std::size_t>(id / entries_per_sector));

    // Absolute stream position = start of the sector data + start of the directory sector
    // + position of the entry inside that sector.
    const auto position_in_sector = (id % entries_per_sector) * COMPOUND_DOCUMENT_ENTRY_SIZE;
    const std::uint64_t offset = sector_data_start() + sector_size() * directory_sector + position_in_sector;

    out_->seekp(static_cast<std::streamoff>(offset), std::ios::beg);
    const compound_document_entry &entry = entries_.at(id);

    // Emits the raw bytes of one field.
    const auto put_bytes = [this](const void *data, std::size_t byte_count)
    {
        out_->write(static_cast<const char *>(data), static_cast<std::streamsize>(byte_count));
    };

    // Write the fields manually due to struct padding (larger sizeof than 128 bytes).
    put_bytes(entry.name_array.data(), sizeof(entry.name_array));
    put_bytes(&entry.name_length, sizeof(entry.name_length));
    put_bytes(&entry.type, sizeof(entry.type));
    put_bytes(&entry.color, sizeof(entry.color));
    put_bytes(&entry.prev, sizeof(entry.prev));
    put_bytes(&entry.next, sizeof(entry.next));
    put_bytes(&entry.child, sizeof(entry.child));
    put_bytes(entry.clsid.data(), sizeof(entry.clsid));
    put_bytes(&entry.state_bits, sizeof(entry.state_bits));
    put_bytes(&entry.creation_time, sizeof(entry.creation_time));
    put_bytes(&entry.modified_time, sizeof(entry.modified_time));
    put_bytes(&entry.start, sizeof(entry.start));
    put_bytes(&entry.size, sizeof(entry.size));
}
4,008✔
1868

1869
} // namespace detail
1870
} // namespace xlnt
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc