• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ArkScript-lang / Ark / 11304355606

12 Oct 2024 08:53AM UTC coverage: 76.391% (+1.2%) from 75.225%
11304355606

push

github

SuperFola
feat(ir, compiler): implementing the IR optimizer

62 of 75 new or added lines in 3 files covered. (82.67%)

402 existing lines in 9 files now uncovered.

5054 of 6616 relevant lines covered (76.39%)

9231.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

45.77
/src/arkreactor/Compiler/BytecodeReader.cpp
1
#include <Ark/Compiler/BytecodeReader.hpp>
2

3
#include <Ark/Compiler/Instructions.hpp>
4
#include <Ark/Builtins/Builtins.hpp>
5

6
#include <iomanip>
7
#include <unordered_map>
8
#include <picosha2.h>
9
#include <fmt/core.h>
10
#include <fmt/color.h>
11

12
namespace Ark
13
{
14
    using namespace Ark::internal;
15

16
    void BytecodeReader::feed(const bytecode_t& bytecode)
100✔
17
    {
100✔
18
        m_bytecode = bytecode;
100✔
19
    }
100✔
20

21
    void BytecodeReader::feed(const std::string& file)
1✔
22
    {
1✔
23
        std::ifstream ifs(file, std::ios::binary | std::ios::ate);
1✔
24
        if (!ifs.good())
1✔
UNCOV
25
            throw std::runtime_error(fmt::format("[BytecodeReader] Couldn't open file '{}'", file));
×
26

27
        const auto pos = ifs.tellg();
1✔
28
        // reserve appropriate number of bytes
29
        std::vector<char> temp(static_cast<std::size_t>(pos));
1✔
30
        ifs.seekg(0, std::ios::beg);
1✔
31
        ifs.read(&temp[0], pos);
1✔
32
        ifs.close();
1✔
33

34
        m_bytecode = bytecode_t(static_cast<std::size_t>(pos));
1✔
35
        for (std::size_t i = 0; i < static_cast<std::size_t>(pos); ++i)
277✔
36
            m_bytecode[i] = static_cast<uint8_t>(temp[i]);
276✔
37
    }
1✔
38

39
    bool BytecodeReader::checkMagic() const
236✔
40
    {
236✔
41
        return m_bytecode.size() >= 4 && m_bytecode[0] == 'a' &&
398✔
42
            m_bytecode[1] == 'r' && m_bytecode[2] == 'k' &&
324✔
43
            m_bytecode[3] == internal::Instruction::NOP;
162✔
44
    }
45

46
    /**
     * @brief Access the bytecode currently held by the reader
     *
     * @return a constant reference to the bytecode stored in the reader
     */
    const bytecode_t& BytecodeReader::bytecode() noexcept
    {
        return m_bytecode;
    }
50

51
    /**
     * @brief Read the version stored in the bytecode header
     *
     * @return the major, minor and patch numbers read from bytes 4 to 9, or
     *         0.0.0 if the magic is wrong or the bytecode is shorter than 10 bytes
     */
    Version BytecodeReader::version() const
    {
        const bool has_version = checkMagic() && m_bytecode.size() >= 10;
        if (!has_version)
            return Version { 0, 0, 0 };

        // each component is stored on 2 bytes, most significant byte first
        const auto component_at = [this](const std::size_t offset) {
            return static_cast<uint16_t>((m_bytecode[offset] << 8) + m_bytecode[offset + 1]);
        };

        return Version {
            .major = component_at(4),
            .minor = component_at(6),
            .patch = component_at(8)
        };
    }
27✔
62

63
    unsigned long long BytecodeReader::timestamp() const
1✔
64
    {
1✔
65
        // 4 (ark\0) + version (2 bytes / number) + timestamp = 18 bytes
66
        if (!checkMagic() || m_bytecode.size() < 18)
1✔
UNCOV
67
            return 0;
×
68

69
        // reading the timestamp in big endian
70
        using timestamp_t = unsigned long long;
71
        return (static_cast<timestamp_t>(m_bytecode[10]) << 56) +
3✔
72
            (static_cast<timestamp_t>(m_bytecode[11]) << 48) +
2✔
73
            (static_cast<timestamp_t>(m_bytecode[12]) << 40) +
2✔
74
            (static_cast<timestamp_t>(m_bytecode[13]) << 32) +
2✔
75
            (static_cast<timestamp_t>(m_bytecode[14]) << 24) +
2✔
76
            (static_cast<timestamp_t>(m_bytecode[15]) << 16) +
2✔
77
            (static_cast<timestamp_t>(m_bytecode[16]) << 8) +
2✔
78
            static_cast<timestamp_t>(m_bytecode[17]);
1✔
79
    }
1✔
80

81
    std::vector<unsigned char> BytecodeReader::sha256() const
27✔
82
    {
27✔
83
        if (!checkMagic() || m_bytecode.size() < 18 + picosha2::k_digest_size)
27✔
UNCOV
84
            return {};
×
85

86
        std::vector<unsigned char> sha(picosha2::k_digest_size);
27✔
87
        for (std::size_t i = 0; i < picosha2::k_digest_size; ++i)
891✔
88
            sha[i] = m_bytecode[18 + i];
864✔
89
        return sha;
27✔
90
    }
54✔
91

92
    /**
     * @brief Read the symbols table of the bytecode
     *
     * The table is expected right after the header (magic, version, timestamp
     * and SHA256): a SYM_TABLE_START byte, the number of symbols on 2 bytes,
     * then that many null-terminated strings.
     *
     * @return the symbols with the offsets of the start and of the end of the
     *         table, or an empty block if the magic is wrong or if no symbols
     *         table is found at the expected position
     * @throws std::runtime_error if the bytecode ends in the middle of the table
     */
    Symbols BytecodeReader::symbols() const
    {
        const std::size_t table_start = 18 + picosha2::k_digest_size;

        // `<=` and not `<`: the byte at table_start is read, so it has to exist
        if (!checkMagic() || m_bytecode.size() <= table_start ||
            m_bytecode[table_start] != SYM_TABLE_START)
            return {};

        std::size_t i = table_start + 1;
        // the number of symbols takes 2 bytes
        if (m_bytecode.size() - i < 2)
            throw std::runtime_error("[BytecodeReader] Truncated symbols table: missing symbols count");
        const uint16_t size = readNumber(i);
        // readNumber leaves i on the second byte of the number
        i++;

        Symbols block;
        block.start = table_start;
        block.symbols.reserve(size);

        for (uint16_t j = 0; j < size; ++j)
        {
            std::string content;
            while (i < m_bytecode.size() && m_bytecode[i] != 0)
                content.push_back(static_cast<char>(m_bytecode[i++]));
            // the loop must have stopped on a null terminator, not on the end of the bytecode
            if (i >= m_bytecode.size())
                throw std::runtime_error(fmt::format("[BytecodeReader] Truncated symbols table: symbol {} is not null-terminated", j));
            // skip the null terminator
            i++;

            block.symbols.push_back(content);
        }

        block.end = i;
        return block;
    }
27✔
119

120
    /**
     * @brief Read the values (constants) table of the bytecode
     *
     * The table is expected right after the symbols table: a VAL_TABLE_START
     * byte, the number of values on 2 bytes, then for each value a type byte
     * followed by its payload and one trailing byte:
     *  - NUMBER_TYPE: a null-terminated string, converted with std::stod
     *  - STRING_TYPE: a null-terminated string
     *  - FUNC_TYPE: a 2 bytes number
     *
     * @param symbols the symbols table, as returned by symbols()
     * @return the values with the offsets of the start and of the end of the
     *         table, or an empty block if the magic is wrong or if no values
     *         table is found at symbols.end
     * @throws std::runtime_error if a value has an unknown type, or if the
     *         bytecode ends in the middle of the table
     * @throws std::invalid_argument, std::out_of_range from std::stod if a
     *         number can not be converted
     */
    Values BytecodeReader::values(const Symbols& symbols) const
    {
        if (!checkMagic())
            return {};

        std::size_t i = symbols.end;
        // symbols.end may be past the end of the bytecode when there is nothing after the symbols
        if (i >= m_bytecode.size() || m_bytecode[i] != VAL_TABLE_START)
            return {};
        i++;

        // ensure that `count` bytes can be read starting from the current position
        const auto require_bytes = [this, &i](const std::size_t count, const char* what) {
            if (i >= m_bytecode.size() || m_bytecode.size() - i < count)
                throw std::runtime_error(fmt::format("[BytecodeReader] Truncated values table: missing {}", what));
        };

        require_bytes(2, "values count");
        const uint16_t size = readNumber(i);
        // readNumber leaves i on the second byte of the number
        i++;
        Values block;
        block.start = symbols.end;
        block.values.reserve(size);

        for (uint16_t j = 0; j < size; ++j)
        {
            require_bytes(1, "value type");
            const uint8_t type = m_bytecode[i];
            i++;

            if (type == NUMBER_TYPE)
            {
                std::string val;
                while (i < m_bytecode.size() && m_bytecode[i] != 0)
                    val.push_back(static_cast<char>(m_bytecode[i++]));
                require_bytes(1, "null terminator of a number");
                block.values.emplace_back(std::stod(val));
            }
            else if (type == STRING_TYPE)
            {
                std::string val;
                while (i < m_bytecode.size() && m_bytecode[i] != 0)
                    val.push_back(static_cast<char>(m_bytecode[i++]));
                require_bytes(1, "null terminator of a string");
                block.values.emplace_back(val);
            }
            else if (type == FUNC_TYPE)
            {
                // 2 bytes for the number, plus the byte skipped after every value
                require_bytes(3, "page address");
                const uint16_t addr = readNumber(i);
                i++;
                block.values.emplace_back(addr);
            }
            else
                throw std::runtime_error(fmt::format("Unknown value type: {:x}", type));
            // skip the byte following the value (the null terminator for numbers and strings)
            i++;
        }

        block.end = i;
        return block;
    }
27✔
169

170
    /**
     * @brief Read the code segments of the bytecode
     *
     * Code segments are expected right after the values table. Each one is a
     * CODE_SEGMENT_START byte, a number of instructions on 2 bytes, then
     * 4 bytes per instruction. Reading stops at the first byte which is not
     * CODE_SEGMENT_START, or at the end of the bytecode.
     *
     * @param values the values table, as returned by values()
     * @return the pages of code with the offset of the first segment, or an
     *         empty block if the magic is wrong
     * @throws std::runtime_error if the bytecode ends in the middle of a segment
     */
    Code BytecodeReader::code(const Values& values) const
    {
        if (!checkMagic())
            return {};

        std::size_t i = values.end;

        Code block;
        block.start = i;

        // checking i first: values.end is the bytecode size when there is no code segment at all
        while (i < m_bytecode.size() && m_bytecode[i] == CODE_SEGMENT_START)
        {
            i++;
            // the number of instructions takes 2 bytes
            if (m_bytecode.size() - i < 2)
                throw std::runtime_error(fmt::format("[BytecodeReader] Truncated code segment {}: missing instructions count", block.pages.size()));
            // an instruction is 4 bytes long
            const std::size_t size = static_cast<std::size_t>(readNumber(i)) * 4;
            // readNumber leaves i on the second byte of the number
            i++;

            if (m_bytecode.size() - i < size)
                throw std::runtime_error(fmt::format("[BytecodeReader] Truncated code segment {}: expected {} bytes, got {}", block.pages.size(), size, m_bytecode.size() - i));

            auto& page = block.pages.emplace_back();
            page.reserve(size);
            for (std::size_t j = 0; j < size; ++j)
                page.push_back(m_bytecode[i++]);
        }

        return block;
    }
27✔
196

UNCOV
197
    void BytecodeReader::display(const BytecodeSegment segment,
×
198
                                 const std::optional<uint16_t> sStart,
199
                                 const std::optional<uint16_t> sEnd,
200
                                 const std::optional<uint16_t> cPage) const
201
    {
×
UNCOV
202
        if (!checkMagic())
×
203
        {
204
            fmt::print("Invalid format");
×
UNCOV
205
            return;
×
206
        }
207

208
        auto [major, minor, patch] = version();
×
209
        fmt::println("Version:   {}.{}.{}", major, minor, patch);
×
210
        fmt::println("Timestamp: {}", timestamp());
×
211
        fmt::print("SHA256:    ");
×
212
        for (const auto sha = sha256(); unsigned char h : sha)
×
213
            fmt::print("{:02x}", h);
×
UNCOV
214
        fmt::print("\n\n");
×
215

216
        // reading the different tables, one after another
217

UNCOV
218
        if ((sStart.has_value() && !sEnd.has_value()) || (!sStart.has_value() && sEnd.has_value()))
×
219
        {
220
            fmt::print(fmt::fg(fmt::color::red), "Both start and end parameter need to be provided together\n");
×
UNCOV
221
            return;
×
222
        }
UNCOV
223
        if (sStart.has_value() && sEnd.has_value() && sStart.value() >= sEnd.value())
×
224
        {
225
            fmt::print(fmt::fg(fmt::color::red), "Invalid slice start and end arguments\n");
×
UNCOV
226
            return;
×
227
        }
228

229
        const auto syms = symbols();
×
230
        const auto vals = values(syms);
×
UNCOV
231
        const auto code_block = code(vals);
×
232

233
        // symbols table
234
        {
235
            std::size_t size = syms.symbols.size();
×
236
            std::size_t sliceSize = size;
×
UNCOV
237
            bool showSym = (segment == BytecodeSegment::All || segment == BytecodeSegment::Symbols);
×
238

239
            if (showSym && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
×
240
                fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
×
241
            else if (showSym && sStart.has_value() && sEnd.has_value())
×
UNCOV
242
                sliceSize = sEnd.value() - sStart.value() + 1;
×
243

244
            if (showSym || segment == BytecodeSegment::HeadersOnly)
×
UNCOV
245
                fmt::println("{} (length: {})", fmt::styled("Symbols table", fmt::fg(fmt::color::cyan)), sliceSize);
×
246

UNCOV
247
            for (std::size_t j = 0; j < size; ++j)
×
248
            {
249
                if (auto start = sStart; auto end = sEnd)
×
UNCOV
250
                    showSym = showSym && (j >= start.value() && j <= end.value());
×
251

252
                if (showSym)
×
253
                    fmt::println("{}) {}", j, syms.symbols[j]);
×
UNCOV
254
            }
×
255

256
            if (showSym)
×
257
                fmt::print("\n");
×
258
            if (segment == BytecodeSegment::Symbols)
×
259
                return;
×
UNCOV
260
        }
×
261

262
        // values table
263
        {
264
            std::size_t size = vals.values.size();
×
UNCOV
265
            std::size_t sliceSize = size;
×
266

267
            bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::Values);
×
268
            if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
×
269
                fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
×
270
            else if (showVal && sStart.has_value() && sEnd.has_value())
×
UNCOV
271
                sliceSize = sEnd.value() - sStart.value() + 1;
×
272

273
            if (showVal || segment == BytecodeSegment::HeadersOnly)
×
UNCOV
274
                fmt::println("{} (length: {})", fmt::styled("Constants table", fmt::fg(fmt::color::cyan)), sliceSize);
×
275

UNCOV
276
            for (std::size_t j = 0; j < size; ++j)
×
277
            {
278
                if (auto start = sStart; auto end = sEnd)
×
UNCOV
279
                    showVal = showVal && (j >= start.value() && j <= end.value());
×
280

UNCOV
281
                if (showVal)
×
282
                {
283
                    switch (const auto val = vals.values[j]; val.valueType())
×
UNCOV
284
                    {
×
285
                        case ValueType::Number:
286
                            fmt::println("{}) (Number) {}", j, val.number());
×
UNCOV
287
                            break;
×
288
                        case ValueType::String:
289
                            fmt::println("{}) (String) {}", j, val.string());
×
UNCOV
290
                            break;
×
291
                        case ValueType::PageAddr:
292
                            fmt::println("{}) (PageAddr) {}", j, val.pageAddr());
×
UNCOV
293
                            break;
×
294
                        default:
295
                            fmt::print(fmt::fg(fmt::color::red), "Value type not handled: {}\n", types_to_str[static_cast<std::size_t>(val.valueType())]);
×
296
                            break;
×
297
                    }
×
298
                }
×
UNCOV
299
            }
×
300

301
            if (showVal)
×
302
                fmt::print("\n");
×
303
            if (segment == BytecodeSegment::Values)
×
304
                return;
×
UNCOV
305
        }
×
306

307
        const auto stringify_value = [](const Value& val) -> std::string {
×
308
            switch (val.valueType())
×
UNCOV
309
            {
×
310
                case ValueType::Number:
UNCOV
311
                    return fmt::format("{} (Number)", val.number());
×
312
                case ValueType::String:
UNCOV
313
                    return fmt::format("{} (String)", val.string());
×
314
                case ValueType::PageAddr:
UNCOV
315
                    return fmt::format("{} (PageAddr)", val.pageAddr());
×
316
                default:
UNCOV
317
                    return "";
×
318
            }
UNCOV
319
        };
×
320

321
        enum class ArgKind
322
        {
323
            Symbol,
324
            Value,
325
            Builtin,
326
            Raw
327
        };
328

329
        struct Arg
330
        {
331
            ArgKind kind;
332
            uint16_t arg;
333
        };
334

335
        const std::unordered_map<Instruction, ArgKind> arg_kinds = {
×
336
            { LOAD_SYMBOL, ArgKind::Symbol },
337
            { LOAD_CONST, ArgKind::Value },
338
            { POP_JUMP_IF_TRUE, ArgKind::Raw },
339
            { STORE, ArgKind::Symbol },
340
            { SET_VAL, ArgKind::Symbol },
341
            { POP_JUMP_IF_FALSE, ArgKind::Raw },
342
            { JUMP, ArgKind::Raw },
343
            { CALL, ArgKind::Raw },
344
            { CAPTURE, ArgKind::Symbol },
345
            { BUILTIN, ArgKind::Builtin },
346
            { DEL, ArgKind::Symbol },
347
            { MAKE_CLOSURE, ArgKind::Value },
348
            { GET_FIELD, ArgKind::Symbol },
349
            { PLUGIN, ArgKind::Value },
350
            { LIST, ArgKind::Raw },
351
            { APPEND, ArgKind::Raw },
352
            { CONCAT, ArgKind::Raw },
353
            { APPEND_IN_PLACE, ArgKind::Raw },
354
            { CONCAT_IN_PLACE, ArgKind::Raw }
355
        };
356

UNCOV
357
        const auto color_print_inst = [&syms, &vals, &stringify_value](const std::string& name, std::optional<Arg> arg = std::nullopt) {
×
358
            fmt::print("{}", fmt::styled(name, fmt::fg(fmt::color::gold)));
×
UNCOV
359
            if (arg.has_value())
×
360
            {
UNCOV
361
                switch (auto [kind, idx] = arg.value(); kind)
×
362
                {
×
363
                    case ArgKind::Symbol:
364
                        fmt::print(fmt::fg(fmt::color::green), " {}\n", syms.symbols[idx]);
×
UNCOV
365
                        break;
×
366
                    case ArgKind::Value:
367
                        fmt::print(fmt::fg(fmt::color::magenta), " {}\n", stringify_value(vals.values[idx]));
×
UNCOV
368
                        break;
×
369
                    case ArgKind::Builtin:
370
                        fmt::print(" {}\n", Builtins::builtins[idx].first);
×
371
                        break;
×
372
                    case ArgKind::Raw:
373
                        fmt::print(fmt::fg(fmt::color::red), " ({})\n", idx);
×
374
                        break;
×
UNCOV
375
                }
×
376
            }
×
377
            else
378
                fmt::print("\n");
×
379
        };
×
380

UNCOV
381
        if (segment == BytecodeSegment::All || segment == BytecodeSegment::Code || segment == BytecodeSegment::HeadersOnly)
×
382
        {
383
            uint16_t pp = 0;
×
384

385
            for (const auto& page : code_block.pages)
×
386
            {
387
                bool displayCode = true;
×
388

389
                if (auto wanted_page = cPage)
×
390
                    displayCode = pp == wanted_page.value();
×
391

UNCOV
392
                if (displayCode)
×
393
                    fmt::println(
×
UNCOV
394
                        "{} {} (length: {})",
×
395
                        fmt::styled("Code segment", fmt::fg(fmt::color::magenta)),
×
396
                        fmt::styled(pp, fmt::fg(fmt::color::magenta)),
×
397
                        page.size());
×
398

UNCOV
399
                if (page.empty())
×
400
                {
UNCOV
401
                    if (displayCode)
×
402
                        fmt::print("NOP");
×
UNCOV
403
                }
×
404
                else
405
                {
406
                    if (cPage.value_or(pp) != pp)
×
407
                        continue;
×
408
                    if (segment == BytecodeSegment::HeadersOnly)
×
409
                        continue;
×
410
                    if (sStart.has_value() && sEnd.has_value() && ((sStart.value() > page.size()) || (sEnd.value() > page.size())))
×
411
                    {
412
                        fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", page.size());
×
413
                        return;
×
414
                    }
415

416
                    for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(page.size()); j < end; j += 4)
×
417
                    {
418
                        const uint8_t inst = page[j];
×
419
                        // TEMP
420
                        const uint8_t padding = page[j + 1];
×
421
                        const auto arg = static_cast<uint16_t>((page[j + 2] << 8) + page[j + 3]);
×
422

423
                        // instruction number
424
                        fmt::print(fmt::fg(fmt::color::cyan), "{:>4}", j / 4);
×
425
                        // padding inst arg arg
426
                        fmt::print(" {:02x} {:02x} {:02x} {:02x} ", inst, padding, page[j + 2], page[j + 3]);
×
427

428
                        if (const auto idx = static_cast<std::size_t>(inst); idx < InstructionNames.size())
×
429
                        {
430
                            const auto inst_name = InstructionNames[idx];
×
431
                            if (const auto iinst = static_cast<Instruction>(inst); arg_kinds.contains(iinst))
×
432
                                color_print_inst(inst_name, Arg { arg_kinds.at(iinst), arg });
×
433
                            else
434
                                color_print_inst(inst_name);
×
435
                        }
×
436
                        else
437
                            fmt::println("Unknown instruction");
×
438
                    }
×
439
                }
440
                if (displayCode && segment != BytecodeSegment::HeadersOnly)
×
441
                    fmt::print("\n");
×
442

443
                ++pp;
×
444
            }
×
445
        }
×
446
    }
×
447

448
    uint16_t BytecodeReader::readNumber(std::size_t& i) const
289✔
449
    {
289✔
450
        const auto x = static_cast<uint16_t>(m_bytecode[i] << 8);
289✔
451
        const uint16_t y = m_bytecode[++i];
289✔
452
        return x + y;
578✔
453
    }
289✔
454
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc