• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ArkScript-lang / Ark / 20376190996

19 Dec 2025 04:33PM UTC coverage: 90.499% (-0.2%) from 90.661%
20376190996

Pull #623

github

web-flow
Merge 536011ee7 into c2be10ac3
Pull Request #623: feat(iroptimizer, vm): …

64 of 87 new or added lines in 7 files covered. (73.56%)

1 existing line in 1 file now uncovered.

8258 of 9125 relevant lines covered (90.5%)

244847.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.3
/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp
1
#include <Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp>
2

3
#include <chrono>
4
#include <utility>
5
#include <optional>
6
#include <unordered_map>
7
#include <Proxy/Picosha2.hpp>
8
#include <fmt/ostream.h>
9

10
#include <Ark/Constants.hpp>
11
#include <Ark/Utils/Literals.hpp>
12
#include <Ark/Compiler/IntermediateRepresentation/InstLoc.hpp>
13
#include <Ark/Compiler/Serialization/IntegerSerializer.hpp>
14
#include <Ark/Compiler/Serialization/IEEE754Serializer.hpp>
15

16
namespace Ark::internal
17
{
18
    using namespace literals;
19

20
    /**
     * @brief Construct an IR compiler
     * @param debug value forwarded to the "IRCompiler" logger
     */
    IRCompiler::IRCompiler(const unsigned debug) :
        m_logger("IRCompiler", debug)
    {
    }
700✔
23

24
    /**
     * @brief Turn the IR pages and their tables into the final bytecode buffer
     * @details Sections are appended to m_bytecode in this order: file header, symbol table,
     *          value table, filename table, instruction location table, then the code segments.
     *          A SHA256 digest of everything located after the header is finally inserted
     *          right after the header.
     * @param pages IR blocks, one per code segment
     * @param symbols symbol table content
     * @param values value table content
     */
    void IRCompiler::process(const std::vector<IR::Block>& pages, const std::vector<std::string>& symbols, const std::vector<ValTableElem>& values)
    {
        m_logger.traceStart("process");
        pushFileHeader();
        pushSymbolTable(symbols);
        pushValueTable(values);

        // compute a list of unique filenames
        for (const auto& page : pages)
        {
            for (const auto& inst : page)
            {
                // test the source location first: it is cheap and lets us skip the
                // linear search for every instruction that has no usable location
                if (inst.hasValidSourceLocation() && std::ranges::find(m_filenames, inst.filename()) == m_filenames.end())
                    m_filenames.push_back(inst.filename());
            }
        }

        pushFilenameTable();
        pushInstLocTable(pages);

        m_ir = pages;
        compile();

        if (m_ir.empty())
        {
            // code segment with a single instruction
            m_bytecode.push_back(CODE_SEGMENT_START);
            // number of instructions in the segment (1), on 2 bytes, big endian
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(1_u8);

            // a single HALT word, laid out like pushWord() does: opcode first,
            // followed by its three argument bytes
            m_bytecode.push_back(HALT);
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(0_u8);
        }

        // generate a hash of the tables + bytecode
        std::vector<unsigned char> hash_out(picosha2::k_digest_size);
        picosha2::hash256(m_bytecode.begin() + bytecode::HeaderSize, m_bytecode.end(), hash_out);
        // the digest is stored between the header and the tables
        m_bytecode.insert(m_bytecode.begin() + bytecode::HeaderSize, hash_out.begin(), hash_out.end());

        m_logger.traceEnd();
    }
225✔
67

68
    void IRCompiler::dumpToStream(std::ostream& stream) const
17✔
69
    {
17✔
70
        std::size_t index = 0;
17✔
71
        for (const auto& block : m_ir)
61✔
72
        {
73
            fmt::println(stream, "page_{}", index);
44✔
74
            for (const auto& entity : block)
888✔
75
            {
76
                switch (entity.kind())
844✔
77
                {
129✔
78
                    case IR::Kind::Label:
79
                        fmt::println(stream, ".L{}:", entity.label());
129✔
80
                        break;
238✔
81

82
                    case IR::Kind::Goto:
83
                        fmt::println(stream, "\t{} L{}", InstructionNames[entity.inst()], entity.label());
109✔
84
                        break;
123✔
85

86
                    case IR::Kind::GotoWithArg:
87
                        fmt::println(stream, "\t{} L{}, {}", InstructionNames[entity.inst()], entity.label(), entity.primaryArg());
14✔
88
                        break;
486✔
89

90
                    case IR::Kind::Opcode:
91
                        fmt::println(stream, "\t{} {}", InstructionNames[entity.inst()], entity.primaryArg());
472✔
92
                        break;
592✔
93

94
                    case IR::Kind::Opcode2Args:
95
                        fmt::println(stream, "\t{} {}, {}", InstructionNames[entity.inst()], entity.primaryArg(), entity.secondaryArg());
120✔
96
                        break;
120✔
97

98
                    case IR::Kind::Opcode3Args:
NEW
99
                        fmt::println(stream, "\t{} {}, {}, {}", InstructionNames[entity.inst()], entity.primaryArg(), entity.secondaryArg(), entity.tertiaryArg());
×
NEW
100
                        break;
×
101
                }
844✔
102
            }
844✔
103

104
            fmt::println(stream, "");
44✔
105
            ++index;
44✔
106
        }
44✔
107
    }
17✔
108

109
    /**
     * @brief Access the bytecode buffer
     * @return const bytecode_t& read-only reference to m_bytecode
     */
    const bytecode_t& IRCompiler::bytecode() const noexcept
    {
        return m_bytecode;
    }
113

114
    void IRCompiler::compile()
225✔
115
    {
225✔
116
        // push the different code segments
117
        for (std::size_t i = 0, end = m_ir.size(); i < end; ++i)
4,058✔
118
        {
119
            IR::Block& page = m_ir[i];
3,833✔
120
            // just in case we got too far, always add a HALT to be sure the
121
            // VM won't do anything crazy
122
            page.emplace_back(HALT);
3,833✔
123

124
            // push number of elements
125
            const auto page_size = std::ranges::count_if(page, [](const auto& a) {
105,444✔
126
                return a.kind() != IR::Kind::Label;
101,611✔
127
            });
128
            if (std::cmp_greater(page_size, MaxValue16Bits))
3,833✔
129
                throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of {}", i, MaxValue16Bits));
×
130

131
            m_bytecode.push_back(CODE_SEGMENT_START);
3,833✔
132
            serializeOn2BytesToVecBE(page_size, m_bytecode);
3,833✔
133

134
            // register labels position
135
            uint16_t pos = 0;
3,833✔
136
            std::unordered_map<IR::label_t, uint16_t> label_to_position;
3,833✔
137
            for (auto& inst : page)
105,444✔
138
            {
139
                switch (inst.kind())
101,611✔
140
                {
14,663✔
141
                    case IR::Kind::Label:
142
                        label_to_position[inst.label()] = pos;
14,663✔
143
                        break;
101,611✔
144

145
                    default:
146
                        ++pos;
86,948✔
147
                }
101,611✔
148
            }
101,611✔
149

150
            for (auto& inst : page)
105,444✔
151
            {
152
                switch (inst.kind())
101,611✔
153
                {
12,694✔
154
                    case IR::Kind::Goto:
155
                        pushWord(Word(inst.inst(), label_to_position[inst.label()]));
12,694✔
156
                        break;
14,018✔
157

158
                    case IR::Kind::GotoWithArg:
159
                        pushWord(Word(inst.inst(), inst.primaryArg(), label_to_position[inst.label()]));
1,324✔
160
                        break;
74,254✔
161

162
                    case IR::Kind::Opcode:
163
                        [[fallthrough]];
164
                    case IR::Kind::Opcode2Args:
165
                        [[fallthrough]];
166
                    case IR::Kind::Opcode3Args:
167
                        pushWord(inst.bytecode());
72,930✔
168
                        break;
87,593✔
169

170
                    default:
171
                        break;
14,663✔
172
                }
101,611✔
173
            }
101,611✔
174
        }
3,833✔
175
    }
225✔
176

177
    void IRCompiler::pushWord(const Word& word)
86,948✔
178
    {
86,948✔
179
        m_bytecode.push_back(word.opcode);
86,948✔
180
        m_bytecode.push_back(word.byte_1);
86,948✔
181
        m_bytecode.push_back(word.byte_2);
86,948✔
182
        m_bytecode.push_back(word.byte_3);
86,948✔
183
    }
86,948✔
184

185
    void IRCompiler::pushFileHeader() noexcept
225✔
186
    {
225✔
187
        /*
188
            Generating headers:
189
                - lang name (to be sure we are executing an ArkScript file)
190
                    on 4 bytes (ark + padding)
191
                - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
192
                - timestamp (8 bytes, unix format)
193
        */
194

195
        m_bytecode.push_back('a');
225✔
196
        m_bytecode.push_back('r');
225✔
197
        m_bytecode.push_back('k');
225✔
198
        m_bytecode.push_back(0_u8);
225✔
199

200
        // push version
201
        for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH })
900✔
202
            serializeOn2BytesToVecBE(n, m_bytecode);
675✔
203

204
        // push timestamp
205
        const long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
225✔
206
                                        std::chrono::system_clock::now().time_since_epoch())
225✔
207
                                        .count();
225✔
208
        for (long i = 0; i < 8; ++i)
2,025✔
209
        {
210
            const long shift = 8 * (7 - i);
1,800✔
211
            const auto ts_byte = static_cast<uint8_t>((timestamp & (0xffLL << shift)) >> shift);
1,800✔
212
            m_bytecode.push_back(ts_byte);
1,800✔
213
        }
1,800✔
214
    }
225✔
215

216
    void IRCompiler::pushSymbolTable(const std::vector<std::string>& symbols)
225✔
217
    {
225✔
218
        const std::size_t symbol_size = symbols.size();
225✔
219
        if (std::cmp_greater(symbol_size, MaxValue16Bits))
225✔
220
            throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of {}", symbol_size, MaxValue16Bits));
×
221

222
        m_bytecode.push_back(SYM_TABLE_START);
225✔
223
        serializeOn2BytesToVecBE(symbol_size, m_bytecode);
225✔
224

225
        for (const auto& sym : symbols)
7,222✔
226
        {
227
            // push the string, null terminated
228
            std::ranges::transform(sym, std::back_inserter(m_bytecode), [](const char i) {
79,264✔
229
                return static_cast<uint8_t>(i);
72,267✔
230
            });
231
            m_bytecode.push_back(0_u8);
6,997✔
232
        }
6,997✔
233
    }
225✔
234

235
    void IRCompiler::pushValueTable(const std::vector<ValTableElem>& values)
225✔
236
    {
225✔
237
        const std::size_t value_size = values.size();
225✔
238
        if (std::cmp_greater(value_size, MaxValue16Bits))
225✔
239
            throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of {}", value_size, MaxValue16Bits));
×
240

241
        m_bytecode.push_back(VAL_TABLE_START);
225✔
242
        serializeOn2BytesToVecBE(value_size, m_bytecode);
225✔
243

244
        for (const ValTableElem& val : values)
7,193✔
245
        {
246
            switch (val.type)
6,968✔
247
            {
954✔
248
                case ValTableElemType::Number:
249
                {
250
                    m_bytecode.push_back(NUMBER_TYPE);
954✔
251
                    const auto n = std::get<double>(val.value);
954✔
252
                    const auto [exponent, mantissa] = ieee754::serialize(n);
954✔
253
                    serializeToVecLE(exponent, m_bytecode);
954✔
254
                    serializeToVecLE(mantissa, m_bytecode);
954✔
255
                    break;
256
                }
3,360✔
257

258
                case ValTableElemType::String:
259
                {
260
                    m_bytecode.push_back(STRING_TYPE);
2,406✔
261
                    auto t = std::get<std::string>(val.value);
2,406✔
262
                    std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
47,825✔
263
                        return static_cast<uint8_t>(i);
45,419✔
264
                    });
265
                    break;
266
                }
6,014✔
267

268
                case ValTableElemType::PageAddr:
269
                {
270
                    m_bytecode.push_back(FUNC_TYPE);
3,608✔
271
                    const std::size_t addr = std::get<std::size_t>(val.value);
3,608✔
272
                    serializeOn2BytesToVecBE(addr, m_bytecode);
3,608✔
273
                    break;
274
                }
3,608✔
275
            }
6,968✔
276

277
            m_bytecode.push_back(0_u8);
6,968✔
278
        }
6,968✔
279
    }
225✔
280

281
    void IRCompiler::pushFilenameTable()
225✔
282
    {
225✔
283
        if (std::cmp_greater(m_filenames.size(), MaxValue16Bits))
225✔
284
            throw std::overflow_error(fmt::format("Too many filenames: {}, exceeds the maximum size of {}", m_filenames.size(), MaxValue16Bits));
×
285

286
        m_bytecode.push_back(FILENAMES_TABLE_START);
225✔
287
        // push number of elements
288
        serializeOn2BytesToVecBE(m_filenames.size(), m_bytecode);
225✔
289

290
        for (const auto& name : m_filenames)
584✔
291
        {
292
            std::ranges::transform(name, std::back_inserter(m_bytecode), [](const char i) {
27,533✔
293
                return static_cast<uint8_t>(i);
27,174✔
294
            });
295
            m_bytecode.push_back(0_u8);
359✔
296
        }
359✔
297
    }
225✔
298

299
    /**
     * @brief Append the instruction location table to the bytecode
     * @details Walks every page and records, for instructions having a valid source location,
     *          the (page, instruction pointer, filename id, line) tuple. An entry is skipped
     *          when the previously recorded one has the same page, file and line.
     *          The table is written as INST_LOC_TABLE_START, the number of entries on 2 bytes,
     *          then for each entry: page pointer, instruction pointer and filename id on
     *          2 bytes each, followed by the line.
     * @param pages IR blocks to extract the locations from
     */
    void IRCompiler::pushInstLocTable(const std::vector<IR::Block>& pages)
    {
        std::vector<internal::InstLoc> locations;
        for (std::size_t i = 0, end = pages.size(); i < end; ++i)
        {
            const auto& page = pages[i];
            // position of the current instruction inside the page, labels excluded
            uint16_t ip = 0;

            for (const auto& inst : page)
            {
                if (inst.hasValidSourceLocation())
                {
                    // we are guaranteed to have a value since we listed all existing filenames in IRCompiler::process before,
                    // thus we do not have to check if std::ranges::find returned a valid iterator.
                    const auto file_id = static_cast<uint16_t>(std::distance(m_filenames.begin(), std::ranges::find(m_filenames, inst.filename())));

                    // skip redundant instruction location: only the last recorded entry
                    // is compared, and the instruction pointer is deliberately left out
                    const bool same_as_previous =
                        !locations.empty() &&
                        locations.back().filename_id == file_id &&
                        locations.back().line == inst.sourceLine() &&
                        locations.back().page_pointer == i;

                    if (!same_as_previous)
                        locations.push_back(
                            { .page_pointer = static_cast<uint16_t>(i),
                              .inst_pointer = ip,
                              .filename_id = file_id,
                              .line = static_cast<uint32_t>(inst.sourceLine()) });
                }

                // labels do not occupy an instruction slot
                if (inst.kind() != IR::Kind::Label)
                    ++ip;
            }
        }

        m_bytecode.push_back(INST_LOC_TABLE_START);
        // NOTE(review): unlike the symbol, value and filename tables, the number of entries
        // is not checked against MaxValue16Bits before being written on 2 bytes; confirm
        // whether this is intended
        serializeOn2BytesToVecBE(locations.size(), m_bytecode);

        for (const auto& loc : locations)
        {
            serializeOn2BytesToVecBE(loc.page_pointer, m_bytecode);
            serializeOn2BytesToVecBE(loc.inst_pointer, m_bytecode);
            serializeOn2BytesToVecBE(loc.filename_id, m_bytecode);
            serializeToVecBE(loc.line, m_bytecode);
        }
    }
225✔
348
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc