• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ArkScript-lang / Ark / 15214772429

23 May 2025 04:19PM UTC coverage: 86.895% (+0.2%) from 86.726%
15214772429

push

github

SuperFola
feat(compiler, vm): adding new AT_SYM_SYM and AT_SYM_INDEX_SYM_INDEX super instructions to get elements from list in a single instruction

40 of 44 new or added lines in 2 files covered. (90.91%)

178 existing lines in 8 files now uncovered.

7095 of 8165 relevant lines covered (86.9%)

86688.13 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.2
/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp
1
#include <Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp>
2

3
#include <chrono>
4
#include <utility>
5
#include <optional>
6
#include <unordered_map>
7
#include <Proxy/Picosha2.hpp>
8
#include <fmt/ostream.h>
9

10
#include <Ark/Constants.hpp>
11
#include <Ark/Literals.hpp>
12
#include <Ark/Compiler/IntermediateRepresentation/InstLoc.hpp>
13
#include <Ark/Compiler/Serialization/IntegerSerializer.hpp>
14
#include <Ark/Compiler/Serialization/IEEE754Serializer.hpp>
15

16
namespace Ark::internal
17
{
18
    using namespace literals;
19

20
    // Creates an IR compiler; `debug` is handed over untouched to the logger,
    // which is registered under the name "IRCompiler".
    IRCompiler::IRCompiler(const unsigned debug) :
        m_logger("IRCompiler", debug)
    {
    }
546✔
23

24
    // Builds the complete bytecode image into m_bytecode, in this order:
    //   file header, symbol table, value table, filename table,
    //   instruction location table, then one code segment per IR page.
    // A SHA256 digest of everything located after the header is finally inserted
    // right after the header.
    //
    // pages:   the IR blocks to compile, one block per page
    // symbols: the symbol table to serialize
    // values:  the value table to serialize
    //
    // May throw std::overflow_error from the table/segment writers it calls.
    void IRCompiler::process(const std::vector<IR::Block>& pages, const std::vector<std::string>& symbols, const std::vector<ValTableElem>& values)
    {
        m_logger.traceStart("process");
        pushFileHeader();
        pushSymbolTable(symbols);
        pushValueTable(values);

        // compute a list of unique filenames
        for (const auto& page : pages)
        {
            for (const auto& inst : page)
            {
                // test the source location first: instructions without one never
                // get registered, so there is no point in searching the list for them
                if (inst.hasValidSourceLocation() && std::ranges::find(m_filenames, inst.filename()) == m_filenames.end())
                    m_filenames.push_back(inst.filename());
            }
        }

        pushFilenameTable();
        pushInstLocTable(pages);

        m_ir = pages;
        compile();

        if (m_ir.empty())
        {
            // no page at all: emit a code segment with a single instruction
            m_bytecode.push_back(CODE_SEGMENT_START);
            // instruction count (1) on two bytes, most significant byte first
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(1_u8);

            // a lone HALT, laid out like pushWord does: the opcode comes first,
            // followed by its three (unused) argument bytes
            m_bytecode.push_back(HALT);
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(0_u8);
        }

        // generate a hash of the tables + bytecode
        std::vector<unsigned char> hash_out(picosha2::k_digest_size);
        picosha2::hash256(m_bytecode.begin() + bytecode::HeaderSize, m_bytecode.end(), hash_out);
        m_bytecode.insert(m_bytecode.begin() + bytecode::HeaderSize, hash_out.begin(), hash_out.end());

        m_logger.traceEnd();
    }
168✔
67

68
    void IRCompiler::dumpToStream(std::ostream& stream) const
12✔
69
    {
12✔
70
        std::size_t index = 0;
12✔
71
        for (const auto& block : m_ir)
41✔
72
        {
73
            fmt::println(stream, "page_{}", index);
29✔
74
            for (const auto& entity : block)
543✔
75
            {
76
                switch (entity.kind())
514✔
77
                {
32✔
78
                    case IR::Kind::Label:
79
                        fmt::println(stream, ".L{}:", entity.label());
32✔
80
                        break;
59✔
81

82
                    case IR::Kind::Goto:
83
                        fmt::println(stream, "\t{} L{}", InstructionNames[entity.inst()], entity.label());
27✔
84
                        break;
32✔
85

86
                    case IR::Kind::GotoWithArg:
87
                        fmt::println(stream, "\t{} L{}, {}", InstructionNames[entity.inst()], entity.label(), entity.primaryArg());
5✔
88
                        break;
382✔
89

90
                    case IR::Kind::Opcode:
91
                        fmt::println(stream, "\t{} {}", InstructionNames[entity.inst()], entity.primaryArg());
377✔
92
                        break;
450✔
93

94
                    case IR::Kind::Opcode2Args:
95
                        fmt::println(stream, "\t{} {}, {}", InstructionNames[entity.inst()], entity.primaryArg(), entity.secondaryArg());
73✔
96
                        break;
73✔
97
                }
514✔
98
            }
514✔
99

100
            fmt::println(stream, "");
29✔
101
            ++index;
29✔
102
        }
29✔
103
    }
12✔
104

105
    // Read-only access to the bytecode buffer filled by process().
    // The returned reference points to the member m_bytecode, no copy is made.
    const bytecode_t& IRCompiler::bytecode() const noexcept
    {
        return m_bytecode;
    }
109

110
    void IRCompiler::compile()
168✔
111
    {
168✔
112
        // push the different code segments
113
        for (std::size_t i = 0, end = m_ir.size(); i < end; ++i)
1,680✔
114
        {
115
            IR::Block& page = m_ir[i];
1,512✔
116
            // just in case we got too far, always add a HALT to be sure the
117
            // VM won't do anything crazy
118
            page.emplace_back(HALT);
1,512✔
119

120
            // push number of elements
121
            const auto page_size = std::ranges::count_if(page, [](const auto& a) {
41,647✔
122
                return a.kind() != IR::Kind::Label;
40,135✔
123
            });
124
            if (std::cmp_greater(page_size, std::numeric_limits<uint16_t>::max()))
1,512✔
UNCOV
125
                throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i));
×
126

127
            m_bytecode.push_back(CODE_SEGMENT_START);
1,512✔
128
            serializeOn2BytesToVecBE(page_size, m_bytecode);
1,512✔
129

130
            // register labels position
131
            uint16_t pos = 0;
1,512✔
132
            std::unordered_map<IR::label_t, uint16_t> label_to_position;
1,512✔
133
            for (auto& inst : page)
41,647✔
134
            {
135
                switch (inst.kind())
40,135✔
136
                {
3,398✔
137
                    case IR::Kind::Label:
138
                        label_to_position[inst.label()] = pos;
3,398✔
139
                        break;
40,135✔
140

141
                    default:
142
                        ++pos;
36,737✔
143
                }
40,135✔
144
            }
40,135✔
145

146
            for (auto& inst : page)
41,647✔
147
            {
148
                switch (inst.kind())
40,135✔
149
                {
2,992✔
150
                    case IR::Kind::Goto:
151
                        pushWord(Word(inst.inst(), label_to_position[inst.label()]));
2,992✔
152
                        break;
3,407✔
153

154
                    case IR::Kind::GotoWithArg:
155
                        pushWord(Word(inst.inst(), inst.primaryArg(), label_to_position[inst.label()]));
415✔
156
                        break;
33,745✔
157

158
                    case IR::Kind::Opcode:
159
                        [[fallthrough]];
160
                    case IR::Kind::Opcode2Args:
161
                        pushWord(inst.bytecode());
33,330✔
162
                        break;
36,728✔
163

164
                    default:
165
                        break;
3,398✔
166
                }
40,135✔
167
            }
40,135✔
168
        }
1,512✔
169
    }
168✔
170

171
    void IRCompiler::pushWord(const Word& word)
36,737✔
172
    {
36,737✔
173
        m_bytecode.push_back(word.opcode);
36,737✔
174
        m_bytecode.push_back(word.byte_1);
36,737✔
175
        m_bytecode.push_back(word.byte_2);
36,737✔
176
        m_bytecode.push_back(word.byte_3);
36,737✔
177
    }
36,737✔
178

179
    void IRCompiler::pushFileHeader() noexcept
168✔
180
    {
168✔
181
        /*
182
            Generating headers:
183
                - lang name (to be sure we are executing an ArkScript file)
184
                    on 4 bytes (ark + padding)
185
                - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
186
                - timestamp (8 bytes, unix format)
187
        */
188

189
        m_bytecode.push_back('a');
168✔
190
        m_bytecode.push_back('r');
168✔
191
        m_bytecode.push_back('k');
168✔
192
        m_bytecode.push_back(0_u8);
168✔
193

194
        // push version
195
        for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH })
672✔
196
            serializeOn2BytesToVecBE(n, m_bytecode);
504✔
197

198
        // push timestamp
199
        const long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
168✔
200
                                        std::chrono::system_clock::now().time_since_epoch())
168✔
201
                                        .count();
168✔
202
        for (long i = 0; i < 8; ++i)
1,512✔
203
        {
204
            const long shift = 8 * (7 - i);
1,344✔
205
            const auto ts_byte = static_cast<uint8_t>((timestamp & (0xffLL << shift)) >> shift);
1,344✔
206
            m_bytecode.push_back(ts_byte);
1,344✔
207
        }
1,344✔
208
    }
168✔
209

210
    void IRCompiler::pushSymbolTable(const std::vector<std::string>& symbols)
168✔
211
    {
168✔
212
        const std::size_t symbol_size = symbols.size();
168✔
213
        if (symbol_size > std::numeric_limits<uint16_t>::max())
168✔
UNCOV
214
            throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size));
×
215

216
        m_bytecode.push_back(SYM_TABLE_START);
168✔
217
        serializeOn2BytesToVecBE(symbol_size, m_bytecode);
168✔
218

219
        for (const auto& sym : symbols)
2,886✔
220
        {
221
            // push the string, null terminated
222
            std::ranges::transform(sym, std::back_inserter(m_bytecode), [](const char i) {
27,099✔
223
                return static_cast<uint8_t>(i);
24,381✔
224
            });
225
            m_bytecode.push_back(0_u8);
2,718✔
226
        }
2,718✔
227
    }
168✔
228

229
    void IRCompiler::pushValueTable(const std::vector<ValTableElem>& values)
168✔
230
    {
168✔
231
        const std::size_t value_size = values.size();
168✔
232
        if (value_size > std::numeric_limits<uint16_t>::max())
168✔
UNCOV
233
            throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size));
×
234

235
        m_bytecode.push_back(VAL_TABLE_START);
168✔
236
        serializeOn2BytesToVecBE(value_size, m_bytecode);
168✔
237

238
        for (const ValTableElem& val : values)
3,472✔
239
        {
240
            switch (val.type)
3,304✔
241
            {
643✔
242
                case ValTableElemType::Number:
243
                {
244
                    m_bytecode.push_back(NUMBER_TYPE);
643✔
245
                    const auto n = std::get<double>(val.value);
643✔
246
                    const auto [exponent, mantissa] = ieee754::serialize(n);
643✔
247
                    serializeToVecLE(exponent, m_bytecode);
643✔
248
                    serializeToVecLE(mantissa, m_bytecode);
643✔
249
                    break;
250
                }
1,960✔
251

252
                case ValTableElemType::String:
253
                {
254
                    m_bytecode.push_back(STRING_TYPE);
1,317✔
255
                    auto t = std::get<std::string>(val.value);
1,317✔
256
                    std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
24,452✔
257
                        return static_cast<uint8_t>(i);
23,135✔
258
                    });
259
                    break;
260
                }
2,661✔
261

262
                case ValTableElemType::PageAddr:
263
                {
264
                    m_bytecode.push_back(FUNC_TYPE);
1,344✔
265
                    const std::size_t addr = std::get<std::size_t>(val.value);
1,344✔
266
                    serializeOn2BytesToVecBE(addr, m_bytecode);
1,344✔
267
                    break;
268
                }
1,344✔
269
            }
3,304✔
270

271
            m_bytecode.push_back(0_u8);
3,304✔
272
        }
3,304✔
273
    }
168✔
274

275
    void IRCompiler::pushFilenameTable()
168✔
276
    {
168✔
277
        if (m_filenames.size() > std::numeric_limits<uint16_t>::max())
168✔
UNCOV
278
            throw std::overflow_error(fmt::format("Too many filenames: {}, exceeds the maximum size of 2^16 - 1", m_filenames.size()));
×
279

280
        m_bytecode.push_back(FILENAMES_TABLE_START);
168✔
281
        // push number of elements
282
        serializeOn2BytesToVecBE(m_filenames.size(), m_bytecode);
168✔
283

284
        for (const auto& name : m_filenames)
423✔
285
        {
286
            std::ranges::transform(name, std::back_inserter(m_bytecode), [](const char i) {
20,259✔
287
                return static_cast<uint8_t>(i);
20,004✔
288
            });
289
            m_bytecode.push_back(0_u8);
255✔
290
        }
255✔
291
    }
168✔
292

293
    // Appends the instruction location table to m_bytecode: table marker, number
    // of entries on 2 bytes, then for each entry the page pointer, instruction
    // pointer and filename id (2 bytes each) followed by the source line.
    //
    // An entry is recorded for an instruction with a valid source location only
    // when its (file, line, page) differs from the previously recorded entry.
    // Labels do not advance the instruction pointer.
    //
    // pages: the IR blocks, one per page, as given to process()
    //
    // Throws std::overflow_error when there are more than 2^16 - 1 entries, since
    // the entry count is written on 2 bytes.
    void IRCompiler::pushInstLocTable(const std::vector<IR::Block>& pages)
    {
        std::vector<internal::InstLoc> locations;
        for (std::size_t i = 0, end = pages.size(); i < end; ++i)
        {
            const auto& page = pages[i];
            uint16_t ip = 0;

            for (const auto& inst : page)
            {
                if (inst.hasValidSourceLocation())
                {
                    // we are guaranteed to have a value since we listed all existing filenames in IRCompiler::process before,
                    // thus we do not have to check if std::ranges::find returned a valid iterator.
                    auto file_id = static_cast<uint16_t>(std::distance(m_filenames.begin(), std::ranges::find(m_filenames, inst.filename())));

                    // skip redundant instruction location: same file, line and page as the last recorded one
                    const bool redundant = !locations.empty() &&
                        locations.back().filename_id == file_id &&
                        locations.back().line == inst.sourceLine() &&
                        locations.back().page_pointer == i;

                    if (!redundant)
                        locations.push_back(
                            { .page_pointer = static_cast<uint16_t>(i),
                              .inst_pointer = ip,
                              .filename_id = file_id,
                              .line = static_cast<uint32_t>(inst.sourceLine()) });
                }

                if (inst.kind() != IR::Kind::Label)
                    ++ip;
            }
        }

        // the count below is written on 2 bytes: refuse to emit a table whose
        // announced size would not match the number of entries that follow
        if (locations.size() > std::numeric_limits<uint16_t>::max())
            throw std::overflow_error(fmt::format("Too many instruction locations: {}, exceeds the maximum size of 2^16 - 1", locations.size()));

        m_bytecode.push_back(INST_LOC_TABLE_START);
        serializeOn2BytesToVecBE(locations.size(), m_bytecode);

        for (const auto& loc : locations)
        {
            serializeOn2BytesToVecBE(loc.page_pointer, m_bytecode);
            serializeOn2BytesToVecBE(loc.inst_pointer, m_bytecode);
            serializeOn2BytesToVecBE(loc.filename_id, m_bytecode);
            serializeToVecBE(loc.line, m_bytecode);
        }
    }
168✔
342
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc