• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ArkScript-lang / Ark / 14600291923

22 Apr 2025 04:53PM UTC coverage: 83.196% (+2.8%) from 80.417%
14600291923

Pull #530

github

web-flow
Merge 8232728b1 into 32828a045
Pull Request #530: Feat/inst locations

290 of 380 new or added lines in 20 files covered. (76.32%)

7 existing lines in 3 files now uncovered.

6560 of 7885 relevant lines covered (83.2%)

79291.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.31
/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp
1
#include <Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp>
2

3
#include <chrono>
4
#include <utility>
5
#include <unordered_map>
6
#include <Proxy/Picosha2.hpp>
7
#include <fmt/ostream.h>
8

9
#include <Ark/Constants.hpp>
10
#include <Ark/Literals.hpp>
11
#include <Ark/Compiler/IntermediateRepresentation/InstLoc.hpp>
12
#include <Ark/Compiler/Serialization/IntegerSerializer.hpp>
13
#include <Ark/Compiler/Serialization/IEEE754Serializer.hpp>
14

15
namespace Ark::internal
16
{
17
    using namespace literals;
18

19
    // Construct the IR compiler; the only work done here is creating the
    // logger, named "IRCompiler", with the requested debug value.
    IRCompiler::IRCompiler(const unsigned debug) :
        m_logger("IRCompiler", debug)
    {
    }
22

23
    // Assemble the complete bytecode into m_bytecode. Sections are emitted in
    // this order: file header, symbol table, value table, filename table,
    // instruction location table, then one code segment per page. Finally a
    // picosha2 hash256 digest of everything located after the header is
    // computed and inserted right after the header.
    //
    //   pages   - IR blocks, one per code segment (copied into m_ir)
    //   symbols - names written to the symbol table
    //   values  - constants written to the value table
    //
    // Exceptions thrown by the table writers and by compile() propagate to the caller.
    void IRCompiler::process(const std::vector<IR::Block>& pages, const std::vector<std::string>& symbols, const std::vector<ValTableElem>& values)
    {
        m_logger.traceStart("process");
        pushFileHeader();
        pushSymbolTable(symbols);
        pushValueTable(values);

        // compute a list of unique filenames, kept in order of first appearance
        for (const auto& page : pages)
        {
            for (const auto& inst : page)
            {
                // check the cheap predicate first, so that the linear search is
                // only paid for instructions that can contribute a filename
                if (inst.hasValidSourceLocation() && std::ranges::find(m_filenames, inst.filename()) == m_filenames.end())
                    m_filenames.push_back(inst.filename());
            }
        }

        pushFilenameTable();
        pushInstLocTable(pages);

        // compile() appends a HALT to each page, hence the mutable copy
        m_ir = pages;
        compile();

        if (m_ir.empty())
        {
            // code segment with a single instruction
            m_bytecode.push_back(CODE_SEGMENT_START);
            // segment size (1) on two bytes, big endian
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(1_u8);

            // same layout as IRCompiler::pushWord: the opcode comes first,
            // followed by the three argument bytes
            m_bytecode.push_back(HALT);
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(0_u8);
            m_bytecode.push_back(0_u8);
        }

        // generate a hash of the tables + bytecode
        std::vector<unsigned char> hash_out(picosha2::k_digest_size);
        picosha2::hash256(m_bytecode.begin() + bytecode::HeaderSize, m_bytecode.end(), hash_out);
        m_bytecode.insert(m_bytecode.begin() + bytecode::HeaderSize, hash_out.begin(), hash_out.end());

        m_logger.traceEnd();
    }
66

67
    void IRCompiler::dumpToStream(std::ostream& stream) const
11✔
68
    {
11✔
69
        std::size_t index = 0;
11✔
70
        for (const auto& block : m_ir)
37✔
71
        {
72
            fmt::println(stream, "page_{}", index);
26✔
73
            for (const auto& entity : block)
498✔
74
            {
75
                switch (entity.kind())
472✔
76
                {
24✔
77
                    case IR::Kind::Label:
78
                        fmt::println(stream, ".L{}:", entity.label());
24✔
79
                        break;
36✔
80

81
                    case IR::Kind::Goto:
82
                        fmt::println(stream, "\tGOTO L{}", entity.label());
12✔
83
                        break;
20✔
84

85
                    case IR::Kind::GotoIfTrue:
86
                        fmt::println(stream, "\tGOTO_IF_TRUE L{}", entity.label());
8✔
87
                        break;
12✔
88

89
                    case IR::Kind::GotoIfFalse:
90
                        fmt::println(stream, "\tGOTO_IF_FALSE L{}", entity.label());
4✔
91
                        break;
378✔
92

93
                    case IR::Kind::Opcode:
94
                        fmt::println(stream, "\t{} {}", InstructionNames[entity.inst()], entity.primaryArg());
374✔
95
                        break;
424✔
96

97
                    case IR::Kind::Opcode2Args:
98
                        fmt::println(stream, "\t{} {}, {}", InstructionNames[entity.inst()], entity.primaryArg(), entity.secondaryArg());
50✔
99
                        break;
50✔
100
                }
472✔
101
            }
472✔
102

103
            fmt::println(stream, "");
26✔
104
            ++index;
26✔
105
        }
26✔
106
    }
11✔
107

108
    // Read-only access to the bytecode buffer filled by IRCompiler::process.
    const bytecode_t& IRCompiler::bytecode() const noexcept
    {
        return this->m_bytecode;
    }
112

113
    void IRCompiler::compile()
123✔
114
    {
123✔
115
        // push the different code segments
116
        for (std::size_t i = 0, end = m_ir.size(); i < end; ++i)
1,527✔
117
        {
118
            IR::Block& page = m_ir[i];
1,404✔
119
            // just in case we got too far, always add a HALT to be sure the
120
            // VM won't do anything crazy
121
            page.emplace_back(HALT);
1,404✔
122

123
            // push number of elements
124
            const auto page_size = std::ranges::count_if(page, [](const auto& a) {
43,631✔
125
                return a.kind() != IR::Kind::Label;
42,227✔
126
            });
127
            if (std::cmp_greater(page_size, std::numeric_limits<uint16_t>::max()))
1,404✔
128
                throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i));
×
129

130
            m_bytecode.push_back(CODE_SEGMENT_START);
1,404✔
131
            serializeOn2BytesToVecBE(page_size, m_bytecode);
1,404✔
132

133
            // register labels position
134
            uint16_t pos = 0;
1,404✔
135
            std::unordered_map<IR::label_t, uint16_t> label_to_position;
1,404✔
136
            for (auto& inst : page)
43,631✔
137
            {
138
                switch (inst.kind())
42,227✔
139
                {
3,124✔
140
                    case IR::Kind::Label:
141
                        label_to_position[inst.label()] = pos;
3,124✔
142
                        break;
42,227✔
143

144
                    default:
145
                        ++pos;
39,103✔
146
                }
42,227✔
147
            }
42,227✔
148

149
            for (auto& inst : page)
43,631✔
150
            {
151
                switch (inst.kind())
42,227✔
152
                {
1,499✔
153
                    case IR::Kind::Goto:
154
                        pushWord(Word(JUMP, label_to_position[inst.label()]));
1,499✔
155
                        break;
2,515✔
156

157
                    case IR::Kind::GotoIfTrue:
158
                        pushWord(Word(POP_JUMP_IF_TRUE, label_to_position[inst.label()]));
1,016✔
159
                        break;
1,634✔
160

161
                    case IR::Kind::GotoIfFalse:
162
                        pushWord(Word(POP_JUMP_IF_FALSE, label_to_position[inst.label()]));
618✔
163
                        break;
36,588✔
164

165
                    case IR::Kind::Opcode:
166
                        [[fallthrough]];
167
                    case IR::Kind::Opcode2Args:
168
                        pushWord(inst.bytecode());
35,970✔
169
                        break;
39,094✔
170

171
                    default:
172
                        break;
3,124✔
173
                }
42,227✔
174
            }
42,227✔
175
        }
1,404✔
176
    }
123✔
177

178
    void IRCompiler::pushWord(const Word& word)
39,103✔
179
    {
39,103✔
180
        m_bytecode.push_back(word.opcode);
39,103✔
181
        m_bytecode.push_back(word.byte_1);
39,103✔
182
        m_bytecode.push_back(word.byte_2);
39,103✔
183
        m_bytecode.push_back(word.byte_3);
39,103✔
184
    }
39,103✔
185

186
    void IRCompiler::pushFileHeader() noexcept
123✔
187
    {
123✔
188
        /*
189
            Generating headers:
190
                - lang name (to be sure we are executing an ArkScript file)
191
                    on 4 bytes (ark + padding)
192
                - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
193
                - timestamp (8 bytes, unix format)
194
        */
195

196
        m_bytecode.push_back('a');
123✔
197
        m_bytecode.push_back('r');
123✔
198
        m_bytecode.push_back('k');
123✔
199
        m_bytecode.push_back(0_u8);
123✔
200

201
        // push version
202
        for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH })
492✔
203
            serializeOn2BytesToVecBE(n, m_bytecode);
369✔
204

205
        // push timestamp
206
        const long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
123✔
207
                                        std::chrono::system_clock::now().time_since_epoch())
123✔
208
                                        .count();
123✔
209
        for (long i = 0; i < 8; ++i)
1,107✔
210
        {
211
            const long shift = 8 * (7 - i);
984✔
212
            const auto ts_byte = static_cast<uint8_t>((timestamp & (0xffLL << shift)) >> shift);
984✔
213
            m_bytecode.push_back(ts_byte);
984✔
214
        }
984✔
215
    }
123✔
216

217
    void IRCompiler::pushSymbolTable(const std::vector<std::string>& symbols)
123✔
218
    {
123✔
219
        const std::size_t symbol_size = symbols.size();
123✔
220
        if (symbol_size > std::numeric_limits<uint16_t>::max())
123✔
221
            throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size));
×
222

223
        m_bytecode.push_back(SYM_TABLE_START);
123✔
224
        serializeOn2BytesToVecBE(symbol_size, m_bytecode);
123✔
225

226
        for (const auto& sym : symbols)
2,690✔
227
        {
228
            // push the string, null terminated
229
            std::ranges::transform(sym, std::back_inserter(m_bytecode), [](const char i) {
25,783✔
230
                return static_cast<uint8_t>(i);
23,216✔
231
            });
232
            m_bytecode.push_back(0_u8);
2,567✔
233
        }
2,567✔
234
    }
123✔
235

236
    void IRCompiler::pushValueTable(const std::vector<ValTableElem>& values)
123✔
237
    {
123✔
238
        const std::size_t value_size = values.size();
123✔
239
        if (value_size > std::numeric_limits<uint16_t>::max())
123✔
240
            throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size));
×
241

242
        m_bytecode.push_back(VAL_TABLE_START);
123✔
243
        serializeOn2BytesToVecBE(value_size, m_bytecode);
123✔
244

245
        for (const ValTableElem& val : values)
3,239✔
246
        {
247
            switch (val.type)
3,116✔
248
            {
615✔
249
                case ValTableElemType::Number:
250
                {
251
                    m_bytecode.push_back(NUMBER_TYPE);
615✔
252
                    const auto n = std::get<double>(val.value);
615✔
253
                    const auto [exponent, mantissa] = ieee754::serialize(n);
615✔
254
                    serializeToVecLE(exponent, m_bytecode);
615✔
255
                    serializeToVecLE(mantissa, m_bytecode);
615✔
256
                    break;
257
                }
1,835✔
258

259
                case ValTableElemType::String:
260
                {
261
                    m_bytecode.push_back(STRING_TYPE);
1,220✔
262
                    auto t = std::get<std::string>(val.value);
1,220✔
263
                    std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
22,318✔
264
                        return static_cast<uint8_t>(i);
21,098✔
265
                    });
266
                    break;
267
                }
2,501✔
268

269
                case ValTableElemType::PageAddr:
270
                {
271
                    m_bytecode.push_back(FUNC_TYPE);
1,281✔
272
                    const std::size_t addr = std::get<std::size_t>(val.value);
1,281✔
273
                    serializeOn2BytesToVecBE(addr, m_bytecode);
1,281✔
274
                    break;
275
                }
1,281✔
276
            }
3,116✔
277

278
            m_bytecode.push_back(0_u8);
3,116✔
279
        }
3,116✔
280
    }
123✔
281

282
    void IRCompiler::pushFilenameTable()
123✔
283
    {
123✔
284
        if (m_filenames.size() > std::numeric_limits<uint16_t>::max())
123✔
NEW
285
            throw std::overflow_error(fmt::format("Too many filenames: {}, exceeds the maximum size of 2^16 - 1", m_filenames.size()));
×
286

287
        m_bytecode.push_back(FILENAMES_TABLE_START);
123✔
288
        // push number of elements
289
        serializeOn2BytesToVecBE(m_filenames.size(), m_bytecode);
123✔
290

291
        for (const auto& name : m_filenames)
333✔
292
        {
293
            std::ranges::transform(name, std::back_inserter(m_bytecode), [](const char i) {
15,717✔
294
                return static_cast<uint8_t>(i);
15,507✔
295
            });
296
            m_bytecode.push_back(0_u8);
210✔
297
        }
210✔
298
    }
123✔
299

300
    // Write the instruction location table: the INST_LOC_TABLE_START marker,
    // the number of entries on 2 bytes, then for every entry the page pointer,
    // the instruction pointer and the filename id (2 bytes each), followed by
    // the source line written with serializeToVecBE.
    //
    // An entry is recorded for an instruction with a valid source location
    // only when it differs from the last recorded entry by its file, its line
    // or its page. Labels do not advance the instruction pointer.
    //
    // Must be called after m_filenames has been filled (see IRCompiler::process).
    // Throws std::overflow_error when there are more than 2^16 - 1 entries,
    // since the count could not be represented on 2 bytes.
    void IRCompiler::pushInstLocTable(const std::vector<IR::Block>& pages)
    {
        std::vector<internal::InstLoc> locations;
        for (std::size_t i = 0, end = pages.size(); i < end; ++i)
        {
            uint16_t ip = 0;

            for (const auto& inst : pages[i])
            {
                if (inst.hasValidSourceLocation())
                {
                    // we are guaranteed to have a value since we listed all existing filenames in IRCompiler::process before,
                    // thus we do not have to check if std::ranges::find returned a valid iterator.
                    const auto file_id = static_cast<uint16_t>(std::distance(m_filenames.begin(), std::ranges::find(m_filenames, inst.filename())));

                    // skip redundant instruction location: same file, same line
                    // and same page as the last recorded one
                    const bool redundant = !locations.empty() &&
                        locations.back().filename_id == file_id &&
                        locations.back().line == inst.sourceLine() &&
                        locations.back().page_pointer == i;

                    if (!redundant)
                        locations.push_back(
                            { .page_pointer = static_cast<uint16_t>(i),
                              .inst_pointer = ip,
                              .filename_id = file_id,
                              .line = static_cast<uint32_t>(inst.sourceLine()) });
                }

                if (inst.kind() != IR::Kind::Label)
                    ++ip;
            }
        }

        // same guard as the other tables: the entry count is written on 2 bytes
        if (locations.size() > std::numeric_limits<uint16_t>::max())
            throw std::overflow_error(fmt::format("Too many instruction locations: {}, exceeds the maximum size of 2^16 - 1", locations.size()));

        m_bytecode.push_back(INST_LOC_TABLE_START);
        serializeOn2BytesToVecBE(locations.size(), m_bytecode);

        for (const auto& loc : locations)
        {
            serializeOn2BytesToVecBE(loc.page_pointer, m_bytecode);
            serializeOn2BytesToVecBE(loc.inst_pointer, m_bytecode);
            serializeOn2BytesToVecBE(loc.filename_id, m_bytecode);
            serializeToVecBE(loc.line, m_bytecode);
        }
    }
349
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc