• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vla5924-practice / compiler-project / 13926013161

18 Mar 2025 02:40PM UTC coverage: 82.886% (+6.2%) from 76.638%
13926013161

Pull #212

github

web-flow
Merge fde87dfba into 993c9a6a4
Pull Request #212: Implement fold control flow optimization

35 of 37 new or added lines in 1 file covered. (94.59%)

234 existing lines in 10 files now uncovered.

4722 of 5697 relevant lines covered (82.89%)

270.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.38
/compiler/lib/frontend/parser/parser.cpp
1
#include "parser/parser.hpp"
2

3
#include <cassert>
4
#include <functional>
5
#include <iterator>
6
#include <memory>
7
#include <stack>
8
#include <stdexcept>
9
#include <string>
10
#include <unordered_map>
11
#include <variant>
12

13
#include "compiler/ast/node.hpp"
14
#include "compiler/ast/node_type.hpp"
15
#include "compiler/ast/types.hpp"
16
#include "compiler/utils/error_buffer.hpp"
17

18
#include "lexer/token.hpp"
19
#include "lexer/token_types.hpp"
20
#include "parser/parser_context.hpp"
21
#include "parser/parser_error.hpp"
22
#include "parser/type_registry.hpp"
23

24
using namespace ast;
25
using namespace lexer;
26
using namespace parser;
27

28
namespace {
29

30
using SubExpression = std::variant<TokenIterator, Node::Ptr>;
31

32
bool isVariableDeclaration(const TokenIterator &tokenIter, const TokenIterator &tokenEnd) {
325✔
33
    if (tokenIter == tokenEnd || std::next(tokenIter) == tokenEnd || std::next(tokenIter, 2) == tokenEnd)
325✔
34
        return false;
13✔
35

36
    const Token &varName = *tokenIter;
312✔
37
    const Token &colon = *std::next(tokenIter);
312✔
38
    const Token &varType = *std::next(tokenIter, 2);
312✔
39
    return varName.type == TokenType::Identifier && colon.is(Special::Colon) || TypeRegistry::isTypename(varType);
312✔
40
}
41

42
enum class OperationType {
43
    Unknown,
44
    Unary,
45
    Binary,
46
};
47

48
enum class ExpressionTokenType {
49
    Unknown,
50
    Operation,
51
    Operand,
52
    OpeningBrace,
53
    ClosingBrace,
54
    RectBrace,
55
};
56

57
OperationType getOperationType(const Token &token) {
738✔
58
    if (token.type == TokenType::Operator) {
738✔
59
        const auto &op = token.op();
703✔
60
        switch (op) {
703✔
61
        case Operator::Add:
703✔
62
        case Operator::Sub:
63
        case Operator::Mult:
64
        case Operator::Div:
65
        case Operator::Equal:
66
        case Operator::NotEqual:
67
        case Operator::Less:
68
        case Operator::Greater:
69
        case Operator::LessEqual:
70
        case Operator::GreaterEqual:
71
        case Operator::Assign:
72
            return OperationType::Binary;
703✔
73
        default:
×
UNCOV
74
            return OperationType::Unknown;
×
75
        }
76
    }
77
    if (token.type == TokenType::Keyword) {
35✔
78
        const auto &kw = token.kw();
35✔
79
        switch (kw) {
35✔
80
        case Keyword::And:
35✔
81
        case Keyword::Or:
82
            return OperationType::Binary;
35✔
83
        case Keyword::Not:
×
84
            return OperationType::Unary;
×
85
        default:
×
UNCOV
86
            return OperationType::Unknown;
×
87
        }
88
    }
UNCOV
89
    return OperationType::Unknown;
×
90
}
91

92
OperationType getOperationType(const Node &node) {
1,218✔
93
    switch (node.type) {
1,218✔
94
    case NodeType::BinaryOperation:
717✔
95
        return OperationType::Binary;
717✔
96
    case NodeType::UnaryOperation:
×
UNCOV
97
        return OperationType::Unary;
×
98
    default:
501✔
99
        return OperationType::Unknown;
501✔
100
    }
101
}
102

103
ExpressionTokenType getExpressionTokenType(const Token &token) {
1,802✔
104
    if (token.type == TokenType::Identifier || token.type == TokenType::IntegerLiteral ||
1,278✔
105
        token.type == TokenType::FloatingPointLiteral || token.type == TokenType::StringLiteral ||
784✔
106
        token.is(Keyword::True) || token.is(Keyword::False))
3,080✔
107
        return ExpressionTokenType::Operand;
1,152✔
108
    if (token.is(Operator::LeftBrace))
650✔
109
        return ExpressionTokenType::OpeningBrace;
58✔
110
    if (token.is(Operator::RightBrace))
592✔
111
        return ExpressionTokenType::ClosingBrace;
29✔
112
    if (token.is(Operator::RectLeftBrace) || token.is(Operator::RectRightBrace))
563✔
113
        return ExpressionTokenType::RectBrace;
64✔
114
    if (getOperationType(token) != OperationType::Unknown)
499✔
115
        return ExpressionTokenType::Operation;
499✔
UNCOV
116
    return ExpressionTokenType::Unknown;
×
117
}
118

119
ExpressionTokenType getExpressionTokenType(const SubExpression &subexpr) {
×
120
    if (std::holds_alternative<Node::Ptr>(subexpr))
×
121
        return ExpressionTokenType::Operand;
×
UNCOV
122
    return getExpressionTokenType(*std::get<TokenIterator>(subexpr));
×
123
}
124

125
BinaryOperation getBinaryOperation(const Token &token) {
425✔
126
    if (token.type == TokenType::Operator) {
425✔
127
        const auto &op = token.op();
399✔
128
        switch (op) {
399✔
129
        case Operator::Add:
138✔
130
            return BinaryOperation::Add;
138✔
131
        case Operator::Sub:
14✔
132
            return BinaryOperation::Sub;
14✔
133
        case Operator::Mult:
49✔
134
            return BinaryOperation::Mult;
49✔
135
        case Operator::Div:
16✔
136
            return BinaryOperation::Div;
16✔
137
        case Operator::Equal:
8✔
138
            return BinaryOperation::Equal;
8✔
139
        case Operator::NotEqual:
2✔
140
            return BinaryOperation::NotEqual;
2✔
141
        case Operator::Less:
5✔
142
            return BinaryOperation::Less;
5✔
143
        case Operator::Greater:
9✔
144
            return BinaryOperation::Greater;
9✔
145
        case Operator::LessEqual:
2✔
146
            return BinaryOperation::LessEqual;
2✔
147
        case Operator::GreaterEqual:
2✔
148
            return BinaryOperation::GreaterEqual;
2✔
149
        case Operator::Assign:
128✔
150
            return BinaryOperation::Assign;
128✔
151
        default:
26✔
152
            return BinaryOperation::Unknown;
26✔
153
        }
154
    }
155
    if (token.type == TokenType::Keyword) {
26✔
156
        const auto &kw = token.kw();
26✔
157
        switch (kw) {
26✔
158
        case Keyword::And:
9✔
159
            return BinaryOperation::And;
9✔
160
        case Keyword::Or:
17✔
161
            return BinaryOperation::Or;
17✔
162
        default:
×
UNCOV
163
            return BinaryOperation::Unknown;
×
164
        }
165
    }
UNCOV
166
    return BinaryOperation::Unknown;
×
167
}
168

169
UnaryOperation getUnaryOperation(const Token &token) {
45✔
170
    if (token.type == TokenType::Operator) {
45✔
171
        const auto &op = token.op();
35✔
172
        switch (op) {
35✔
173
        case Operator::Sub:
35✔
174
            return UnaryOperation::Negative;
35✔
175
        default:
×
UNCOV
176
            return UnaryOperation::Unknown;
×
177
        }
178
    }
179
    if (token.type == TokenType::Keyword) {
10✔
180
        const auto &kw = token.kw();
10✔
181
        switch (kw) {
10✔
182
        case Keyword::Not:
10✔
183
            return UnaryOperation::Not;
10✔
184
        default:
×
UNCOV
185
            return UnaryOperation::Unknown;
×
186
        }
187
    }
UNCOV
188
    return UnaryOperation::Unknown;
×
189
}
190

191
size_t getOperationPriority(const Token &token) {
186✔
192
    BinaryOperation op = getBinaryOperation(token);
186✔
193
    switch (op) {
186✔
194
    case BinaryOperation::Mult:
37✔
195
    case BinaryOperation::Div:
196
        return 10;
37✔
197
    case BinaryOperation::Add:
77✔
198
    case BinaryOperation::Sub:
199
        return 20;
77✔
200
    case BinaryOperation::Less:
4✔
201
    case BinaryOperation::LessEqual:
202
    case BinaryOperation::Greater:
203
    case BinaryOperation::GreaterEqual:
204
        return 30;
4✔
205
    case BinaryOperation::Equal:
2✔
206
    case BinaryOperation::NotEqual:
207
        return 35;
2✔
208
    case BinaryOperation::And:
4✔
209
        return 40;
4✔
210
    case BinaryOperation::Or:
11✔
211
        return 50;
11✔
212
    case BinaryOperation::Assign:
25✔
213
        return 60;
25✔
214
    default:
26✔
215
        return -1;
26✔
216
    }
217
    return -1;
218
}
219

220
size_t getOperandCount(OperationType type) {
1,218✔
221
    if (type == OperationType::Binary)
1,218✔
222
        return 2U;
717✔
223
    if (type == OperationType::Unary)
501✔
UNCOV
224
        return 1U;
×
225
    return -1;
501✔
226
}
227

228
bool isFunctionCall(const TokenIterator &tokenIter) {
1,146✔
229
    return tokenIter->type == TokenType::Identifier && std::next(tokenIter)->is(Operator::LeftBrace);
1,146✔
230
}
231

232
bool isListAccessor(const TokenIterator &tokenIter) {
1,055✔
233
    return tokenIter->type == TokenType::Identifier && std::next(tokenIter)->is(Operator::RectLeftBrace);
1,055✔
234
}
235

236
bool canBeUnaryOperation(const TokenIterator &tokenIter) {
982✔
237
    if (tokenIter->type == TokenType::Operator) {
982✔
238
        return tokenIter->op() == Operator::Sub || tokenIter->op() == Operator::Add;
385✔
239
    }
240
    if (tokenIter->type == TokenType::Keyword) {
597✔
241
        return tokenIter->kw() == Keyword::Not;
27✔
242
    }
243
    return false;
570✔
244
}
245

246
bool isUnaryOperation(const TokenIterator &tokenIter, const ExpressionTokenType &prevTokenIter) {
982✔
247
    return canBeUnaryOperation(tokenIter) &&
1,102✔
248
           (prevTokenIter == ExpressionTokenType::Operation || prevTokenIter == ExpressionTokenType::OpeningBrace);
1,102✔
249
}
250

251
std::string unescapeStringLiteral(const std::string &str) {
20✔
252
    std::string result;
20✔
253
    result.reserve(str.size());
20✔
254
    std::string::value_type prev = '\0';
20✔
255
    bool backslash = false;
20✔
256
    for (auto c : str) {
62✔
257
        if (!backslash && prev == '\\') {
42✔
258
            backslash = false;
7✔
259
            if (c == '\\')
7✔
260
                backslash = true;
1✔
261
            else if (c == 'n')
6✔
262
                result.back() = '\n';
6✔
263
            else if (c == 'r')
×
UNCOV
264
                result.back() = '\r';
×
265
            else if (c == 't')
×
UNCOV
266
                result.back() = '\t';
×
267
            else
UNCOV
268
                result.push_back(c);
×
269
        } else {
270
            result.push_back(c);
35✔
271
        }
272
        prev = c;
42✔
273
    }
274
    result.shrink_to_fit();
20✔
275
    return result;
20✔
UNCOV
276
}
×
277

278
void buildExpressionSubtree(std::stack<SubExpression> &postfixForm, const Node::Ptr &root, ErrorBuffer &errors) {
501✔
279
    Node::Ptr currNode = root;
501✔
280
    while (!postfixForm.empty()) {
1,480✔
281
        const SubExpression &subexpr = postfixForm.top();
979✔
282
        if (std::holds_alternative<TokenIterator>(subexpr)) {
979✔
283
            const Token &token = *std::get<TokenIterator>(subexpr);
815✔
284
            ExpressionTokenType expType = getExpressionTokenType(token);
815✔
285
            if (expType == ExpressionTokenType::Operation) {
815✔
286
                OperationType opType = getOperationType(token);
239✔
287
                if (opType == OperationType::Binary) {
239✔
288
                    currNode = ParserContext::unshiftChildNode(currNode, NodeType::BinaryOperation, token.ref);
239✔
289
                    currNode->value = getBinaryOperation(token);
239✔
UNCOV
290
                } else if (opType == OperationType::Unary) {
×
UNCOV
291
                    currNode = ParserContext::unshiftChildNode(currNode, NodeType::UnaryOperation, token.ref);
×
292
                } else {
UNCOV
293
                    errors.push<ParserError>(token,
×
294
                                             "Unknown operator found in expression, it must be either unary or binary");
295
                }
296
            } else if (expType == ExpressionTokenType::Operand) {
576✔
297
                if (token.type == TokenType::Identifier) {
576✔
298
                    Node::Ptr node = ParserContext::unshiftChildNode(currNode, NodeType::VariableName, token.ref);
262✔
299
                    node->value = token.id();
262✔
300
                } else if (token.type == TokenType::IntegerLiteral) {
576✔
301
                    Node::Ptr node =
302
                        ParserContext::unshiftChildNode(currNode, NodeType::IntegerLiteralValue, token.ref);
247✔
303
                    try {
304
                        node->value = std::stol(token.literal().c_str());
249✔
305
                    } catch (const std::out_of_range &) {
1✔
306
                        errors.push<ParserError>(token,
1✔
307
                                                 "Failed to convert integer literal. The number is out of range");
308
                    }
1✔
309
                } else if (token.type == TokenType::FloatingPointLiteral) {
314✔
310
                    Node::Ptr node =
311
                        ParserContext::unshiftChildNode(currNode, NodeType::FloatingPointLiteralValue, token.ref);
41✔
312
                    try {
313
                        node->value = std::stod(token.literal());
41✔
314
                    } catch (const std::out_of_range &) {
2✔
315
                        errors.push<ParserError>(token, "Failed to convert float literal. The number is out of range");
2✔
316
                    }
2✔
317
                } else if (token.type == TokenType::StringLiteral) {
67✔
318
                    Node::Ptr node = ParserContext::unshiftChildNode(currNode, NodeType::StringLiteralValue, token.ref);
20✔
319
                    node->value = unescapeStringLiteral(token.literal());
20✔
320
                } else if (token.is(Keyword::False) || token.is(Keyword::True)) {
26✔
321
                    Node::Ptr node =
322
                        ParserContext::unshiftChildNode(currNode, NodeType::BooleanLiteralValue, token.ref);
6✔
323
                    if (token.is(Keyword::True))
6✔
324
                        node->value = true;
3✔
325
                    else if (token.is(Keyword::False))
3✔
326
                        node->value = false;
3✔
327
                }
6✔
328
            }
329
        } else {
330
            // can be FunctionCall node and list ListAccessor
331
            Node::Ptr callNode = std::get<Node::Ptr>(subexpr);
164✔
332
            assert(callNode->type == NodeType::FunctionCall or callNode->type == NodeType::ListAccessor or
164✔
333
                   callNode->type == NodeType::UnaryOperation);
334
            callNode->parent = currNode;
164✔
335
            currNode->children.push_front(callNode);
164✔
336
        }
164✔
337
        while (currNode->numChildren() >= getOperandCount(getOperationType(*currNode)))
1,218✔
338
            currNode = currNode->parent;
239✔
339
        postfixForm.pop();
979✔
340
    }
341
}
501✔
342

343
std::stack<SubExpression> generatePostfixForm(TokenIterator tokenIterBegin, TokenIterator tokenIterEnd,
501✔
344
                                              ErrorBuffer &errors) {
345
    std::stack<SubExpression> postfixForm;
501✔
346
    std::stack<TokenIterator> operations;
501✔
347
    ExpressionTokenType prevExpType = ExpressionTokenType::Operation;
501✔
348
    for (auto tokenIter = tokenIterBegin; tokenIter != tokenIterEnd; tokenIter++) {
1,602✔
349
        const Token &token = *tokenIter;
1,101✔
350
        if (isFunctionCall(tokenIter)) {
1,101✔
351
            Node::Ptr funcCallNode = std::make_shared<Node>(NodeType::FunctionCall);
87✔
352
            auto node = ParserContext::pushChildNode(funcCallNode, NodeType::FunctionName, token.ref);
87✔
353
            node->value = token.id();
87✔
354
            auto argsBegin = std::next(tokenIter);
87✔
355
            auto it = argsBegin;
87✔
356
            unsigned nestingLevel = 0;
87✔
357
            do {
358
                if (it->is(Operator::RightBrace))
443✔
359
                    nestingLevel--;
100✔
360
                else if (it->is(Operator::LeftBrace))
343✔
361
                    nestingLevel++;
100✔
362
                it++;
443✔
363
            } while (nestingLevel > 0);
443✔
364
            auto argsEnd = std::prev(it);
87✔
365
            if (std::distance(argsBegin, argsEnd) > 1) {
87✔
366
                auto argsNode = ParserContext::pushChildNode(funcCallNode, NodeType::FunctionArguments, token.ref);
64✔
367
                auto argBegin = std::next(argsBegin);
64✔
368
                for (auto argsIter = argBegin; argsIter != std::next(argsEnd); argsIter++) {
397✔
369
                    if (!argsIter->is(Operator::Comma) && argsIter != argsEnd)
333✔
370
                        continue;
226✔
371
                    const Token &token = *argsIter;
107✔
372
                    std::stack<SubExpression> argPostfixForm = generatePostfixForm(argBegin, argsIter, errors);
107✔
373
                    auto exprNode = ParserContext::pushChildNode(argsNode, NodeType::Expression, token.ref);
107✔
374
                    buildExpressionSubtree(argPostfixForm, exprNode, errors);
107✔
375
                    argBegin = std::next(argsIter);
107✔
376
                }
107✔
377
            }
64✔
378
            postfixForm.emplace(funcCallNode);
87✔
379
            tokenIter = argsEnd;
87✔
380
            prevExpType = ExpressionTokenType::Operand;
87✔
381
            continue;
87✔
382
        }
87✔
383
        if (isListAccessor(tokenIter)) {
1,014✔
384
            Node::Ptr listAccessorNode = std::make_shared<Node>(NodeType::ListAccessor);
32✔
385
            auto node = ParserContext::pushChildNode(listAccessorNode, NodeType::VariableName, token.ref);
32✔
386
            node->value = token.id();
32✔
387
            auto exprBegin = std::next(tokenIter);
32✔
388
            auto it = exprBegin;
32✔
389
            unsigned nestingLevel = 0;
32✔
390
            do {
391
                if (it->is(Operator::RectRightBrace))
151✔
392
                    nestingLevel--;
41✔
393
                else if (it->is(Operator::RectLeftBrace))
110✔
394
                    nestingLevel++;
41✔
395
                it++;
151✔
396
            } while (nestingLevel > 0);
151✔
397
            std::stack<SubExpression> argPostfixForm = generatePostfixForm(exprBegin, it, errors);
32✔
398
            auto exprNode = ParserContext::pushChildNode(listAccessorNode, NodeType::Expression, token.ref);
32✔
399
            buildExpressionSubtree(argPostfixForm, exprNode, errors);
32✔
400
            postfixForm.emplace(listAccessorNode);
32✔
401
            tokenIter = std::prev(it);
32✔
402
            prevExpType = ExpressionTokenType::Operand;
32✔
403
            continue;
32✔
404
        }
32✔
405
        if (isUnaryOperation(tokenIter, prevExpType)) {
982✔
406
            auto unaryNode = std::make_shared<Node>(NodeType::UnaryOperation);
45✔
407
            unaryNode->value = getUnaryOperation(*tokenIter);
45✔
408
            auto argsBegin = std::next(tokenIter);
45✔
409
            auto it = argsBegin;
45✔
410
            unsigned nestingLevel = 0;
45✔
411
            if (isFunctionCall(it) || isListAccessor(it)) {
45✔
412
                nestingLevel++;
7✔
413
                std::advance(it, 2);
7✔
414
            }
415
            do {
416
                if (it->is(Operator::RightBrace) or it->is(Operator::RectRightBrace))
129✔
417
                    nestingLevel--;
26✔
418
                else if (it->is(Operator::LeftBrace) or it->is(Operator::RectLeftBrace))
103✔
419
                    nestingLevel++;
19✔
420
                it++;
129✔
421
            } while (nestingLevel > 0);
129✔
422
            std::stack<SubExpression> argPostfixForm = generatePostfixForm(argsBegin, it, errors);
45✔
423
            auto exprNode = ParserContext::pushChildNode(unaryNode, NodeType::Expression, token.ref);
45✔
424
            buildExpressionSubtree(argPostfixForm, exprNode, errors);
45✔
425
            postfixForm.emplace(unaryNode);
45✔
426
            tokenIter = std::prev(it);
45✔
427
            prevExpType = ExpressionTokenType::Operand;
45✔
428
            continue;
45✔
429
        }
45✔
430
        ExpressionTokenType expType = getExpressionTokenType(token);
937✔
431
        if (expType == ExpressionTokenType::Operand) {
937✔
432
            postfixForm.emplace(tokenIter);
576✔
433
        } else if (expType == ExpressionTokenType::OpeningBrace) {
361✔
434
            operations.emplace(tokenIter);
29✔
435
        } else if (expType == ExpressionTokenType::ClosingBrace) {
332✔
436
            bool foundBrace = false;
29✔
437
            while (!operations.empty()) {
50✔
438
                if (getExpressionTokenType(*operations.top()) != ExpressionTokenType::OpeningBrace) {
50✔
439
                    postfixForm.emplace(operations.top());
21✔
440
                    operations.pop();
21✔
441
                } else {
442
                    foundBrace = true;
29✔
443
                    break;
29✔
444
                }
445
            }
446
            if (!foundBrace) {
29✔
UNCOV
447
                errors.push<ParserError>(token, "Unexpected closing brance in an expression");
×
448
            }
449
            if (!operations.empty())
29✔
450
                operations.pop(); // remove opening brace
29✔
451
        } else if (expType == ExpressionTokenType::Operation) {
303✔
452
            if (operations.empty() || getOperationPriority(token) < getOperationPriority(*operations.top())) {
239✔
453
                operations.emplace(tokenIter);
223✔
454
            } else {
455
                while (!operations.empty() && getOperationPriority(*operations.top()) <= getOperationPriority(token)) {
32✔
456
                    postfixForm.emplace(operations.top());
16✔
457
                    operations.pop();
16✔
458
                }
459
                operations.emplace(tokenIter);
16✔
460
            }
461
        } else if (expType == ExpressionTokenType::RectBrace) {
64✔
462
            continue;
64✔
463
        } else {
UNCOV
464
            errors.push<ParserError>(token, "Unexpected token inside an expression");
×
465
        }
466
        prevExpType = expType;
873✔
467
    }
468
    while (!operations.empty()) {
703✔
469
        postfixForm.emplace(operations.top());
202✔
470
        operations.pop();
202✔
471
    }
472
    return postfixForm;
1,002✔
473
}
501✔
474

475
bool isElementaryType(TypeId typeId) {
145✔
476
    return typeId == IntType || typeId == FloatType || typeId == BoolType || typeId == NoneType;
145✔
477
}
478

479
void parseType(ParserContext &ctx) {
166✔
480
    ctx.node = ctx.pushChildNode(NodeType::TypeName);
166✔
481
    ctx.node->value = TypeRegistry::typeId(ctx.token());
166✔
482
    if (ctx.token().is(Keyword::List)) {
166✔
483
        const Token &leftBrace = (ctx.goNextToken(), ctx.token());
13✔
484
        const Token &varTypeList = (ctx.goNextToken(), ctx.token());
13✔
485
        auto typeId = TypeRegistry::typeId(varTypeList);
13✔
486
        const Token &rightBrace = (ctx.goNextToken(), ctx.token());
13✔
487
        if (!leftBrace.is(Operator::RectLeftBrace) || !isElementaryType(typeId) ||
26✔
488
            !rightBrace.is(Operator::RectRightBrace)) {
26✔
UNCOV
489
            ctx.pushError("Unexpected syntax for list declaration");
×
490
        }
491
        auto node = ctx.pushChildNode(NodeType::TypeName);
13✔
492
        node->value = typeId;
13✔
493
    }
13✔
494
    ctx.goParentNode();
166✔
495
}
166✔
496

497
void parseSimpleStatement(ParserContext &ctx) {
7✔
498
    assert(ctx.tokenIter->is(Keyword::Break) || ctx.tokenIter->is(Keyword::Continue) ||
7✔
499
           ctx.tokenIter->is(Keyword::Pass));
500
    ctx.goNextToken();
7✔
501
    if (!ctx.token().is(Special::EndOfExpression)) {
7✔
UNCOV
502
        ctx.pushError("Unexpected token in a break|continue|pass statement");
×
503
        ctx.goNextExpression();
×
504
    }
505
    ctx.goParentNode();
7✔
506
    ctx.goNextToken();
7✔
507
}
7✔
508

509
void parseBranchRoot(ParserContext &ctx) {
194✔
510
    while (ctx.nestingLevel > 0) {
566✔
511
        if (ctx.tokenIter == ctx.tokenEnd)
566✔
512
            return;
44✔
513
        while (ctx.token().is(Special::EndOfExpression) || ctx.token().is(Special::Colon)) {
930✔
514
            ctx.goNextToken();
498✔
515
            if (ctx.tokenIter == ctx.tokenEnd)
498✔
516
                return;
90✔
517
        }
518
        int currNestingLevel = 0;
432✔
519
        while (ctx.token().is(Special::Indentation)) {
923✔
520
            currNestingLevel++;
491✔
521
            ctx.goNextToken();
491✔
522
        }
523
        if (currNestingLevel > ctx.nestingLevel) {
432✔
UNCOV
524
            ctx.pushError("Unexpected indentation mismatch: " + std::to_string(ctx.nestingLevel) +
×
UNCOV
525
                          " indentation(s) expected, " + std::to_string(currNestingLevel) + " indentation(s) given");
×
526
        } else if (currNestingLevel < ctx.nestingLevel) {
432✔
527
            ctx.goParentNode();
60✔
528
            while (ctx.node->type != NodeType::BranchRoot) {
126✔
529
                if (!ctx.node->parent)
90✔
530
                    break;
24✔
531
                ctx.goParentNode();
66✔
532
            }
533
            ctx.nestingLevel--;
60✔
534
            std::advance(ctx.tokenIter, -currNestingLevel);
60✔
535
            return;
60✔
536
        }
537

538
        const Token &currToken = ctx.token();
372✔
539
        if (currToken.is(Keyword::If)) {
372✔
540
            ctx.node = ctx.pushChildNode(NodeType::IfStatement);
25✔
541
        } else if (currToken.is(Keyword::While)) {
347✔
542
            ctx.node = ctx.pushChildNode(NodeType::WhileStatement);
6✔
543
        } else if (currToken.is(Keyword::For)) {
341✔
544
            ctx.node = ctx.pushChildNode(NodeType::ForStatement);
16✔
545
        } else if (isVariableDeclaration(ctx.tokenIter, ctx.tokenEnd)) {
325✔
546
            ctx.node = ctx.pushChildNode(NodeType::VariableDeclaration);
136✔
547
        } else if (currToken.is(Keyword::Elif) || currToken.is(Keyword::Else)) {
189✔
548
            auto lastNode = ctx.node->children.back();
15✔
549
            if (lastNode->type == NodeType::IfStatement) {
15✔
550
                auto nodeType = currToken.is(Keyword::Elif) ? NodeType::ElifStatement : NodeType::ElseStatement;
15✔
551
                ctx.node = ParserContext::pushChildNode(lastNode, nodeType, currToken.ref);
15✔
552
            } else {
UNCOV
553
                ctx.pushError((currToken.is(Keyword::Elif) ? std::string("elif") : std::string("else")) +
×
554
                              " is not allowed here");
555
            }
556
        } else if (currToken.is(Keyword::Return)) {
189✔
557
            ctx.node = ctx.pushChildNode(NodeType::ReturnStatement);
30✔
558
        } else if (currToken.is(Keyword::Continue)) {
144✔
559
            ctx.node = ctx.pushChildNode(NodeType::ContinueStatement);
1✔
560
            parseSimpleStatement(ctx);
1✔
561
            continue;
1✔
562
        } else if (currToken.is(Keyword::Break)) {
143✔
563
            ctx.node = ctx.pushChildNode(NodeType::BreakStatement);
1✔
564
            parseSimpleStatement(ctx);
1✔
565
            continue;
1✔
566
        } else if (currToken.is(Keyword::Pass)) {
142✔
567
            ctx.node = ctx.pushChildNode(NodeType::PassStatement);
5✔
568
            parseSimpleStatement(ctx);
5✔
569
            continue;
5✔
570
        } else {
571
            ctx.node = ctx.pushChildNode(NodeType::Expression);
137✔
572
        }
573
        ctx.propagate();
365✔
574
    }
575
}
576

577
void parseElifStatement(ParserContext &ctx) {
9✔
578
    assert(ctx.tokenIter->is(Keyword::Elif));
9✔
579
    ctx.goNextToken();
9✔
580
    ctx.node = ctx.pushChildNode(NodeType::Expression);
9✔
581
    ctx.propagate();
9✔
582
    if (!ctx.token().is(Special::Colon)) {
9✔
UNCOV
583
        ctx.pushError("Colon expected here");
×
UNCOV
584
        ctx.goNextExpression();
×
585
    }
586
    ctx.node = ctx.pushChildNode(NodeType::BranchRoot);
9✔
587
    ctx.nestingLevel++;
9✔
588
    ctx.propagate();
9✔
589
}
9✔
590

591
void parseElseStatement(ParserContext &ctx) {
6✔
592
    assert(ctx.tokenIter->is(Keyword::Else));
6✔
593
    ctx.goNextToken();
6✔
594
    if (!ctx.token().is(Special::Colon)) {
6✔
UNCOV
595
        ctx.pushError("Colon expected here");
×
UNCOV
596
        ctx.goNextExpression();
×
597
    }
598
    ctx.node = ctx.pushChildNode(NodeType::BranchRoot);
6✔
599
    ctx.nestingLevel++;
6✔
600
    ctx.propagate();
6✔
601
}
6✔
602

603
void parseExpression(ParserContext &ctx) {
294✔
604
    auto it = ctx.tokenIter;
294✔
605
    while (!it->is(Special::Colon) && !it->is(Special::EndOfExpression))
1,582✔
606
        it++;
1,288✔
607
    const auto &tokenIterBegin = ctx.tokenIter;
294✔
608
    const auto &tokenIterEnd = it;
294✔
609
    std::stack<SubExpression> postfixForm = generatePostfixForm(tokenIterBegin, tokenIterEnd, ctx.errors);
294✔
610
    buildExpressionSubtree(postfixForm, ctx.node, ctx.errors);
294✔
611
    ctx.tokenIter = tokenIterEnd;
294✔
612
    ctx.goParentNode();
294✔
613
}
294✔
614

615
void parseFunctionArguments(ParserContext &ctx) {
132✔
616
    assert(ctx.token().is(Operator::LeftBrace));
132✔
617
    ctx.goNextToken();
132✔
618
    while (!ctx.token().is(Operator::RightBrace)) {
162✔
619
        const Token &argName = ctx.token();
30✔
620
        const Token &colon = (ctx.goNextToken(), ctx.token());
30✔
621
        const Token &argType = (ctx.goNextToken(), ctx.token());
30✔
622
        if (argName.type != TokenType::Identifier || !colon.is(Special::Colon) || !TypeRegistry::isTypename(argType)) {
30✔
UNCOV
623
            ctx.pushError("Function argument declaration is ill-formed");
×
UNCOV
624
            while (!ctx.token().is(Operator::RightBrace) && !ctx.token().is(Special::Colon))
×
UNCOV
625
                ctx.goNextToken();
×
UNCOV
626
            break;
×
627
        }
628
        ctx.node = ctx.pushChildNode(NodeType::FunctionArgument);
30✔
629
        parseType(ctx);
30✔
630
        auto argNameNode = ParserContext::pushChildNode(ctx.node, NodeType::VariableName, argName.ref);
30✔
631
        argNameNode->value = argName.id();
30✔
632
        ctx.goParentNode();
30✔
633
        ctx.goNextToken();
30✔
634
        if (ctx.token().is(Operator::Comma))
30✔
635
            ctx.goNextToken();
14✔
636
    }
30✔
637
    ctx.goParentNode();
132✔
638
    ctx.goNextToken();
132✔
639
}
132✔
640

641
void parseFunctionDefinition(ParserContext &ctx) {
132✔
642
    assert(ctx.tokenIter->is(Keyword::Definition));
132✔
643
    ctx.goNextToken();
132✔
644
    if (ctx.token().type != TokenType::Identifier) {
132✔
645
        ctx.pushError("Given token is not allowed here in function definition");
×
646
    }
647
    ctx.pushChildNode(NodeType::FunctionName)->value = ctx.token().id();
132✔
648
    ctx.goNextToken();
132✔
649
    if (!ctx.token().is(Operator::LeftBrace)) {
132✔
UNCOV
650
        ctx.pushError("Given token is not allowed here in function definition");
×
651
    }
652
    ctx.node = ctx.pushChildNode(NodeType::FunctionArguments);
132✔
653
    ctx.propagate();
132✔
654
    if (!ctx.token().is(Special::Arrow)) {
132✔
UNCOV
655
        ctx.pushError("Function return type is mandatory in its header");
×
656
    }
657
    ctx.goNextToken();
132✔
658
    if (!TypeRegistry::isTypename(ctx.token())) {
132✔
UNCOV
659
        ctx.pushError("Type name not found");
×
660
    }
661
    auto retTypeId = TypeRegistry::typeId(ctx.token());
132✔
662
    if (!isElementaryType(retTypeId))
132✔
663
        ctx.pushError("Function return type must be one of the following: int, float, bool, None");
×
664
    ctx.pushChildNode(NodeType::FunctionReturnType)->value = retTypeId;
132✔
665
    ctx.goNextToken();
132✔
666
    if (!ctx.token().is(Special::Colon)) {
132✔
UNCOV
667
        ctx.pushError("Colon expected at the end of function header");
×
668
    }
669
    ctx.goNextToken();
132✔
670
    ctx.node = ctx.pushChildNode(NodeType::BranchRoot);
132✔
671
    ctx.nestingLevel = 1;
132✔
672
    ctx.propagate();
132✔
673
}
132✔
674

675
void parseIfStatement(ParserContext &ctx) {
25✔
676
    assert(ctx.tokenIter->is(Keyword::If));
25✔
677
    ctx.goNextToken();
25✔
678
    ctx.node = ctx.pushChildNode(NodeType::Expression);
25✔
679
    ctx.propagate();
25✔
680
    if (!ctx.token().is(Special::Colon)) {
25✔
UNCOV
681
        ctx.pushError("Colon expected here");
×
UNCOV
682
        ctx.goNextExpression();
×
683
    }
684
    ctx.node = ctx.pushChildNode(NodeType::BranchRoot);
25✔
685
    ctx.nestingLevel++;
25✔
686
    ctx.propagate();
25✔
687
}
25✔
688

689
void parseProgramRoot(ParserContext &ctx) {
108✔
690
    while (ctx.tokenIter != ctx.tokenEnd) {
240✔
691
        if (ctx.token().is(Keyword::Definition)) {
132✔
692
            ctx.node = ctx.pushChildNode(NodeType::FunctionDefinition);
132✔
693
            ctx.propagate();
132✔
694
        } else {
UNCOV
695
            ctx.pushError("Function definition was expected");
×
UNCOV
696
            return;
×
697
        }
698
    }
699
}
700

701
void parseReturnStatement(ParserContext &ctx) {
30✔
702
    assert(ctx.tokenIter->is(Keyword::Return));
30✔
703
    ctx.goNextToken();
30✔
704
    if (ctx.token().is(Special::EndOfExpression)) {
30✔
705
        ctx.goParentNode();
15✔
706
        ctx.goNextToken();
15✔
707
        return;
15✔
708
    }
709
    const Token &currToken = ctx.token();
15✔
710
    if (currToken.type != TokenType::FloatingPointLiteral && currToken.type != TokenType::Identifier &&
12✔
711
        currToken.type != TokenType::IntegerLiteral && currToken.type != TokenType::StringLiteral &&
27✔
712
        !currToken.is(Operator::LeftBrace)) {
15✔
UNCOV
713
        ctx.pushError("Expression as function return value was expected");
×
UNCOV
714
        ctx.goNextExpression();
×
UNCOV
715
        return;
×
716
    }
717
    ctx.node = ctx.pushChildNode(NodeType::Expression);
15✔
718
    ctx.propagate();
15✔
719
    ctx.goParentNode();
15✔
720
}
721

722
void parseVariableDeclaration(ParserContext &ctx) {
136✔
723
    const Token &varName = ctx.token();
136✔
724
    ctx.goNextToken();
136✔
725
    const Token &varType = (ctx.goNextToken(), ctx.token());
136✔
726
    parseType(ctx);
136✔
727
    auto node = ctx.pushChildNode(NodeType::VariableName);
136✔
728
    node->value = varName.id();
136✔
729

730
    auto endOfDecl = std::next(ctx.tokenIter);
136✔
731
    if (endOfDecl->is(Special::EndOfExpression)) {
136✔
732
        // declaration without definition
733
        std::advance(ctx.tokenIter, 2);
44✔
734
        ctx.goParentNode();
44✔
735
    } else if (endOfDecl->is(Operator::Assign)) {
92✔
736
        // declaration with definition
737
        ctx.node = ctx.pushChildNode(NodeType::Expression);
92✔
738
        if (varType.is(Keyword::List)) {
92✔
739
            ctx.node = ctx.pushChildNode(NodeType::ListStatement);
9✔
740
        }
741
        std::advance(ctx.tokenIter, 2);
92✔
742
        ctx.propagate();
92✔
743
        ctx.goParentNode();
92✔
744
    } else {
UNCOV
745
        ctx.errors.push<ParserError>(*endOfDecl, "Definition expression or line break was expected");
×
746
    }
747
}
136✔
748

749
void parseWhileStatement(ParserContext &ctx) {
6✔
750
    assert(ctx.tokenIter->is(Keyword::While));
6✔
751
    ctx.goNextToken();
6✔
752
    ctx.node = ctx.pushChildNode(NodeType::Expression);
6✔
753
    ctx.propagate();
6✔
754
    if (!ctx.token().is(Special::Colon)) {
6✔
UNCOV
755
        ctx.pushError("Colon expected here");
×
UNCOV
756
        ctx.goNextExpression();
×
757
    }
758
    ctx.node = ctx.pushChildNode(NodeType::BranchRoot);
6✔
759
    ctx.nestingLevel++;
6✔
760
    ctx.propagate();
6✔
761
}
6✔
762

763
void parseForStatement(ParserContext &ctx) {
16✔
764
    assert(ctx.tokenIter->is(Keyword::For));
16✔
765
    ctx.goNextToken();
16✔
766
    auto forNode = ctx.node;
16✔
767
    auto it = ctx.tokenIter;
16✔
768
    auto forTargets = ParserContext::pushChildNode(forNode, NodeType::ForTargets, ctx.tokenIter->ref);
16✔
769
    while (!it->is(Keyword::In) && !it->is(Special::EndOfExpression)) {
36✔
770
        if (it->type == TokenType::Identifier) {
20✔
771
            auto targetNode = ParserContext::pushChildNode(forTargets, NodeType::VariableName, ctx.tokenIter->ref);
18✔
772
            targetNode->value = it->id();
18✔
773
            it++;
18✔
774
        } else if (it->is(Operator::Comma)) {
20✔
775
            it++;
2✔
776
        } else {
UNCOV
777
            ctx.pushError("Unexpected token in a for statement");
×
778
        }
779
    }
780
    ctx.tokenIter = it;
16✔
781
    ctx.goNextToken();
16✔
782
    ctx.node = ctx.pushChildNode(NodeType::ForIterable);
16✔
783
    ctx.node = ctx.pushChildNode(NodeType::Expression);
16✔
784
    ctx.propagate();
16✔
785
    ctx.goParentNode();
16✔
786
    if (!ctx.token().is(Special::Colon)) {
16✔
787
        ctx.pushError("Colon expected here");
1✔
788
        ctx.goNextExpression();
1✔
789
    }
790
    ctx.node = ctx.pushChildNode(NodeType::BranchRoot);
16✔
791
    ctx.nestingLevel++;
16✔
792
    ctx.propagate();
16✔
793
}
16✔
794

795
void parseListStatement(ParserContext &ctx) {
9✔
796
    assert(ctx.tokenIter->is(Operator::RectLeftBrace));
9✔
797
    while (!ctx.token().is(Operator::RectRightBrace)) {
32✔
798
        ctx.goNextToken();
23✔
799
        auto it = ctx.tokenIter;
23✔
800
        while (!it->is(Operator::Comma) && !it->is(Operator::RectRightBrace))
53✔
801
            it++;
30✔
802
        const auto &tokenIterBegin = ctx.tokenIter;
23✔
803
        const auto &tokenIterEnd = it;
23✔
804
        if (tokenIterEnd->is(Special::EndOfExpression)) {
23✔
UNCOV
805
            ctx.errors.push<ParserError>(*tokenIterEnd, "']' was expected");
×
806
        }
807
        ctx.node = ctx.pushChildNode(NodeType::Expression);
23✔
808
        std::stack<SubExpression> postfixForm = generatePostfixForm(tokenIterBegin, tokenIterEnd, ctx.errors);
23✔
809
        buildExpressionSubtree(postfixForm, ctx.node, ctx.errors);
23✔
810
        ctx.tokenIter = tokenIterEnd;
23✔
811
        ctx.goParentNode();
23✔
812
    }
23✔
813
    ctx.goNextToken();
9✔
814
    if (ctx.token().is(Special::EndOfExpression)) {
9✔
815
        ctx.goParentNode();
6✔
816
    } else if (ctx.token().is(Operator::Mult)) {
3✔
817
        ctx.goNextToken();
3✔
818
        ctx.node = ctx.pushChildNode(NodeType::ListDynamicSize);
3✔
819
        ctx.node = ctx.pushChildNode(NodeType::Expression);
3✔
820
        ctx.propagate();
3✔
821
        ctx.goParentNode();
3✔
822
        ctx.goParentNode();
3✔
823
    } else {
UNCOV
824
        ctx.pushError("Either end of line or '*' was expected");
×
825
    }
826
    ctx.goParentNode();
9✔
827
}
9✔
828

829
// clang-format off
830
#define SUBPARSER(NodeTypeVal) {NodeType::NodeTypeVal, parse##NodeTypeVal}
831

832
static std::unordered_map<NodeType, std::function<void(ParserContext &)>> subparsers = {
833
    SUBPARSER(BranchRoot),
834
    SUBPARSER(ElifStatement),
835
    SUBPARSER(ElseStatement),
836
    SUBPARSER(Expression),
837
    SUBPARSER(FunctionArguments),
838
    SUBPARSER(FunctionDefinition),
839
    SUBPARSER(IfStatement),
840
    SUBPARSER(ProgramRoot),
841
    SUBPARSER(ReturnStatement),
842
    SUBPARSER(VariableDeclaration),
843
    SUBPARSER(WhileStatement),
844
    SUBPARSER(ListStatement),
845
    SUBPARSER(ForStatement),
846
};
847
// clang-format on
848

849
} // namespace
850

851
SyntaxTree Parser::process(const TokenList &tokens) {
108✔
852
    SyntaxTree tree;
108✔
853
    tree.root = std::make_shared<Node>(NodeType::ProgramRoot);
108✔
854

855
    ParserContext ctx = {subparsers, tree.root, tokens.begin(), tokens.end(), 0};
108✔
856
    ctx.propagate();
108✔
857
    if (!ctx.errors.empty()) {
108✔
858
        throw ctx.errors;
4✔
859
    }
860

861
    return tree;
208✔
862
}
112✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc