• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

FormulasQuestion / moodle-qtype_formulas / 13200038469

07 Feb 2025 12:40PM UTC coverage: 76.583% (+1.5%) from 75.045%
13200038469

Pull #62

github

web-flow
Merge 27bf7cac9 into acd272945
Pull Request #62: Rewrite the parser

2517 of 3116 new or added lines in 22 files covered. (80.78%)

146 existing lines in 6 files now uncovered.

2976 of 3886 relevant lines covered (76.58%)

431.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.29
/classes/local/lexer.php
1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16

17
namespace qtype_formulas\local;
18

19
/**
 * Formulas Question Lexer class
 *
 * Splits an input string into a flat list of tokens (numbers, strings, identifiers, operators
 * and punctuation). White space and comments (from a # character until the end of the line)
 * are dropped and do not appear in the token list.
 *
 * @package    qtype_formulas
 * @copyright  2022 Philipp Imhof
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class lexer {
    /** @var null value returned by read_next_token() when there is no more input */
    const EOF = null;

    /** @var array token type for every character that forms a token on its own */
    private const SINGLE_CHAR_TOKENS = [
        '[' => token::OPENING_BRACKET,
        '(' => token::OPENING_PAREN,
        '{' => token::OPENING_BRACE,
        ']' => token::CLOSING_BRACKET,
        ')' => token::CLOSING_PAREN,
        '}' => token::CLOSING_BRACE,
        ',' => token::ARG_SEPARATOR,
        '\\' => token::PREFIX,
        ';' => token::END_OF_STATEMENT,
        'π' => token::CONSTANT,
    ];

    /** @var ?input_stream input stream */
    private ?input_stream $inputstream = null;

    /** @var token[] list of all tokens in the input stream */
    private array $tokens = [];

    /** @var int level of nested ternary operators */
    private int $pendingternary = 0;

    /**
     * Constructor. The entire input is tokenized immediately.
     *
     * @param string $str the input to be tokenized
     */
    public function __construct(string $str) {
        $this->inputstream = new input_stream($str);
        $this->build_token_list();
    }

    /**
     * Return the list of all tokens.
     *
     * @return token[]
     */
    public function get_tokens(): array {
        return $this->tokens;
    }

    /**
     * Go through the entire input and fetch all tokens except comments and white space.
     * Store them in the corresponding variable, so that they can be retrieved.
     *
     * @return void
     */
    private function build_token_list(): void {
        // Collect into a local list first, so the property is only written once the whole
        // input has been read.
        $tokens = [];
        $currenttoken = $this->read_next_token();
        while ($currenttoken !== self::EOF) {
            $tokens[] = $currenttoken;
            $currenttoken = $this->read_next_token();
        }
        $this->tokens = $tokens;
    }

    /**
     * Check whether a character taken from the input stream matches a regular expression.
     *
     * The input stream's end-of-input marker is declared elsewhere and might not be a string;
     * such values never match and are not handed to preg_match().
     *
     * @param string $pattern regular expression to test the character against
     * @param mixed $char value obtained from the input stream
     * @return bool whether $char is a string matching $pattern
     */
    private static function char_matches(string $pattern, $char): bool {
        return is_string($char) && preg_match($pattern, $char) === 1;
    }

    /**
     * Find out what type of token is next and read it from the input stream by calling the
     * corresponding dedicated method.
     *
     * If the next character cannot start any token, the error is reported through the input
     * stream's die() method (presumably by throwing; it is defined outside of this class).
     *
     * @return ?token the token or null, if we have reached the end of the input stream
     */
    private function read_next_token(): ?token {
        // Check the next char and quit if we are at the end of the stream.
        $currentchar = $this->inputstream->peek();
        if ($currentchar === input_stream::EOF) {
            return self::EOF;
        }
        // Skip all white space.
        $this->consume_whitespace();
        $currentchar = $this->inputstream->peek();
        // A # character is the start of a comment. Consuming a comment also consumes the white
        // space after it, so the next char may be the start of yet another comment; we must
        // therefore loop until we reach something else.
        while ($currentchar === '#') {
            $this->consume_comment();
            $currentchar = $this->inputstream->peek();
        }
        // If there is nothing after stripping whitespace and comments, we may quit.
        if ($currentchar === input_stream::EOF) {
            return self::EOF;
        }
        // If we have a " or ' character, this is the start of a string.
        if ($currentchar === '"' || $currentchar === "'") {
            return $this->read_string();
        }
        // If we are at the start of a number, return that number as the next token.
        if (self::char_matches('/[0-9]/', $currentchar)) {
            return $this->read_number();
        }
        // The decimal point counts as the start of a number, iff it is followed by a digit.
        if ($currentchar === '.' && self::char_matches('/[0-9]/', $this->inputstream->peek(1))) {
            return $this->read_number();
        }
        // A letter or underscore indicates the start of an identifier, i. e. a variable or function name.
        if (self::char_matches('/[_A-Za-z]/', $currentchar)) {
            return $this->read_identifier();
        }
        // Unless we are in the middle of a ternary operator, we treat : as a RANGE_SEPARATOR.
        if ($currentchar === ':' && $this->pendingternary < 1) {
            return $this->read_single_char_token(token::RANGE_SEPARATOR);
        }
        // Operators always start with specific characters and may be up to two characters long.
        if (self::char_matches('/[-+*\/%=&|~^<>!?:]/', $currentchar)) {
            // After a ? operator, we expect a : to finish the ternary operator.
            // Note: In case of a syntax error, this counter might remain set even after the end
            // of a statement and we could therefore wrongfully interpret a : as an operator.
            // We don't mind, because bad syntax of a ternary operator will lead to a syntax error
            // anyway.
            if ($currentchar === '?') {
                $this->pendingternary++;
            }
            // After a : operator, the ternary operator is no longer pending. In case of *nested*
            // ternary operators, we descend one level.
            if ($currentchar === ':') {
                $this->pendingternary--;
            }
            return $this->read_operator();
        }
        // There are some single-character tokens. We look the character up in the table rather
        // than using a byte-wise character class, because π is a multi-byte character and its
        // individual bytes also occur in other characters.
        if (isset(self::SINGLE_CHAR_TOKENS[$currentchar])) {
            return $this->read_single_char_token(self::SINGLE_CHAR_TOKENS[$currentchar]);
        }
        // If we are still here, that's not good at all. We need to read the char (it is only peeked
        // so far) in order for the input stream to be at the right position.
        $this->inputstream->read();
        $this->inputstream->die(get_string('error_unexpectedinput', 'qtype_formulas', $currentchar));
    }

    /**
     * Read a single-char token from the input stream, e.g. a parenthesis or a comma.
     *
     * @param int $type type to use when creating the new token
     * @return token
     */
    private function read_single_char_token(int $type): token {
        // The position is fetched after reading, as in all other read_* methods.
        $char = $this->inputstream->read();
        $position = $this->inputstream->get_position();
        return new token($type, $char, $position['row'], $position['column']);
    }

    /**
     * Read a number token from the input stream. The number may contain a decimal point and an
     * exponent (scientific notation, e.g. 1.5e-3). Its value is stored as a float.
     *
     * @return token the number token
     */
    private function read_number(): token {
        // Start by reading the first char. If we are here, that means it was a digit or a decimal point.
        $currentchar = $this->inputstream->read();

        // Save starting position of the number.
        $startingposition = $this->inputstream->get_position();

        // A number can only have one decimal point and one exponent (for scientific notation) at most.
        $hasdecimalpoint = ($currentchar === '.');
        $hasexponent = false;

        // Save the first character.
        $result = $currentchar;
        while ($currentchar !== input_stream::EOF) {
            // Look at the next char and decide what to do.
            $nextchar = $this->inputstream->peek();
            if ($nextchar === '.') {
                // A decimal point is only valid, if we don't have one yet and if we are in the mantissa.
                if ($hasdecimalpoint || $hasexponent) {
                    break;
                }
                // Keep track that we do now have a decimal point in the number.
                $hasdecimalpoint = true;
            } else if ($nextchar === 'e' || $nextchar === 'E') {
                // An exponent is only valid, if we don't have one yet.
                if ($hasexponent) {
                    break;
                }
                // Also, an exponent must be followed either by a digit or by a plus/minus sign *and* a digit.
                // If it is not, it might be the start of an identifier or a syntax error, but that is
                // for the next token to decide.
                $followedby = $this->inputstream->peek(1);
                $hassign = ($followedby === '+' || $followedby === '-');
                if (self::char_matches('/[0-9]/', $followedby)) {
                    $hasexponent = true;
                } else if ($hassign && self::char_matches('/[0-9]/', $this->inputstream->peek(2))) {
                    $hasexponent = true;
                    // In this particular case, we will store two characters. The first one (e or E) must be
                    // read now, the second one (+ or -) will follow at the end of the loop.
                    $currentchar = $this->inputstream->read();
                    $result .= $currentchar;
                } else {
                    // We had an e or E, but it is not the start of an exponent, so we drop out. The e or E
                    // must be part of the next token.
                    break;
                }
            } else if (!self::char_matches('/[0-9]/', $nextchar)) {
                // We have covered all special cases. So, if the character is not a digit, we must stop here.
                break;
            }
            $currentchar = $this->inputstream->read();
            $result .= $currentchar;
        }
        return new token(token::NUMBER, floatval($result), $startingposition['row'], $startingposition['column']);
    }

    /**
     * Read a string token from the input stream. The string is delimited by a pair of " or '
     * characters. Inside the string, a backslash can be used to escape the delimiter; the
     * sequences \t and \n are replaced by a tab and a newline. Any other backslash is kept as is.
     *
     * If the closing delimiter is missing, the error is reported through the input stream's
     * die() method (presumably by throwing; it is defined outside of this class).
     *
     * @return token the string token
     */
    private function read_string(): token {
        // Start by reading the opening delimiter, either a " or a ' character.
        $opener = $this->inputstream->read();

        // Record position of the opening delimiter.
        $startingposition = $this->inputstream->get_position();

        $result = '';
        $currentchar = $this->inputstream->peek();
        while ($currentchar !== input_stream::EOF) {
            $nextchar = $this->inputstream->peek();
            if ($nextchar === '\\') {
                // A backslash could be used to escape the opening/closing delimiter inside the string.
                $followedby = $this->inputstream->peek(1);
                if ($followedby === $opener) {
                    // Consume the backslash. The quote will be appended below.
                    $this->inputstream->read();
                } else if ($followedby === 't' || $followedby === 'n') {
                    // Consume both the backslash and the letter, then store the control character.
                    $this->inputstream->read();
                    $currentchar = $this->inputstream->read();
                    $result .= ($followedby === 't' ? "\t" : "\n");
                    continue;
                }
            } else if ($nextchar === $opener) {
                // Unescaped delimiter: consume it and finish the string.
                $this->inputstream->read();
                return new token(token::STRING, $result, $startingposition['row'], $startingposition['column']);
            }
            $currentchar = $this->inputstream->read();
            $result .= $currentchar;
        }
        // Still here? That means the string has not been closed.
        $a = (object)$startingposition;
        $this->inputstream->die(get_string('error_unterminatedstring', 'qtype_formulas', $a));
    }

    /**
     * Read an identifier token (function name, variable name, reserved word or pre-defined constant
     * like π) from the input stream.
     *
     * The word 'for' yields a RESERVED_WORD token. The word 'pi' yields a CONSTANT token with the
     * value π; if it is immediately followed by (), those two parens are consumed as well.
     *
     * @return token the identifier token
     */
    private function read_identifier(): token {
        // Start by reading the first char. If we are here, that means it was a letter or an underscore.
        $currentchar = $this->inputstream->read();
        $result = $currentchar;

        // Record position of the identifier's first character.
        $startingposition = $this->inputstream->get_position();

        while ($currentchar !== input_stream::EOF) {
            $nextchar = $this->inputstream->peek();
            // Identifiers may contain letters, digits or underscores.
            if (!self::char_matches('/[A-Za-z0-9_]/', $nextchar)) {
                break;
            }
            $currentchar = $this->inputstream->read();
            $result .= $currentchar;
        }

        $type = token::IDENTIFIER;
        if ($result === 'for') {
            $type = token::RESERVED_WORD;
        } else if ($result === 'pi') {
            $type = token::CONSTANT;
            $result = 'π';
            // If we have the legacy syntax pi(), we drop the two parens.
            $next = $this->inputstream->peek();
            $nextbutone = $this->inputstream->peek(1);
            if ($next === '(' && $nextbutone === ')') {
                $this->inputstream->read();
                $this->inputstream->read();
            }
        }
        return new token($type, $result, $startingposition['row'], $startingposition['column']);
    }

    /**
     * Read an operator token from the input stream. Operators are one or two characters long.
     *
     * @return token the operator token
     */
    private function read_operator(): token {
        // Start by reading the first char.
        $currentchar = $this->inputstream->read();
        $result = $currentchar;

        // Record position of the operator's first character.
        $startingposition = $this->inputstream->get_position();

        // Some chars might be the start of a two-character operator. Those are:
        // ** << >> == != >= <= && ||
        // Let's look at the following character...
        $followedby = $this->inputstream->peek();

        // In most cases, two-character operators have the same character twice.
        $isdoubled = ($currentchar === $followedby) && self::char_matches('/[*=&|<>]/', $followedby);
        // The only exceptions are !=, <= and >= where the second char is always the equal sign.
        $iscomparison = ($followedby === '=') && self::char_matches('/[!<>]/', $currentchar);

        if ($isdoubled || $iscomparison) {
            $result .= $this->inputstream->read();
        }
        return new token(token::OPERATOR, $result, $startingposition['row'], $startingposition['column']);
    }

    /**
     * Read until the end of the line, because comments always extend until the end of the line.
     * The newline itself and all white space following it are consumed as well.
     *
     * @return void
     */
    private function consume_comment(): void {
        $currentchar = $this->inputstream->peek();
        // The check is made on the char that has just been read, so the newline is consumed, too.
        while ($currentchar !== "\n" && $currentchar !== input_stream::EOF) {
            $currentchar = $this->inputstream->read();
        }
        // Eat up all white space following the comment.
        $this->consume_whitespace();
    }

    /**
     * Eat up all white space until the start of the next token.
     *
     * @return void
     */
    private function consume_whitespace(): void {
        $currentchar = $this->inputstream->peek();
        while (self::char_matches('/\s/', $currentchar)) {
            $this->inputstream->read();
            $currentchar = $this->inputstream->peek();
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc