• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

FormulasQuestion / moodle-qtype_formulas / 13217446514

08 Feb 2025 04:37PM UTC coverage: 76.899% (+1.9%) from 75.045%
13217446514

Pull #62

github

web-flow
Merge b36f9931f into acd272945
Pull Request #62: Rewrite the parser

2547 of 3139 new or added lines in 22 files covered. (81.14%)

146 existing lines in 6 files now uncovered.

3006 of 3909 relevant lines covered (76.9%)

438.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.97
/classes/local/answer_parser.php
1
<?php
2
// This file is part of Moodle - https://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <https://www.gnu.org/licenses/>.
16

17
namespace qtype_formulas\local;
18

19
use qtype_formulas;
20

21
/**
22
 * Parser for answer expressions for qtype_formulas
23
 *
24
 * @package    qtype_formulas
25
 * @copyright  2022 Philipp Imhof
26
 * @license    https://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
27
 */
28
class answer_parser extends parser {
29
    /**
     * Create a parser for student answers. This class does additional filtering (e. g. block
     * forbidden operators) and syntax checking according to the answer type. It also translates
     * the ^ symbol to the ** operator.
     *
     * Before the tokens are handed to the parent parser, they are inspected one by one and, where
     * needed, modified in place:
     * - if $caretmeanspower is set, an OPERATOR token ^ is rewritten to **
     * - an IDENTIFIER listed in $knownvariables becomes a VARIABLE, unless the token right before
     *   it was a PREFIX token
     * - any other IDENTIFIER that is a key of functions::FUNCTIONS or evaluator::PHPFUNCTIONS
     *   becomes a FUNCTION
     * - a PREFIX token is reported via die() with the 'error_prefix' message, unless
     *   $formodelanswer is set
     *
     * @param string|array $tokenlist list of tokens as returned from the lexer or input string
     * @param array $knownvariables names of identifiers that should be treated as variables
     * @param bool $caretmeanspower whether ^ should be interpreted as exponentiation operator
     * @param bool $formodelanswer whether we are parsing a teacher's model answer (thus allowing \ prefix)
     */
    public function __construct($tokenlist, array $knownvariables = [], bool $caretmeanspower = true,
            bool $formodelanswer = false) {
        // If the input is given as a string, run it through the lexer first.
        if (is_string($tokenlist)) {
            $lexer = new lexer($tokenlist);
            $tokenlist = $lexer->get_tokens();
        }

        $precededbyprefix = false;
        foreach ($tokenlist as $token) {
            // In the context of student answers, the caret (^) *always* means exponentiation (**) instead
            // of XOR. In model answers entered by the teacher, the caret *only* means exponentiation
            // for algebraic formulas, but not for the other answer types.
            if ($caretmeanspower && $token->type === token::OPERATOR && $token->value === '^') {
                $token->value = '**';
            }

            // Students are not allowed to use function names as variables, e.g. they cannot use a
            // variable 'sin'. This is important, because teachers have that option and the regular
            // parser will automatically consider 'sin' in the expression '3*sin x' as a variable,
            // due to the missing parens. We want to avoid that, because it would conceal a syntax
            // error. We make one exception: if the identifier has been labelled as a known variable,
            // the token will be considered as a variable. This allows the teacher to use e.g. 'exp'
            // as a unit name, if they want to.
            if ($token->type === token::IDENTIFIER) {
                // The lookup is strict, so a name can only match an identical string and never a
                // loosely equal value of another type.
                if (!$precededbyprefix && in_array($token->value, $knownvariables, true)) {
                    $token->type = token::VARIABLE;
                } else if (array_key_exists($token->value, functions::FUNCTIONS)
                        || array_key_exists($token->value, evaluator::PHPFUNCTIONS)) {
                    // Checking both tables separately gives the same answer as looking the key up
                    // in their union, but does not build a merged array for every identifier.
                    $token->type = token::FUNCTION;
                }
            }

            // The PREFIX operator is reserved for model answers.
            if (!$formodelanswer && $token->type === token::PREFIX) {
                $this->die(get_string('error_prefix', 'qtype_formulas'), $token);
            }

            // Remember whether this token was a PREFIX, because that matters for the next identifier.
            $precededbyprefix = ($token->type === token::PREFIX);
        }

        // Once this is done, we can parse the expression normally.
        parent::__construct($tokenlist, $knownvariables);
    }
83

84
    /**
     * Perform the right check according to a given answer type.
     *
     * The call is forwarded to the checker that belongs to the given answer type. If the type is
     * none of the four constants handled here, the answer is reported as not acceptable.
     *
     * @param int $type the answer type, a constant from the qtype_formulas class
     * @return bool whether the answer is acceptable for the given type; false for an unknown type
     */
    public function is_acceptable_for_answertype(int $type): bool {
        if ($type === qtype_formulas::ANSWER_TYPE_NUMBER) {
            return $this->is_acceptable_number();
        }

        if ($type === qtype_formulas::ANSWER_TYPE_NUMERIC) {
            return $this->is_acceptable_numeric();
        }

        if ($type === qtype_formulas::ANSWER_TYPE_NUMERICAL_FORMULA) {
            return $this->is_acceptable_numerical_formula();
        }

        if ($type === qtype_formulas::ANSWER_TYPE_ALGEBRAIC) {
            return $this->is_acceptable_algebraic_formula();
        }

        // The function is declared to return a bool, so falling off the end would raise a
        // TypeError. An answer cannot be acceptable for a type we do not know.
        return false;
    }
107

108
    /**
     * Check whether the given answer contains only valid tokens for the answer type NUMBER, i. e.
     * - just a number, possibly with a decimal point
     * - no operators, except unary + or - at start
     * - possibly followed by e/E (maybe followed by + or -) plus an integer
     *
     * In terms of parsed tokens, this means: exactly one statement whose token list is either a
     * single NUMBER or CONSTANT token, or such a token followed by one unary minus operator (_).
     *
     * @return bool
     */
    private function is_acceptable_number(): bool {
        // The statement list must contain exactly one expression object.
        if (count($this->statements) !== 1) {
            return false;
        }

        $answertokens = $this->statements[0]->body;

        // Defensive check: an empty token list cannot be a number. Without this guard, reading
        // the first token below would trigger an "undefined array key" warning.
        if (empty($answertokens)) {
            return false;
        }

        // The first element of the answer expression must be a token of type NUMBER or
        // CONSTANT, e.g. pi or π; we currently do not have other named constants.
        // Note: if the user has entered -5, this has now become [5, _].
        if (!in_array($answertokens[0]->type, [token::NUMBER, token::CONSTANT], true)) {
            return false;
        }
        array_shift($answertokens);

        // If there are no tokens left, everything is fine.
        if (empty($answertokens)) {
            return true;
        }

        // We accept one more token: an unary minus sign (OPERATOR '_'). An unary plus sign
        // would be possible, but it would already have been dropped. For backwards compatibility,
        // we do not accept multiple unary minus signs.
        if (count($answertokens) > 1) {
            return false;
        }
        $token = $answertokens[0];
        return ($token->type === token::OPERATOR && $token->value === '_');
    }
146

147
    /**
     * Check whether the given answer contains only valid tokens for the answer type NUMERIC, i. e.
     * - numbers
     * - operators +, -, *, /, ** or ^ (and the unary minus)
     * - round parens ( and )
     * - pi or pi() or π
     * - no functions
     * - no variables
     * - no PREFIX operator
     *
     * @return bool
     */
    private function is_acceptable_numeric(): bool {
        // If it's a valid number expression, we have nothing to do.
        if ($this->is_acceptable_number()) {
            return true;
        }

        // The statement list must contain exactly one expression object.
        if (count($this->statements) !== 1) {
            return false;
        }

        $answertokens = $this->statements[0]->body;

        // If a token is an OPERATOR, it has to be +, -, *, /, ^, ** or the unary minus _.
        // The list does not depend on the token, so it is built once before the loop.
        $allowedoperators = ['+', '-', '*', '/', '^', '**', '_'];

        // Iterate over all tokens.
        foreach ($answertokens as $token) {
            // The PREFIX operator must not be used in numeric answers.
            if ($token->type === token::PREFIX) {
                return false;
            }

            // If we find a FUNCTION or VARIABLE token, we can stop, because those are not
            // allowed in the numeric answer type.
            if ($token->type === token::FUNCTION || $token->type === token::VARIABLE) {
                return false;
            }

            if ($token->type === token::OPERATOR && !in_array($token->value, $allowedoperators, true)) {
                return false;
            }

            // The token type is tested against the ANY_PAREN bit mask; a non-zero result means
            // that the token is some kind of paren. Only round parentheses are allowed.
            $isparen = ($token->type & token::ANY_PAREN);
            if ($isparen && !in_array($token->value, ['(', ')'], true)) {
                return false;
            }
        }

        // Still here? Then it's all good.
        return true;
    }
198

199
    /**
     * Decide whether the answer is admissible for the answer type NUMERICAL_FORMULA. Such an
     * answer may be
     * - anything that is accepted as a number or as a numeric expression
     * - an expression that additionally uses the functions sin, cos, tan, asin, acos, atan,
     *   atan2, sinh, cosh, tanh, asinh, acosh, atanh, sqrt, exp, log, log10, ln, abs, ceil,
     *   floor, fact, ncr or npr
     *
     * Variables are not allowed.
     *
     * @return bool
     */
    private function is_acceptable_numerical_formula(): bool {
        // The simple checks come first. Only if both of them fail, we validate the answer as an
        // algebraic formula in its restricted mode, i. e. with variables being forbidden. That
        // last check also makes sure there is just one single statement.
        return $this->is_acceptable_number()
            || $this->is_acceptable_numeric()
            || $this->is_acceptable_algebraic_formula(true);
    }
224

225
    /**
     * Decide whether the answer is admissible for the answer type ALGEBRAIC, i. e.
     * - everything that is accepted as a number or as a numeric expression
     * - functions from a fixed white list (trigonometric and hyperbolic functions and their
     *   inverses, sqrt, exp, log, log10, ln, abs, ceil, floor, fact, ncr, npr)
     * - the operators +, -, *, /, **, ^, % and the unary minus
     * - variables, unless the check is made on behalf of a numerical formula
     *
     * If no token is rejected, the final verdict comes from the syntax check.
     *
     * TODO: maybe only allow registered variables; this would catch student mistakes like
     * writing "ab" instead of "a b" or "a*b", or a(x+y) = ax + ay instead of a*x or a x.
     *
     * @param bool $fornumericalformula whether we disallow the usage of variables and the PREFIX operator
     * @return bool
     */
    private function is_acceptable_algebraic_formula(bool $fornumericalformula = false): bool {
        // Numbers and numeric expressions are accepted without further inspection.
        if ($this->is_acceptable_number() || $this->is_acceptable_numeric()) {
            return true;
        }

        // We need one single expression, no more and no less.
        if (count($this->statements) !== 1) {
            return false;
        }

        // White lists for the FUNCTION and OPERATOR tokens.
        $allowedfunctions = [
            'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'atan2', 'sinh', 'cosh', 'tanh', 'asinh', 'acosh', 'atanh',
            'sqrt', 'exp', 'log', 'log10', 'ln', 'abs', 'ceil', 'floor', 'fact', 'ncr', 'npr',
        ];
        $allowedoperators = ['+', '_', '-', '/', '*', '**', '^', '%'];

        foreach ($this->statements[0]->body as $current) {
            $type = $current->type;

            // Numbers are always fine.
            if ($type === token::NUMBER) {
                continue;
            }

            // In a numerical formula, neither the PREFIX operator nor variables may be used.
            if ($fornumericalformula && ($type === token::PREFIX || $type === token::VARIABLE)) {
                return false;
            }

            // Functions and operators must be on their respective white list.
            if ($type === token::FUNCTION && !in_array($current->value, $allowedfunctions)) {
                return false;
            }
            if ($type === token::OPERATOR && !in_array($current->value, $allowedoperators)) {
                return false;
            }
        }

        // No forbidden token was found, so the outcome depends on the syntax check.
        return $this->is_valid_syntax();
    }
282

283
    /**
     * Find the index where the numeric part of the answer ends and the unit part begins, e.g.
     * for the answer "1.5e3 m^2", that index would be 6.
     *
     * Students cannot (legally) use variables in answers of type number, numeric or numerical
     * formula, whereas units are classified as variables. The unit therefore starts at the
     * first VARIABLE token in the token list.
     *
     * @return int column of the first VARIABLE token minus one, or PHP_INT_MAX if there is none
     */
    public function find_start_of_units(): int {
        // Unless we find a unit, it starts very, very far away...
        $start = PHP_INT_MAX;

        foreach ($this->tokenlist as $candidate) {
            if ($candidate->type !== token::VARIABLE) {
                continue;
            }
            // The first "variable" (actually a unit) marks the start, no need to look any further.
            $start = $candidate->column - 1;
            break;
        }

        return $start;
    }
302

303
    /**
     * Iterate over all tokens and check whether the expression is *syntactically* valid.
     * Note that this does not necessarily mean that the expression can be evaluated:
     * - sqrt(-3) is syntactically valid, but it cannot be calculated
     * - asin(x*y) is syntactically valid, but cannot be evaluated if abs(x*y) > 1
     * - a/(b-b) is syntactically valid, but it cannot be evaluated
     * - a-*b is syntactically invalid, because the operators cannot be chained that way
     *
     * The check simulates the evaluation on a stack without calculating anything: operands are
     * pushed, operators and functions remove as many elements as an evaluation would remove in
     * total. The expression is considered valid if exactly one element is left in the end.
     *
     * @return bool
     */
    private function is_valid_syntax(): bool {
        $tokens = $this->statements[0]->body;

        // Iterate over all tokens. Push literals (strings, number), constants and variables on
        // the stack. Operators and functions will consume them, but not evaluate anything. In
        // the end, there should be only one single element on the stack.
        $stack = [];
        foreach ($tokens as $token) {
            // Constants (e.g. pi) are operands just like numbers. If they were not pushed, every
            // operator or function working on a constant would find one operand too few and
            // expressions like pi*x would be reported as invalid.
            if (in_array($token->type, [token::STRING, token::NUMBER, token::VARIABLE, token::CONSTANT], true)) {
                $stack[] = $token->value;
            }
            if ($token->type === token::OPERATOR) {
                // Check whether the operator is unary. We also include operators that are not
                // actually allowed in a student's answer. Unary operators would operate on
                // the last token on the stack, but as we do not evaluate anything, we just
                // drop them.
                if (in_array($token->value, ['_', '!', '~'], true)) {
                    continue;
                }
                // All other operators are binary, because the student cannot use the ternary
                // operator in their answer. Also, they are not allowed other than round parens,
                // so there can be no %%rangebuild or similar pseudo-operators in the queue.
                // A binary operator would pop the two top elements, do its magic and then push
                // the result on the stack. As we do not evaluate anything, we simply drop the top
                // element.
                array_pop($stack);
            }
            // For functions, the top element on the stack (always a number literal) will indicate
            // the number of arguments to consume. So we pop that element plus one less than what
            // it indicates, meaning we actually drop exactly the number of elements indicated
            // by that element.
            // Note: if that number is 0, array_slice() is called with a length of 0 and returns
            // an empty array, i. e. the stack is emptied.
            if ($token->type === token::FUNCTION) {
                $n = end($stack);
                $stack = array_slice($stack, 0, -$n);
            }
        }

        return (count($stack) === 1);
    }
352

353
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc