• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mattwparas / steel / 13615020054

02 Mar 2025 11:34AM UTC coverage: 47.089% (+0.03%) from 47.062%
13615020054

Pull #310

github

web-flow
Merge f80ed61d2 into f1a605a0f
Pull Request #310: support escaped identifiers

93 of 138 new or added lines in 4 files covered. (67.39%)

4 existing lines in 2 files now uncovered.

12737 of 27049 relevant lines covered (47.09%)

422185.13 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.37
/crates/steel-parser/src/lexer.rs
1
use super::parser::SourceId;
2
use crate::tokens::{IntLiteral, Token, TokenType};
3
use crate::tokens::{NumberLiteral, Paren, ParenMod, RealLiteral};
4
use smallvec::SmallVec;
5
use std::borrow::Cow;
6
use std::char;
7
use std::iter::Iterator;
8
use std::marker::PhantomData;
9
use std::sync::Arc;
10
use std::{iter::Peekable, str::Chars};
11

12
// Scheme (R7RS) spellings of the special floating-point literals.
// `read_word` compares whole identifiers against these to produce floats.
pub const INFINITY: &str = "+inf.0";
pub const NEG_INFINITY: &str = "-inf.0";
pub const NAN: &str = "+nan.0";
// Note: "-nan.0" also lexes to plain NaN (sign is not preserved).
pub const NEG_NAN: &str = "-nan.0";
16

17
/// Adapter that materializes lexed string slices as plain `String`s.
pub struct OwnedString;

impl ToOwnedString<String> for OwnedString {
    fn own(&self, s: Cow<str>) -> String {
        // `into_owned` hands back the existing allocation when the `Cow`
        // is already owned; `to_string` would always copy.
        s.into_owned()
    }
}

/// Converts a borrowed-or-owned token string into the stream's owned type `T`.
pub trait ToOwnedString<T> {
    /// Produces an owned `T` from the token text `s`.
    fn own(&self, s: Cow<str>) -> T;
}

/// Byte range of a token within the source text.
pub type Span = core::ops::Range<usize>;
30

31
pub struct Lexer<'a> {
    /// The source of the lexer.
    source: &'a str,
    /// An iterator over the characters.
    chars: Peekable<Chars<'a>>,
    /// The next token to return or `None` if it should be parsed.
    /// (Used when one word splits into two tokens, e.g. a leading `+`.)
    queued: Option<TokenType<Cow<'a, str>>>,
    /// Byte offset where the token currently being lexed starts.
    token_start: usize,
    /// Byte offset one past the end of the characters consumed so far.
    token_end: usize,
}
41

42
impl<'a> Lexer<'a> {
43
    /// Creates a lexer positioned at the start of `source`.
    pub fn new(source: &'a str) -> Self {
        Self {
            source,
            chars: source.chars().peekable(),
            queued: None,
            token_start: 0,
            token_end: 0,
        }
    }
52

53
    fn eat(&mut self) -> Option<char> {
16,417,020✔
54
        if let Some(c) = self.chars.next() {
32,834,030✔
55
            self.token_end += c.len_utf8();
×
56
            Some(c)
×
57
        } else {
58
            None
10✔
59
        }
60
    }
61

62
    // Consume characters until the next non whitespace input
63
    fn consume_whitespace(&mut self) {
2,233,249✔
64
        while let Some(&c) = self.chars.peek() {
14,385,396✔
65
            if c.is_whitespace() {
4,960,686✔
66
                self.eat();
4,960,686✔
67

68
                self.token_start = self.token_end;
4,960,686✔
69
            } else {
70
                break;
2,230,775✔
71
            }
72
        }
73
    }
74

75
    fn read_string(&mut self) -> Result<TokenType<Cow<'a, str>>> {
24,834✔
76
        // Skip the opening quote.
77
        self.eat();
24,834✔
78

79
        let mut buf = String::new();
24,834✔
80

81
        while let Some(&c) = self.chars.peek() {
894,887✔
82
            self.eat();
×
83
            match c {
×
84
                '"' => return Ok(TokenType::StringLiteral(Arc::new(buf))),
24,830✔
NEW
85
                '\\' => {
×
86
                    if let Some(c) = self.read_string_escape()? {
1,210✔
87
                        buf.push(c);
401✔
88
                    }
89
                }
90
                _ => buf.push(c),
422,208✔
91
            }
92
        }
93

94
        Err(TokenError::IncompleteString)
1✔
95
    }
96

97
    /// Reads one escape sequence, the `\` having already been consumed.
    ///
    /// Returns `Ok(Some(c))` for escapes that produce a character,
    /// `Ok(None)` for a line-continuation (backslash + whitespace + newline),
    /// and an error for unknown or malformed sequences.
    fn read_string_escape(&mut self) -> Result<Option<char>> {
        let c = match self.chars.peek() {
            Some('"') => {
                self.eat();
                '"'
            }

            // \a — alarm (bell)
            Some('a') => {
                self.eat();
                '\x07'
            }

            // \b — backspace
            Some('b') => {
                self.eat();
                '\x08'
            }

            Some('\\') => {
                self.eat();
                '\\'
            }

            // \| — used inside |…| escaped identifiers
            Some('|') => {
                self.eat();
                '|'
            }

            Some('t') => {
                self.eat();
                '\t'
            }

            Some('n') => {
                self.eat();
                '\n'
            }

            Some('r') => {
                self.eat();
                '\r'
            }

            Some('0') => {
                self.eat();
                '\0'
            }

            // Hex escapes: \xHH…; or \uHHHH; with an optional {…} form for \u.
            Some(&code @ ('x' | 'u')) => {
                self.eat();

                let mut digits = String::new();

                // \u may wrap its digits in braces; \x never does.
                let braces = match self.chars.peek().copied() {
                    Some('{') if code == 'u' => {
                        self.eat();
                        true
                    }
                    _ => false,
                };

                loop {
                    let Some(c) = self.eat() else {
                        return Err(TokenError::MalformedByteEscape);
                    };

                    match c {
                        // Terminator depends on which form we are in.
                        ';' if !braces => break,
                        '}' if braces => break,
                        c if c.is_ascii_digit() => {
                            digits.push(c);
                        }
                        'a'..='f' | 'A'..='F' => {
                            digits.push(c);
                        }
                        _ => return Err(TokenError::MalformedByteEscape),
                    }
                }

                // Too many digits overflows u32 and errors here as well.
                let codepoint = u32::from_str_radix(&digits, 16)
                    .map_err(|_| TokenError::MalformedByteEscape)?;
                let char = char::from_u32(codepoint).ok_or(TokenError::MalformedByteEscape)?;

                char
            }

            // Backslash followed by whitespace: line continuation. Consumes
            // horizontal whitespace, exactly one newline, then the next
            // line's leading whitespace, contributing no character.
            Some(&start @ (' ' | '\t' | '\n')) => {
                self.eat();

                let mut trimming = start == '\n';

                loop {
                    let Some(c) = self.chars.peek() else {
                        return Err(TokenError::IncompleteString);
                    };

                    match c {
                        ' ' | '\t' => {
                            self.eat();
                        }
                        '\n' if !trimming => {
                            self.eat();
                            trimming = true;
                        }
                        // First non-whitespace after the newline ends the splice.
                        _ if trimming => return Ok(None),

                        // Non-whitespace before any newline: not a valid splice.
                        _ => return Err(TokenError::InvalidEscape),
                    }
                }
            }

            Some(_) => return Err(TokenError::InvalidEscape),

            None => return Err(TokenError::IncompleteString),
        };

        Ok(Some(c))
    }
214

215
    /// Lexes a token beginning with `#` (the `#` itself is already consumed
    /// and included in the current slice): booleans, radix-prefixed integers,
    /// keywords, character literals, vector/bytevector openers, and the
    /// `#'` / `#\`` / `#,` / `#,@` syntax shorthands.
    fn read_hash_value(&mut self) -> Result<TokenType<Cow<'a, str>>> {
        // Interprets the text of a `#\…` literal (`slice` includes `#\`).
        fn parse_char(slice: &str) -> Option<char> {
            use std::str::FromStr;

            // Caller guarantees there is at least one char after `#\`.
            debug_assert!(slice.len() > 2);

            match &slice[2..] {
                // Named characters, upper- or lower-case only (no mixed case).
                "alarm" | "ALARM" => Some('\x07'),
                "backspace" | "BACKSPACE" => Some('\x08'),
                "delete" | "DELETE" => Some('\x7F'),
                "escape" | "ESCAPE" => Some('\x1B'),
                "newline" | "NEWLINE" => Some('\n'),
                "null" | "NULL" => Some('\0'),
                "return" | "RETURN" => Some('\r'),
                "space" | "SPACE" => Some(' '),
                "tab" | "TAB" => Some('\t'),
                "\\" => Some('\\'),
                ")" => Some(')'),
                "]" => Some(']'),
                "[" => Some('['),
                "(" => Some('('),
                "^" => Some('^'),

                character => {
                    let first = character.as_bytes()[0];

                    // `#\x…` / `#\u…` with more than one char is a hex escape;
                    // a bare `#\x` or `#\u` is the literal character itself.
                    let escape = (first == b'u' || first == b'x') && slice.len() > 3;

                    if !escape {
                        // Must be exactly one character to parse.
                        return char::from_str(character).ok();
                    }

                    // `#\u{…}` braced form requires the closing brace.
                    let payload = if first == b'u' && character.as_bytes().get(1) == Some(&b'{') {
                        if character.as_bytes().last() != Some(&b'}') {
                            return None;
                        }

                        &character[2..(character.len() - 1)]
                    } else {
                        &character[1..]
                    };

                    let code = u32::from_str_radix(payload, 16).ok()?;

                    char::from_u32(code)
                }
            }
        }

        // Consume the rest of the hash token, stopping at delimiters.
        while let Some(&c) = self.chars.peek() {
            match c {
                // Take the char after a backslash unconditionally so that
                // delimiters like `#\)` stay inside the token.
                '\\' => {
                    self.eat();
                    self.eat();
                }
                // `#'` and `#\`` are complete tokens.
                '\'' | '`' => {
                    self.eat();
                    break;
                }

                // `#,` or `#,@`.
                ',' => {
                    self.eat();
                    if Some('@') == self.chars.peek().copied() {
                        self.eat();
                        break;
                    } else {
                        break;
                    }
                }

                '(' | '[' | ')' | ']' => break,
                c if c.is_whitespace() => break,
                _ => {
                    self.eat();
                }
            };
        }

        match self.slice() {
            "#true" | "#t" => Ok(TokenType::BooleanLiteral(true)),
            "#false" | "#f" => Ok(TokenType::BooleanLiteral(false)),

            "#'" => Ok(TokenType::QuoteSyntax),
            "#`" => Ok(TokenType::QuasiQuoteSyntax),
            "#," => Ok(TokenType::UnquoteSyntax),
            "#,@" => Ok(TokenType::UnquoteSpliceSyntax),

            // Radix-prefixed integer literals.
            hex if hex.starts_with("#x") => {
                let hex = isize::from_str_radix(hex.strip_prefix("#x").unwrap(), 16)
                    .map_err(|_| TokenError::MalformedHexInteger)?;

                Ok(IntLiteral::Small(hex).into())
            }

            octal if octal.starts_with("#o") => {
                let hex = isize::from_str_radix(octal.strip_prefix("#o").unwrap(), 8)
                    .map_err(|_| TokenError::MalformedOctalInteger)?;

                Ok(IntLiteral::Small(hex).into())
            }

            binary if binary.starts_with("#b") => {
                let hex = isize::from_str_radix(binary.strip_prefix("#b").unwrap(), 2)
                    .map_err(|_| TokenError::MalformedBinaryInteger)?;

                Ok(IntLiteral::Small(hex).into())
            }

            // Keywords keep their `#:` prefix in the token text.
            keyword if keyword.starts_with("#:") => Ok(TokenType::Keyword(self.slice().into())),

            character if character.starts_with("#\\") => {
                if character.len() <= 2 {
                    return Err(TokenError::InvalidCharacter);
                }

                if let Some(parsed_character) = parse_char(character) {
                    Ok(TokenType::CharacterLiteral(parsed_character))
                } else {
                    Err(TokenError::InvalidCharacter)
                }
            }

            // `#(` opens a vector literal.
            "#" if self.chars.peek() == Some(&'(') => {
                self.eat();
                Ok(TokenType::OpenParen(Paren::Round, Some(ParenMod::Vector)))
            }

            // `#u8(` opens a bytevector literal.
            "#u8" if self.chars.peek() == Some(&'(') => {
                self.eat();
                Ok(TokenType::OpenParen(Paren::Round, Some(ParenMod::Bytes)))
            }

            // Anything else starting with `#` is treated as an identifier.
            _ => self.read_word(),
        }
    }
350

351
    fn read_number(&mut self) -> Result<TokenType<Cow<'a, str>>> {
32,601✔
352
        while let Some(&c) = self.chars.peek() {
137,914✔
353
            match c {
12,360✔
354
                c if c.is_ascii_digit() => {
72,010✔
355
                    self.eat();
36,005✔
356
                }
357
                '+' | '-' | '.' | '/' | 'e' | 'E' | 'i' => {
363✔
358
                    self.eat();
363✔
359
                }
360
                '(' | ')' | '[' | ']' => {
×
361
                    return if let Some(t) = parse_number(self.slice()) {
40,434✔
NEW
362
                        Ok(t.into())
×
363
                    } else {
364
                        self.read_word()
×
365
                    }
366
                }
367
                c if c.is_whitespace() => {
37,074✔
368
                    return if let Some(t) = parse_number(self.slice()) {
24,704✔
NEW
369
                        Ok(t.into())
×
370
                    } else {
371
                        self.read_word()
10✔
372
                    }
373
                }
374
                _ => return self.read_word(),
3✔
375
            }
376
        }
377
        match parse_number(self.slice()) {
24✔
378
            Some(n) => Ok(n.into()),
23✔
379
            None => self.read_word(),
1✔
380
        }
381
    }
382

383
    fn read_rest_of_line(&mut self) {
55,329✔
384
        while let Some(c) = self.eat() {
4,755,843✔
385
            if c == '\n' {
×
386
                break;
55,320✔
387
            }
388
        }
389
    }
390

391
    /// Lexes an identifier or keyword-like word, including `|…|` escaped
    /// identifiers, and maps reserved words to their dedicated token types.
    fn read_word(&mut self) -> Result<TokenType<Cow<'a, str>>> {
        // `|…|` form: delimiters do not terminate, and escapes are allowed.
        let escaped_identifier = self.chars.peek().copied() == Some('|');

        if escaped_identifier {
            self.eat();
        }

        // Buffers the expanded text of an escaped identifier; lazy, so plain
        // identifiers never allocate here.
        let mut ident_buffer = IdentBuffer::new(self.chars.clone());

        while let Some(&c) = self.chars.peek() {
            match c {
                // Closing `|` ends an escaped identifier.
                '|' if escaped_identifier => {
                    self.eat();

                    break;
                }
                '\\' if escaped_identifier => {
                    self.eat();

                    // Reuse string-escape handling; rebrand "unterminated
                    // string" as "unterminated identifier".
                    let escaped = self.read_string_escape().map_err(|err| match err {
                        TokenError::IncompleteString => TokenError::IncompleteIdentifier,
                        err => err,
                    })?;

                    ident_buffer.push_escape(escaped);
                }
                c if escaped_identifier => {
                    ident_buffer.push(c);
                    self.eat();
                }
                '(' | '[' | ')' | ']' | '{' | '}' => break,
                c if c.is_whitespace() => break,
                '\'' | '"' | '`' | ';' | ',' => {
                    break;
                }
                // Could be a quote within a word, we should handle escaping it accordingly
                // (even though its a bit odd)
                '\\' => {
                    self.eat();
                    self.eat();
                }

                _ => {
                    self.eat();
                }
            };
        }

        let token = match self.slice() {
            "." => TokenType::Dot,
            "define" | "defn" | "#%define" => TokenType::Define,
            "let" => TokenType::Let,
            "%plain-let" => TokenType::TestLet,
            "return!" => TokenType::Return,
            "begin" => TokenType::Begin,
            "lambda" | "fn" | "#%plain-lambda" | "λ" => TokenType::Lambda,
            "quote" => TokenType::Quote,
            // "unquote" => TokenType::Unquote,
            "syntax-rules" => TokenType::SyntaxRules,
            "define-syntax" => TokenType::DefineSyntax,
            "..." => TokenType::Ellipses,
            "set!" => TokenType::Set,
            "require" => TokenType::Require,
            "if" => TokenType::If,
            // Special float spellings lex directly to float literals.
            INFINITY => RealLiteral::Float(f64::INFINITY).into(),
            NEG_INFINITY => RealLiteral::Float(f64::NEG_INFINITY).into(),
            NAN => RealLiteral::Float(f64::NAN).into(),
            NEG_NAN => RealLiteral::Float(f64::NAN).into(),
            identifier => {
                debug_assert!(!identifier.is_empty());

                match identifier.as_bytes() {
                    // A word starting with `+` splits into `+` now and the
                    // remainder queued for the next `next()` call.
                    [b'+', _, ..] if self.queued.is_none() => {
                        self.queued = Some(TokenType::Identifier((&identifier[1..]).into()));
                        TokenType::Identifier("+".into())
                    }
                    [b'|', .., b'|'] if escaped_identifier => {
                        if ident_buffer.ident.is_empty() {
                            // No non-trivial escapes: borrow between the bars.
                            TokenType::Identifier((&identifier[1..identifier.len() - 1]).into())
                        } else {
                            // Escapes were expanded into the buffer.
                            TokenType::Identifier(ident_buffer.ident.into())
                        }
                    }
                    // Opened with `|` but never saw the closing bar.
                    _ if escaped_identifier => {
                        return Err(TokenError::IncompleteIdentifier);
                    }
                    _ => TokenType::Identifier(identifier.into()),
                }
            }
        };

        Ok(token)
    }
484

485
    fn read_nestable_comment(&mut self) -> Result<TokenType<Cow<'a, str>>> {
3✔
486
        self.eat();
3✔
487

488
        let mut depth = 1;
3✔
489

490
        while let Some(c) = self.eat() {
37✔
491
            match c {
×
492
                '|' => {
×
493
                    if self.chars.peek().copied() == Some('#') {
3✔
494
                        self.eat();
3✔
495
                        depth -= 1;
3✔
496

497
                        if depth == 0 {
3✔
498
                            return Ok(TokenType::Comment);
2✔
499
                        }
500
                    }
501
                }
502
                '#' => {
×
503
                    if self.chars.peek().copied() == Some('|') {
2✔
504
                        self.eat();
1✔
505
                        depth += 1;
1✔
506
                    }
507
                }
508
                _ => {}
14✔
509
            }
510
        }
511

512
        Err(TokenError::IncompleteComment)
1✔
513
    }
514
}
515

516
/// Accumulates the expanded text of a `|…|` escaped identifier.
///
/// Starts in a cheap counting mode and only materializes a `String` once a
/// non-trivial escape is seen, so plain escaped identifiers never allocate.
struct IdentBuffer<'a> {
    /// Clone of the lexer's char iterator taken at the identifier's start;
    /// replayed to recover the already-counted prefix on the first escape.
    chars: Peekable<Chars<'a>>,
    /// The materialized identifier text (empty while still counting).
    ident: String,
    // works as Either:
    //  - Ok: saw a non-trivial escape, buffering into ident
    //  - Err: "trivial" string, keeping count of its len
    mode: std::result::Result<(), usize>,
}
524

525
impl<'a> IdentBuffer<'a> {
    /// Creates a buffer in counting mode; `chars` must be positioned just
    /// after the opening `|` of the identifier.
    fn new(chars: Peekable<Chars<'a>>) -> Self {
        Self {
            chars,
            ident: Default::default(),
            mode: Err(0),
        }
    }

    /// Records a plain (unescaped) character.
    fn push(&mut self, c: char) {
        if let Err(len) = self.mode.as_mut() {
            // Counting mode: remember how many chars precede any escape.
            *len += 1;
        } else {
            self.ident.push(c);
        }
    }

    /// Records the result of an escape sequence (`None` for a splice that
    /// produces no character), switching to buffering mode if needed.
    fn push_escape(&mut self, c: Option<char>) {
        if let Err(len) = self.mode {
            // First escape: replay the counted prefix from the saved
            // iterator into the buffer, then buffer from here on.
            self.ident.extend(self.chars.clone().take(len));
            self.mode = Ok(());
        }

        if let Some(c) = c {
            self.ident.push(c);
        }
    }
}
553

554
/// Strips a leading `#!` (shebang) line from `input`.
///
/// Returns `(rest, chars_removed, bytes_removed)`, where `rest` starts at
/// the newline terminating the shebang line (or is empty when there is no
/// newline). Returns `(input, 0, 0)` when there is no shebang.
fn strip_shebang_line(input: &str) -> (&str, usize, usize) {
    if input.starts_with("#!") {
        let stripped = input.trim_start_matches("#!");
        let result = match stripped.char_indices().find(|x| x.1 == '\n') {
            Some((pos, _)) => &stripped[pos..],
            None => "",
        };

        let bytes_removed = input.len() - result.len();
        // Count *characters*, not bytes: the caller advances a `Chars`
        // iterator once per removed character, so a multi-byte character in
        // the shebang line must count as one step, not several.
        let chars_removed = input[..bytes_removed].chars().count();

        (result, chars_removed, bytes_removed)
    } else {
        (input, 0, 0)
    }
}
574

575
impl<'a> Lexer<'a> {
576
    #[inline]
577
    pub fn span(&self) -> Span {
3,981✔
578
        self.token_start..self.token_end
3,981✔
579
    }
580

581
    #[inline]
582
    pub fn slice(&self) -> &'a str {
3,256,970✔
583
        self.source.get(self.span()).unwrap()
3,256,970✔
584
    }
585
}
586

587
/// Iterator of `Token`s over a source string, optionally filtering comments.
pub struct TokenStream<'a> {
    pub(crate) lexer: Lexer<'a>,
    /// When true, comment tokens are silently skipped by `next`.
    skip_comments: bool,
    /// Identifier of the source, attached to every emitted token.
    source_id: Option<SourceId>,
}
592

593
impl<'a> TokenStream<'a> {
    /// Creates a token stream over `input`. A leading `#!` shebang line is
    /// skipped: the lexer's byte offsets are advanced by the removed byte
    /// count and its char iterator by the removed character count.
    pub fn new(input: &'a str, skip_comments: bool, source_id: Option<SourceId>) -> Self {
        let (_, char_offset, bytes_offset) = strip_shebang_line(input);

        let mut res = Self {
            lexer: Lexer::new(input),
            skip_comments,
            source_id, // skip_doc_comments,
        };

        // Keep spans relative to the original input by offsetting in bytes...
        res.lexer.token_start += bytes_offset;
        res.lexer.token_end += bytes_offset;

        // ...while the char iterator advances one step per character.
        for _ in 0..char_offset {
            res.lexer.chars.next();
        }

        res
    }

    /// Wraps the stream so yielded tokens own their text, converted through
    /// `adapter`.
    pub fn into_owned<T, F: ToOwnedString<T>>(self, adapter: F) -> OwnedTokenStream<'a, T, F> {
        OwnedTokenStream {
            stream: self,
            adapter,
            _token_type: PhantomData,
        }
    }
}
621

622
/// Token stream adapter that converts each token's borrowed text into an
/// owned `T` via a [`ToOwnedString`] adapter.
pub struct OwnedTokenStream<'a, T, F> {
    pub(crate) stream: TokenStream<'a>,
    /// Conversion strategy from `Cow<str>` to `T`.
    adapter: F,
    /// Marks the owned token payload type without storing one.
    _token_type: PhantomData<T>,
}

impl<'a, T, F: ToOwnedString<T>> Iterator for OwnedTokenStream<'a, T, F> {
    type Item = Token<'a, T>;

    fn next(&mut self) -> Option<Self::Item> {
        // Delegate to the inner stream, converting only the payload; the
        // source slice and span pass through unchanged.
        self.stream.next().map(|x| Token {
            ty: x.ty.map(|x| self.adapter.own(x)),
            source: x.source,
            span: x.span,
        })
    }
}

impl<'a, T, F: ToOwnedString<T>> OwnedTokenStream<'a, T, F> {
    /// Byte offset just past the most recently yielded token.
    pub fn offset(&self) -> usize {
        self.stream.lexer.span().end
    }
}
645
impl<'a> Iterator for TokenStream<'a> {
646
    type Item = Token<'a, Cow<'a, str>>;
647

648
    fn next(&mut self) -> Option<Self::Item> {
2,233,243✔
649
        self.lexer.next().and_then(|token| {
4,464,012✔
650
            let token = match token {
4,461,538✔
651
                Ok(token) => token,
2,230,761✔
652
                Err(_) => TokenType::Error,
8✔
653
            };
654

655
            let token = Token::new(token, self.lexer.slice(), self.lexer.span(), self.source_id);
2,230,769✔
656
            match token.ty {
55,329✔
657
                // TokenType::Space => self.next(),
658
                TokenType::Comment if self.skip_comments => self.next(),
3✔
659
                // TokenType::DocComment if self.skip_doc_comments => self.next(),
660
                _ => Some(token),
2,230,766✔
661
            }
662
        })
663
    }
664
}
665

666
/// Lexing results all share the [`TokenError`] error type.
pub type Result<T> = std::result::Result<T, TokenError>;

/// Errors produced while lexing a single token.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenError {
    /// A character that cannot start any token.
    UnexpectedChar(char),
    /// Input ended inside a string literal.
    IncompleteString,
    /// Input ended inside (or malformed) a `|…|` escaped identifier.
    IncompleteIdentifier,
    /// Input ended inside a `#| … |#` block comment.
    IncompleteComment,
    /// Unknown escape sequence in a string or escaped identifier.
    InvalidEscape,
    /// Malformed `#\…` character literal.
    InvalidCharacter,
    /// `#x…` literal that is not valid hexadecimal.
    MalformedHexInteger,
    /// `#o…` literal that is not valid octal.
    MalformedOctalInteger,
    /// `#b…` literal that is not valid binary.
    MalformedBinaryInteger,
    /// Malformed `\x…;` / `\u…;` hex escape.
    MalformedByteEscape,
}
681

682
impl<'a> Iterator for Lexer<'a> {
    type Item = Result<TokenType<Cow<'a, str>>>;

    /// Produces the next raw token, dispatching on the first non-whitespace
    /// character. Returns `None` only at end of input.
    fn next(&mut self) -> Option<Self::Item> {
        // A previous word may have split into two tokens (leading `+`);
        // emit the stashed second half first.
        if let Some(t) = self.queued.take() {
            return Some(Ok(t));
        }
        // Crunch until the next input
        self.consume_whitespace();

        self.token_start = self.token_end;

        match self.chars.peek() {
            // Line comment: `;` to end of line.
            Some(';') => {
                self.eat();
                self.read_rest_of_line();
                Some(Ok(TokenType::Comment))
            }

            Some('"') => Some(self.read_string()),

            Some(&paren @ ('(' | '[' | '{')) => {
                self.eat();
                let kind = match paren {
                    '[' => Paren::Square,
                    '{' => Paren::Curly,
                    _ => Paren::Round,
                };
                Some(Ok(TokenType::OpenParen(kind, None)))
            }

            Some(&paren @ (')' | ']' | '}')) => {
                self.eat();
                let kind = match paren {
                    ']' => Paren::Square,
                    '}' => Paren::Curly,
                    _ => Paren::Round,
                };
                Some(Ok(TokenType::CloseParen(kind)))
            }

            // Handle Quotes
            Some('\'') => {
                self.eat();
                Some(Ok(TokenType::QuoteTick))
            }

            Some('`') => {
                self.eat();
                Some(Ok(TokenType::QuasiQuote))
            }

            // `,` is unquote; `,@` is unquote-splicing.
            Some(',') => {
                self.eat();

                if let Some('@') = self.chars.peek() {
                    self.eat();

                    Some(Ok(TokenType::UnquoteSplice))
                } else {
                    Some(Ok(TokenType::Unquote))
                }
            }
            // A sign starts a number only when a digit follows; otherwise it
            // is an ordinary identifier such as `+` or `->`.
            Some('+') | Some('-') => {
                self.eat();
                match self.chars.peek() {
                    Some(&c) if c.is_ascii_digit() => Some(self.read_number()),
                    _ => Some(self.read_word()),
                }
            }
            Some('#') => {
                self.eat();
                let next = self.chars.peek().copied();

                let token = match next {
                    // `#|` opens a nestable block comment.
                    Some('|') => self.read_nestable_comment(),
                    // `#;` comments out the following datum.
                    Some(';') => {
                        self.eat();
                        Ok(TokenType::DatumComment)
                    }
                    _ => self.read_hash_value(),
                };

                Some(token)
            }

            // Any other non-whitespace, non-digit character starts a word.
            // NOTE(review): the `|| *c == '_'` disjunct looks redundant —
            // `_` is already non-whitespace and non-digit; confirm intent.
            Some(c) if !c.is_whitespace() && !c.is_ascii_digit() || *c == '_' => {
                Some(self.read_word())
            }
            Some(c) if c.is_ascii_digit() => Some(self.read_number()),
            Some(_) => self.eat().map(|e| Err(TokenError::UnexpectedChar(e))),
            None => None,
        }
    }
}
777

778
// Split the string by + and -. Returns at most 2 elements or `None` if there were more than 2.
779
fn split_into_complex<'a>(s: &'a str) -> Option<SmallVec<[NumPart<'a>; 2]>> {
32,598✔
780
    let classify_num_part = |s: &'a str| -> NumPart<'a> {
65,238✔
781
        match s.chars().last() {
32,640✔
782
            Some('i') => NumPart::Imaginary(&s[..s.len() - 1]),
47✔
783
            _ => NumPart::Real(s),
32,593✔
784
        }
785
    };
786
    let idxs: SmallVec<[usize; 3]> = s
32,598✔
787
        .char_indices()
788
        .filter(|(_, ch)| *ch == '+' || *ch == '-')
138,199✔
789
        .map(|(idx, _)| idx)
65,402✔
790
        .take(3)
791
        .collect();
792
    let parts = match idxs.as_slice() {
65,195✔
793
        [] | [0] => SmallVec::from_iter(std::iter::once(s).map(classify_num_part)),
65,345✔
794
        [idx] | [0, idx] => {
43✔
795
            SmallVec::from_iter([&s[0..*idx], &s[*idx..]].into_iter().map(classify_num_part))
43✔
796
        }
797
        _ => return None,
1✔
798
    };
799
    Some(parts)
×
800
}
801

802
/// One sign-delimited part of a numeric literal.
#[derive(Debug)]
enum NumPart<'a> {
    /// A real-valued part (no trailing `i`).
    Real(&'a str),
    /// An imaginary part, with the trailing `i` already stripped.
    Imaginary(&'a str),
}
807

808
/// Parses one real-valued literal: an integer, a `n/d` rational, or a float
/// (anything containing `.` or an exponent marker). Returns `None` for
/// malformed input.
fn parse_real(s: &str) -> Option<RealLiteral> {
    let mut has_e = false;
    let mut has_dot = false;
    let mut frac_position = None;
    // Single validation pass: classify the literal and reject misplaced
    // or duplicated markers before delegating to `parse`.
    for (idx, ch) in s.chars().enumerate() {
        match ch {
            // Signs are only valid in the leading position. NOTE(review):
            // this also rejects exponent signs like `1e+5`; upstream,
            // `split_into_complex` treats such a `+` as a complex split.
            '+' => {
                if idx != 0 {
                    return None;
                }
            }
            '-' => {
                if idx != 0 {
                    return None;
                }
            }
            'e' | 'E' => {
                if has_e {
                    return None;
                };
                has_e = true;
            }
            // At most one `/`, marking a rational's numerator/denominator split.
            '/' => {
                frac_position = match frac_position {
                    Some(_) => return None,
                    None => Some(idx),
                }
            }
            '.' => {
                if has_dot {
                    return None;
                }
                has_dot = true
            }
            // Digit validity is left to the `parse` calls below.
            _ => {}
        }
    }
    if has_e || has_dot {
        // Exponent or decimal point: parse as f64.
        s.parse().map(|f| RealLiteral::Float(f)).ok()
    } else if let Some(p) = frac_position {
        // `n/d` rational: split around the slash and parse both halves.
        let (n_str, d_str) = s.split_at(p);
        let d_str = &d_str[1..];
        let n: IntLiteral = n_str.parse().ok()?;
        let d: IntLiteral = d_str.parse().ok()?;
        Some(RealLiteral::Rational(n, d))
    } else {
        let int: IntLiteral = s.parse().ok()?;
        Some(RealLiteral::Int(int))
    }
}
858

859
fn parse_number(s: &str) -> Option<NumberLiteral> {
32,598✔
860
    match split_into_complex(s)?.as_slice() {
65,196✔
861
        [NumPart::Real(x)] => parse_real(x).map(NumberLiteral::from),
32,550✔
862
        [NumPart::Imaginary(x)] => {
4✔
863
            if !matches!(x.chars().next(), Some('+') | Some('-')) {
4✔
864
                return None;
×
865
            };
866
            Some(NumberLiteral::Complex(IntLiteral::Small(0).into(), parse_real(x)?).into())
8✔
867
        }
868
        [NumPart::Real(re), NumPart::Imaginary(im)]
41✔
869
        | [NumPart::Imaginary(im), NumPart::Real(re)] => {
×
870
            Some(NumberLiteral::Complex(parse_real(re)?, parse_real(im)?))
83✔
871
        }
872
        _ => None,
2✔
873
    }
874
}
875

876
#[cfg(test)]
877
mod lexer_tests {
878
    use std::str::FromStr;
879

880
    use super::*;
881
    use crate::span::Span;
882
    use crate::tokens::{IntLiteral, TokenType::*};
883
    use pretty_assertions::assert_eq;
884

885
    fn identifier(ident: &str) -> TokenType<Cow<str>> {
886
        Identifier(ident.into())
887
    }
888

889
    // TODO: Figure out why this just cause an infinite loop when parsing it?
890
    #[test]
891
    fn test_identifier_with_quote_end() {
892
        let s = TokenStream::new(
893
            "        (define (stream-cdr stream)
894
            ((stream-cdr' stream)))
895
",
896
            true,
897
            SourceId::none(),
898
        );
899

900
        for token in s {
901
            println!("{:?}", token);
902
        }
903
    }
904

905
    #[test]
906
    fn test_bracket_characters() {
907
        let s = TokenStream::new(
908
            "[(equal? #\\[ (car chars)) (b (cdr chars) (+ sum 1))]",
909
            true,
910
            SourceId::none(),
911
        );
912

913
        for token in s {
914
            println!("{:?}", token);
915
        }
916
    }
917

918
    #[test]
919
    fn test_escape_in_string() {
920
        let s = TokenStream::new(r#"(display "}\n")"#, true, SourceId::none());
921

922
        for token in s {
923
            println!("{:?}", token);
924
        }
925
    }
926

927
    #[test]
928
    fn test_quote_within_word() {
929
        let mut s = TokenStream::new("'foo\\'a", true, SourceId::none());
930

931
        println!("{:?}", s.next());
932
        println!("{:?}", s.next());
933
        println!("{:?}", s.next());
934
    }
935

936
    #[test]
937
    fn test_single_period() {
938
        let mut s = TokenStream::new(".", true, SourceId::none());
939

940
        println!("{:?}", s.next());
941
    }
942

943
    #[test]
944
    fn test_chars() {
945
        let mut s = TokenStream::new("#\\a #\\b #\\λ", true, SourceId::none());
946

947
        assert_eq!(
948
            s.next(),
949
            Some(Token {
950
                ty: CharacterLiteral('a'),
951
                source: "#\\a",
952
                span: Span::new(0, 3, SourceId::none())
953
            })
954
        );
955
        assert_eq!(
956
            s.next(),
957
            Some(Token {
958
                ty: CharacterLiteral('b'),
959
                source: "#\\b",
960
                span: Span::new(4, 7, SourceId::none())
961
            })
962
        );
963
        assert_eq!(
964
            s.next(),
965
            Some(Token {
966
                ty: CharacterLiteral('λ'),
967
                source: "#\\λ",
968
                span: Span::new(8, 12, SourceId::none())
969
            })
970
        );
971
    }
972

973
    #[test]
974
    fn test_unicode_escapes() {
975
        let mut s = TokenStream::new(
976
            r#"  #\xAb #\u{0D300} #\u0540 "\x00D;" "\u1044;" "\u{045}"  "#,
977
            true,
978
            SourceId::none(),
979
        );
980

981
        assert_eq!(
982
            s.next().unwrap(),
983
            Token {
984
                ty: CharacterLiteral('«'),
985
                source: r#"#\xAb"#,
986
                span: Span::new(2, 7, SourceId::none())
987
            }
988
        );
989

990
        assert_eq!(
991
            s.next().unwrap(),
992
            Token {
993
                ty: CharacterLiteral('팀'),
994
                source: r#"#\u{0D300}"#,
995
                span: Span::new(8, 18, SourceId::none())
996
            }
997
        );
998

999
        assert_eq!(
1000
            s.next().unwrap(),
1001
            Token {
1002
                ty: CharacterLiteral('Հ'),
1003
                source: r#"#\u0540"#,
1004
                span: Span::new(19, 26, SourceId::none())
1005
            }
1006
        );
1007

1008
        assert_eq!(
1009
            s.next().unwrap(),
1010
            Token {
1011
                ty: StringLiteral(Arc::from("\r".to_string())),
1012
                source: r#""\x00D;""#,
1013
                span: Span::new(27, 35, SourceId::none())
1014
            }
1015
        );
1016

1017
        assert_eq!(
1018
            s.next().unwrap(),
1019
            Token {
1020
                ty: StringLiteral(Arc::from("၄".to_string())),
1021
                source: r#""\u1044;""#,
1022
                span: Span::new(36, 45, SourceId::none())
1023
            }
1024
        );
1025

1026
        assert_eq!(
1027
            s.next().unwrap(),
1028
            Token {
1029
                ty: StringLiteral(Arc::from("E".to_string())),
1030
                source: r#""\u{045}""#,
1031
                span: Span::new(46, 55, SourceId::none())
1032
            }
1033
        );
1034
    }
1035

1036
    #[test]
1037
    fn test_invalid_unicode_escapes() {
1038
        let tokens = [
1039
            r#" #\xd820 "#,
1040
            r#" #\u{1 "#,
1041
            r#" "\xabx" "#,
1042
            r#" "\u0045" "#,
1043
            r#" #\xaaaaaaaa " "#,
1044
            r#" "\u{ffffffff}" "#,
1045
            r#" #\u{} "#,
1046
        ];
1047

1048
        for token in tokens {
1049
            let mut s = TokenStream::new(token, true, SourceId::none());
1050

1051
            assert_eq!(s.next().unwrap().ty, Error, "{:?} should be invalid", token);
1052
        }
1053
    }
1054

1055
    #[test]
1056
    fn test_string_newlines() {
1057
        let mut s = TokenStream::new(
1058
            " \"foo\nbar\" \"foo \\  \n   bar\" ",
1059
            true,
1060
            SourceId::none(),
1061
        );
1062

1063
        assert_eq!(
1064
            s.next().unwrap(),
1065
            Token {
1066
                ty: StringLiteral(Arc::from("foo\nbar".to_string())),
1067
                source: "\"foo\nbar\"",
1068
                span: Span::new(1, 10, SourceId::none())
1069
            }
1070
        );
1071

1072
        assert_eq!(
1073
            s.next().unwrap(),
1074
            Token {
1075
                ty: StringLiteral(Arc::from("foo bar".to_string())),
1076
                source: "\"foo \\  \n   bar\"",
1077
                span: Span::new(11, 27, SourceId::none())
1078
            }
1079
        );
1080
    }
1081

1082
    #[test]
1083
    fn test_unexpected_char() {
1084
        let mut s = TokenStream::new("($)", true, SourceId::none());
1085
        assert_eq!(
1086
            s.next(),
1087
            Some(Token {
1088
                ty: OpenParen(Paren::Round, None),
1089
                source: "(",
1090
                span: Span::new(0, 1, SourceId::none())
1091
            })
1092
        );
1093
        assert_eq!(
1094
            s.next(),
1095
            Some(Token {
1096
                ty: identifier("$"),
1097
                source: "$",
1098
                span: Span::new(1, 2, SourceId::none())
1099
            })
1100
        );
1101
        assert_eq!(
1102
            s.next(),
1103
            Some(Token {
1104
                ty: CloseParen(Paren::Round),
1105
                source: ")",
1106
                span: Span::new(2, 3, SourceId::none())
1107
            })
1108
        );
1109
    }
1110

1111
    #[test]
1112
    fn test_words() {
1113
        let mut s = TokenStream::new("foo FOO _123_ Nil #f #t", true, SourceId::none());
1114

1115
        assert_eq!(
1116
            s.next(),
1117
            Some(Token {
1118
                ty: identifier("foo"),
1119
                source: "foo",
1120
                span: Span::new(0, 3, SourceId::none())
1121
            })
1122
        );
1123

1124
        assert_eq!(
1125
            s.next(),
1126
            Some(Token {
1127
                ty: identifier("FOO"),
1128
                source: "FOO",
1129
                span: Span::new(4, 7, SourceId::none())
1130
            })
1131
        );
1132

1133
        assert_eq!(
1134
            s.next(),
1135
            Some(Token {
1136
                ty: identifier("_123_"),
1137
                source: "_123_",
1138
                span: Span::new(8, 13, SourceId::none())
1139
            })
1140
        );
1141

1142
        assert_eq!(
1143
            s.next(),
1144
            Some(Token {
1145
                ty: identifier("Nil"),
1146
                source: "Nil",
1147
                span: Span::new(14, 17, SourceId::none())
1148
            })
1149
        );
1150

1151
        assert_eq!(
1152
            s.next(),
1153
            Some(Token {
1154
                ty: BooleanLiteral(false),
1155
                source: "#f",
1156
                span: Span::new(18, 20, SourceId::none())
1157
            })
1158
        );
1159

1160
        assert_eq!(
1161
            s.next(),
1162
            Some(Token {
1163
                ty: BooleanLiteral(true),
1164
                source: "#t",
1165
                span: Span::new(21, 23, SourceId::none())
1166
            })
1167
        );
1168

1169
        assert_eq!(s.next(), None);
1170
    }
1171

1172
    #[test]
1173
    fn test_almost_literals() {
1174
        let got: Vec<_> =
1175
            TokenStream::new("1e 1ee 1.2e5.4 1E10/4 1.45# 3- e10", true, SourceId::none())
1176
                .collect();
1177
        assert_eq!(
1178
            got.as_slice(),
1179
            &[
1180
                Token {
1181
                    ty: identifier("1e"),
1182
                    source: "1e",
1183
                    span: Span::new(0, 2, SourceId::none()),
1184
                },
1185
                Token {
1186
                    ty: identifier("1ee"),
1187
                    source: "1ee",
1188
                    span: Span::new(3, 6, SourceId::none()),
1189
                },
1190
                Token {
1191
                    ty: identifier("1.2e5.4"),
1192
                    source: "1.2e5.4",
1193
                    span: Span::new(7, 14, SourceId::none()),
1194
                },
1195
                Token {
1196
                    ty: identifier("1E10/4"),
1197
                    source: "1E10/4",
1198
                    span: Span::new(15, 21, SourceId::none()),
1199
                },
1200
                Token {
1201
                    ty: identifier("1.45#"),
1202
                    source: "1.45#",
1203
                    span: Span::new(22, 27, SourceId::none()),
1204
                },
1205
                Token {
1206
                    ty: identifier("3-"),
1207
                    source: "3-",
1208
                    span: Span::new(28, 30, SourceId::none()),
1209
                },
1210
                Token {
1211
                    ty: identifier("e10"),
1212
                    source: "e10",
1213
                    span: Span::new(31, 34, SourceId::none()),
1214
                },
1215
            ]
1216
        );
1217
    }
1218

1219
    #[test]
1220
    fn test_real_numbers() {
1221
        let got: Vec<_> = TokenStream::new(
1222
            "0 -0 -1.2 +2.3 999 1. 1e2 1E2 1.2e2 1.2E2 +inf.0 -inf.0",
1223
            true,
1224
            SourceId::none(),
1225
        )
1226
        .collect();
1227
        assert_eq!(
1228
            got.as_slice(),
1229
            &[
1230
                Token {
1231
                    ty: IntLiteral::Small(0).into(),
1232
                    source: "0",
1233
                    span: Span::new(0, 1, SourceId::none()),
1234
                },
1235
                Token {
1236
                    ty: IntLiteral::Small(0).into(),
1237
                    source: "-0",
1238
                    span: Span::new(2, 4, SourceId::none()),
1239
                },
1240
                Token {
1241
                    ty: RealLiteral::Float(-1.2).into(),
1242
                    source: "-1.2",
1243
                    span: Span::new(5, 9, SourceId::none()),
1244
                },
1245
                Token {
1246
                    ty: RealLiteral::Float(2.3).into(),
1247
                    source: "+2.3",
1248
                    span: Span::new(10, 14, SourceId::none()),
1249
                },
1250
                Token {
1251
                    ty: IntLiteral::Small(999).into(),
1252
                    source: "999",
1253
                    span: Span::new(15, 18, SourceId::none()),
1254
                },
1255
                Token {
1256
                    ty: RealLiteral::Float(1.0).into(),
1257
                    source: "1.",
1258
                    span: Span::new(19, 21, SourceId::none()),
1259
                },
1260
                Token {
1261
                    ty: RealLiteral::Float(100.0).into(),
1262
                    source: "1e2",
1263
                    span: Span::new(22, 25, SourceId::none()),
1264
                },
1265
                Token {
1266
                    ty: RealLiteral::Float(100.0).into(),
1267
                    source: "1E2",
1268
                    span: Span::new(26, 29, SourceId::none()),
1269
                },
1270
                Token {
1271
                    ty: RealLiteral::Float(120.0).into(),
1272
                    source: "1.2e2",
1273
                    span: Span::new(30, 35, SourceId::none()),
1274
                },
1275
                Token {
1276
                    ty: RealLiteral::Float(120.0).into(),
1277
                    source: "1.2E2",
1278
                    span: Span::new(36, 41, SourceId::none()),
1279
                },
1280
                Token {
1281
                    ty: RealLiteral::Float(f64::INFINITY).into(),
1282
                    source: "+inf.0",
1283
                    span: Span::new(42, 48, SourceId::none()),
1284
                },
1285
                Token {
1286
                    ty: RealLiteral::Float(f64::NEG_INFINITY).into(),
1287
                    source: "-inf.0",
1288
                    span: Span::new(49, 55, SourceId::none()),
1289
                },
1290
            ]
1291
        );
1292
    }
1293

1294
    #[test]
1295
    fn test_nan() {
1296
        // nan does not equal nan so we have to run the is_nan predicate.
1297
        let got = TokenStream::new("+nan.0", true, SourceId::none())
1298
            .next()
1299
            .unwrap();
1300

1301
        match got.ty {
1302
            TokenType::Number(n) => {
1303
                assert!(matches!(*n, NumberLiteral::Real(RealLiteral::Float(x)) if x.is_nan()))
1304
            }
1305

1306
            _ => panic!("Didn't match"),
1307
        }
1308

1309
        let got = TokenStream::new("-nan.0", true, None).next().unwrap();
1310

1311
        match got.ty {
1312
            TokenType::Number(n) => {
1313
                assert!(matches!(*n, NumberLiteral::Real(RealLiteral::Float(x)) if x.is_nan()))
1314
            }
1315

1316
            _ => panic!("Didn't match"),
1317
        }
1318
    }
1319

1320
    #[test]
1321
    fn test_rationals() {
1322
        let got: Vec<_> = TokenStream::new(
1323
            r#"
1324
                1/4
1325
                (1/4 1/3)
1326
                11111111111111111111/22222222222222222222
1327
                /
1328
                1/
1329
                1/4.0
1330
                1//4
1331
                1 / 4
1332
"#,
1333
            true,
1334
            SourceId::none(),
1335
        )
1336
        .collect();
1337
        assert_eq!(
1338
            got.as_slice(),
1339
            &[
1340
                Token {
1341
                    ty: RealLiteral::Rational(IntLiteral::Small(1), IntLiteral::Small(4)).into(),
1342
                    source: "1/4",
1343
                    span: Span::new(17, 20, SourceId::none()),
1344
                },
1345
                Token {
1346
                    ty: OpenParen(Paren::Round, None),
1347
                    source: "(",
1348
                    span: Span::new(37, 38, SourceId::none()),
1349
                },
1350
                Token {
1351
                    ty: RealLiteral::Rational(IntLiteral::Small(1), IntLiteral::Small(4)).into(),
1352
                    source: "1/4",
1353
                    span: Span::new(38, 41, SourceId::none()),
1354
                },
1355
                Token {
1356
                    ty: RealLiteral::Rational(IntLiteral::Small(1), IntLiteral::Small(3)).into(),
1357
                    source: "1/3",
1358
                    span: Span::new(42, 45, SourceId::none()),
1359
                },
1360
                Token {
1361
                    ty: CloseParen(Paren::Round),
1362
                    source: ")",
1363
                    span: Span::new(45, 46, SourceId::none()),
1364
                },
1365
                Token {
1366
                    ty: RealLiteral::Rational(
1367
                        IntLiteral::from_str("11111111111111111111").unwrap(),
1368
                        IntLiteral::from_str("22222222222222222222").unwrap(),
1369
                    )
1370
                    .into(),
1371
                    source: "11111111111111111111/22222222222222222222",
1372
                    span: Span::new(63, 104, SourceId::none()),
1373
                },
1374
                Token {
1375
                    ty: identifier("/"),
1376
                    source: "/",
1377
                    span: Span::new(121, 122, SourceId::none()),
1378
                },
1379
                Token {
1380
                    ty: identifier("1/"),
1381
                    source: "1/",
1382
                    span: Span::new(139, 141, SourceId::none()),
1383
                },
1384
                Token {
1385
                    ty: identifier("1/4.0"),
1386
                    source: "1/4.0",
1387
                    span: Span::new(158, 163, SourceId::none()),
1388
                },
1389
                Token {
1390
                    ty: identifier("1//4"),
1391
                    source: "1//4",
1392
                    span: Span::new(180, 184, SourceId::none()),
1393
                },
1394
                Token {
1395
                    ty: IntLiteral::Small(1).into(),
1396
                    source: "1",
1397
                    span: Span::new(201, 202, SourceId::none()),
1398
                },
1399
                Token {
1400
                    ty: identifier("/"),
1401
                    source: "/",
1402
                    span: Span::new(203, 204, SourceId::none()),
1403
                },
1404
                Token {
1405
                    ty: IntLiteral::Small(4).into(),
1406
                    source: "4",
1407
                    span: Span::new(205, 206, SourceId::none()),
1408
                },
1409
            ]
1410
        );
1411
    }
1412

1413
    #[test]
1414
    fn test_complex_numbers() {
1415
        let got: Vec<_> = TokenStream::new(
1416
            "1+2i 3-4i +5+6i +1i 1.0+2.0i 3-4.0i +1.0i",
1417
            true,
1418
            SourceId::none(),
1419
        )
1420
        .collect();
1421
        assert_eq!(
1422
            got.as_slice(),
1423
            &[
1424
                Token {
1425
                    ty: NumberLiteral::Complex(
1426
                        IntLiteral::Small(1).into(),
1427
                        IntLiteral::Small(2).into()
1428
                    )
1429
                    .into(),
1430
                    source: "1+2i",
1431
                    span: Span::new(0, 4, SourceId::none()),
1432
                },
1433
                Token {
1434
                    ty: NumberLiteral::Complex(
1435
                        IntLiteral::Small(3).into(),
1436
                        IntLiteral::Small(-4).into()
1437
                    )
1438
                    .into(),
1439
                    source: "3-4i",
1440
                    span: Span::new(5, 9, SourceId::none()),
1441
                },
1442
                Token {
1443
                    ty: NumberLiteral::Complex(
1444
                        IntLiteral::Small(5).into(),
1445
                        IntLiteral::Small(6).into()
1446
                    )
1447
                    .into(),
1448
                    source: "+5+6i",
1449
                    span: Span::new(10, 15, SourceId::none()),
1450
                },
1451
                Token {
1452
                    ty: NumberLiteral::Complex(
1453
                        IntLiteral::Small(0).into(),
1454
                        IntLiteral::Small(1).into()
1455
                    )
1456
                    .into(),
1457
                    source: "+1i",
1458
                    span: Span::new(16, 19, SourceId::none()),
1459
                },
1460
                Token {
1461
                    ty: NumberLiteral::Complex(
1462
                        RealLiteral::Float(1.0).into(),
1463
                        RealLiteral::Float(2.0).into()
1464
                    )
1465
                    .into(),
1466
                    source: "1.0+2.0i",
1467
                    span: Span::new(20, 28, SourceId::none()),
1468
                },
1469
                Token {
1470
                    ty: NumberLiteral::Complex(
1471
                        IntLiteral::Small(3).into(),
1472
                        RealLiteral::Float(-4.0).into()
1473
                    )
1474
                    .into(),
1475
                    source: "3-4.0i",
1476
                    span: Span::new(29, 35, SourceId::none()),
1477
                },
1478
                Token {
1479
                    ty: NumberLiteral::Complex(
1480
                        IntLiteral::Small(0).into(),
1481
                        RealLiteral::Float(1.0).into()
1482
                    )
1483
                    .into(),
1484
                    source: "+1.0i",
1485
                    span: Span::new(36, 41, SourceId::none()),
1486
                },
1487
            ]
1488
        );
1489
    }
1490

1491
    #[test]
1492
    fn test_malformed_complex_numbers_are_identifiers() {
1493
        let got: Vec<_> =
1494
            TokenStream::new("i -i 1i+1i 4+i -4+-2i", true, SourceId::none()).collect();
1495
        assert_eq!(
1496
            got.as_slice(),
1497
            &[
1498
                Token {
1499
                    ty: identifier("i"),
1500
                    source: "i",
1501
                    span: Span::new(0, 1, SourceId::none()),
1502
                },
1503
                Token {
1504
                    ty: identifier("-i"),
1505
                    source: "-i",
1506
                    span: Span::new(2, 4, SourceId::none()),
1507
                },
1508
                Token {
1509
                    ty: identifier("1i+1i"),
1510
                    source: "1i+1i",
1511
                    span: Span::new(5, 10, SourceId::none()),
1512
                },
1513
                Token {
1514
                    ty: identifier("4+i"),
1515
                    source: "4+i",
1516
                    span: Span::new(11, 14, SourceId::none()),
1517
                },
1518
                Token {
1519
                    ty: identifier("-4+-2i"),
1520
                    source: "-4+-2i",
1521
                    span: Span::new(15, 21, SourceId::none()),
1522
                },
1523
            ]
1524
        );
1525
    }
1526

1527
    #[test]
1528
    fn test_string() {
1529
        let got: Vec<_> =
1530
            TokenStream::new(r#" "" "Foo bar" "\"\\" "#, true, SourceId::none()).collect();
1531
        assert_eq!(
1532
            got.as_slice(),
1533
            &[
1534
                Token {
1535
                    ty: StringLiteral(Arc::new(r#""#.to_string())),
1536
                    source: r#""""#,
1537
                    span: Span::new(1, 3, SourceId::none()),
1538
                },
1539
                Token {
1540
                    ty: StringLiteral(Arc::new(r#"Foo bar"#.to_string())),
1541
                    source: r#""Foo bar""#,
1542
                    span: Span::new(4, 13, SourceId::none()),
1543
                },
1544
                Token {
1545
                    ty: StringLiteral(Arc::new(r#""\"#.to_string())),
1546
                    source: r#""\"\\""#,
1547
                    span: Span::new(14, 20, SourceId::none()),
1548
                },
1549
            ]
1550
        );
1551
    }
1552

1553
    #[test]
1554
    fn test_comment() {
1555
        let mut s = TokenStream::new(";!/usr/bin/gate\n   ; foo\n", true, SourceId::none());
1556
        assert_eq!(s.next(), None);
1557
    }
1558

1559
    #[test]
1560
    fn function_definition() {
1561
        let s = TokenStream::new(
1562
            "(define odd-rec? (lambda (x) (if (= x 0) #f (even-rec? (- x 1)))))",
1563
            true,
1564
            SourceId::none(),
1565
        );
1566
        let res: Vec<Token<Cow<str>>> = s.collect();
1567

1568
        println!("{:#?}", res);
1569
    }
1570

1571
    #[test]
1572
    fn lex_string_with_escape_chars() {
1573
        let s = TokenStream::new("\"\0\0\0\"", true, SourceId::none());
1574
        let res: Vec<Token<Cow<str>>> = s.collect();
1575
        println!("{:#?}", res);
1576
    }
1577

1578
    #[test]
1579
    fn scheme_statement() {
1580
        let s = TokenStream::new("(apples (function a b) (+ a b))", true, SourceId::none());
1581
        let res: Vec<Token<Cow<str>>> = s.collect();
1582

1583
        let expected: Vec<Token<Cow<str>>> = vec![
1584
            Token {
1585
                ty: OpenParen(Paren::Round, None),
1586
                source: "(",
1587
                span: Span::new(0, 1, SourceId::none()),
1588
            },
1589
            Token {
1590
                ty: identifier("apples"),
1591
                source: "apples",
1592
                span: Span::new(1, 7, SourceId::none()),
1593
            },
1594
            Token {
1595
                ty: OpenParen(Paren::Round, None),
1596
                source: "(",
1597
                span: Span::new(8, 9, SourceId::none()),
1598
            },
1599
            Token {
1600
                ty: identifier("function"),
1601
                source: "function",
1602
                span: Span::new(9, 17, SourceId::none()),
1603
            },
1604
            Token {
1605
                ty: identifier("a"),
1606
                source: "a",
1607
                span: Span::new(18, 19, SourceId::none()),
1608
            },
1609
            Token {
1610
                ty: identifier("b"),
1611
                source: "b",
1612
                span: Span::new(20, 21, SourceId::none()),
1613
            },
1614
            Token {
1615
                ty: CloseParen(Paren::Round),
1616
                source: ")",
1617
                span: Span::new(21, 22, SourceId::none()),
1618
            },
1619
            Token {
1620
                ty: OpenParen(Paren::Round, None),
1621
                source: "(",
1622
                span: Span::new(23, 24, SourceId::none()),
1623
            },
1624
            Token {
1625
                ty: identifier("+"),
1626
                source: "+",
1627
                span: Span::new(24, 25, SourceId::none()),
1628
            },
1629
            Token {
1630
                ty: identifier("a"),
1631
                source: "a",
1632
                span: Span::new(26, 27, SourceId::none()),
1633
            },
1634
            Token {
1635
                ty: identifier("b"),
1636
                source: "b",
1637
                span: Span::new(28, 29, SourceId::none()),
1638
            },
1639
            Token {
1640
                ty: CloseParen(Paren::Round),
1641
                source: ")",
1642
                span: Span::new(29, 30, SourceId::none()),
1643
            },
1644
            Token {
1645
                ty: CloseParen(Paren::Round),
1646
                source: ")",
1647
                span: Span::new(30, 31, SourceId::none()),
1648
            },
1649
        ];
1650

1651
        assert_eq!(res, expected);
1652
    }
1653

1654
    #[test]
1655
    fn test_bigint() {
1656
        let s = TokenStream::new("9223372036854775808", true, SourceId::none()); // isize::MAX + 1
1657
        let res: Vec<Token<Cow<str>>> = s.collect();
1658

1659
        let expected_bigint = Box::new("9223372036854775808".parse().unwrap());
1660

1661
        let expected: Vec<Token<Cow<str>>> = vec![Token {
1662
            ty: IntLiteral::Big(expected_bigint).into(),
1663
            source: "9223372036854775808",
1664
            span: Span::new(0, 19, SourceId::none()),
1665
        }];
1666

1667
        assert_eq!(res, expected);
1668
    }
1669

1670
    #[test]
1671
    fn negative_test_bigint() {
1672
        let s = TokenStream::new("-9223372036854775809", true, SourceId::none()); // isize::MIN - 1
1673
        let res: Vec<Token<Cow<str>>> = s.collect();
1674

1675
        let expected_bigint = Box::new("-9223372036854775809".parse().unwrap());
1676

1677
        let expected: Vec<Token<Cow<str>>> = vec![Token {
1678
            ty: IntLiteral::Big(expected_bigint).into(),
1679
            source: "-9223372036854775809",
1680
            span: Span::new(0, 20, SourceId::none()),
1681
        }];
1682

1683
        assert_eq!(res, expected);
1684
    }
1685

1686
    #[test]
1687
    fn identifier_test() {
1688
        let s = TokenStream::new("a b(c`d'e\"www\"f,g;", true, SourceId::none());
1689

1690
        let tokens: Vec<(TokenType<Cow<str>>, &str)> =
1691
            s.map(|token| (token.ty, token.source)).collect();
1692

1693
        assert_eq!(tokens[0], (identifier("a"), "a"));
1694
        assert_eq!(tokens[1], (identifier("b"), "b"));
1695
        assert_eq!(tokens[3], (identifier("c"), "c"));
1696
        assert_eq!(tokens[5], (identifier("d"), "d"));
1697
        assert_eq!(tokens[7], (identifier("e"), "e"));
1698
        assert_eq!(tokens[9], (identifier("f"), "f"));
1699
        assert_eq!(tokens[11], (identifier("g"), "g"));
1700
    }
1701

1702
    #[test]
1703
    fn vector_test() {
1704
        let s = TokenStream::new("a b #(c d)", true, None);
1705

1706
        let tokens: Vec<(TokenType<Cow<str>>, &str)> =
1707
            s.map(|token| (token.ty, token.source)).collect();
1708

1709
        assert_eq!(tokens[0], (identifier("a"), "a"));
1710
        assert_eq!(tokens[1], (identifier("b"), "b"));
1711
        assert_eq!(
1712
            tokens[2],
1713
            (
1714
                TokenType::OpenParen(Paren::Round, Some(ParenMod::Vector)),
1715
                "#("
1716
            )
1717
        );
1718
        assert_eq!(tokens[3], (identifier("c"), "c"));
1719
        assert_eq!(tokens[4], (identifier("d"), "d"));
1720
    }
1721

1722
    #[test]
1723
    fn bytevector_test() {
1724
        let s = TokenStream::new("a b #u8(1 2)", true, None);
1725

1726
        let tokens: Vec<(TokenType<Cow<str>>, &str)> =
1727
            s.map(|token| (token.ty, token.source)).collect();
1728

1729
        assert_eq!(tokens[0], (identifier("a"), "a"));
1730
        assert_eq!(tokens[1], (identifier("b"), "b"));
1731
        assert_eq!(
1732
            tokens[2],
1733
            (
1734
                TokenType::OpenParen(Paren::Round, Some(ParenMod::Bytes)),
1735
                "#u8("
1736
            )
1737
        );
1738
        assert_eq!(tokens[5], (TokenType::CloseParen(Paren::Round), ")"));
1739
    }
1740

1741
    #[test]
    fn special_comments_test() {
        // Block comments lex as a single `Comment` token, even when they
        // contain unbalanced brackets or a nested `#| ... |#` pair.
        for src in ["#| f(\n [ |#", "#| a #| ( |# |#"] {
            let mut lexer = Lexer::new(src);
            assert_eq!(lexer.next(), Some(Ok(TokenType::Comment)));
        }

        // `#;` introduces a datum comment regardless of the datum that
        // follows it — a list, a vector literal, or a chained `#; #;`.
        for src in ["#;(a b)", "#; #(#true 3)", "#; #; 3 5"] {
            let mut lexer = Lexer::new(src);
            assert_eq!(lexer.next(), Some(Ok(TokenType::DatumComment)));
        }
    }
1758

1759
    #[test]
    fn comment_error_test() {
        // An unterminated block comment must surface as IncompleteComment.
        let mut lexer = Lexer::new("#|");
        let first = lexer.next().unwrap();
        assert_eq!(first, Err(TokenError::IncompleteComment));
    }
1765

1766
    #[test]
    fn escaped_identifier_test() {
        // Pipe-delimited identifiers: a plain name, one containing a space,
        // one built from a hex escape (`\x61;` == 'a'), and one that is an
        // otherwise-special character (`.`). Each case records the expected
        // decoded name, raw source slice, and span bounds.
        let mut s = TokenStream::new(r#"|a| |a b| |\x61;| |.|"#, true, SourceId::none());

        let cases = [
            ("a", "|a|", 0, 3),
            ("a b", "|a b|", 4, 9),
            ("a", r#"|\x61;|"#, 10, 17),
            (".", "|.|", 18, 21),
        ];

        for (name, source, start, end) in cases {
            assert_eq!(
                s.next().unwrap(),
                Token {
                    ty: identifier(name),
                    source,
                    span: Span::new(start, end, None),
                },
            );
        }

        // A backslash immediately followed by a newline inside the delimiters
        // acts as a line continuation and is dropped from the decoded name.
        let mut s = TokenStream::new("|a\\\nb|", true, SourceId::none());

        assert_eq!(
            s.next().unwrap(),
            Token {
                ty: identifier("ab"),
                source: "|a\\\nb|",
                span: Span::new(0, 6, None),
            },
        );
    }
1817
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc