vcfxb / wright-lang / build 17061797732 (push, via GitHub)

19 Aug 2025 06:45AM UTC coverage: 76.633% (+1.0%) from 75.585%
Commit by vcfxb: chore: cargo fmt

28 of 31 new or added lines in 5 files covered (90.32%).
26 existing lines in 3 files now uncovered.
1056 of 1378 relevant lines covered (76.63%).
38.53 hits per line.

Source File: /wright/src/parser.rs (70.83% covered)

//! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
//!
//! [AST]: crate::ast
//! [Token]: crate::lexer::token::Token
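//!
//! A minimal usage sketch (hypothetical: how a [Lexer] is constructed is not shown in this
//! file, and `source` stands in for whatever input the lexer takes):
//!
//! ```ignore
//! let mut parser = Parser::new(Lexer::new(source));
//!
//! // Tokens can be peeked without being consumed, then consumed one at a time.
//! while let Some(token) = parser.next_token()? {
//!     // ... hand `token` to the AST-building routines ...
//! }
//! ```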

use error::{ParserError, ParserErrorKind};

use super::lexer::Lexer;
use crate::{
    lexer::token::{Token, TokenTy},
    source_tracking::fragment::Fragment,
};
use std::collections::VecDeque;

mod decl;
pub mod error;
mod identifier;
mod literal;
mod path;
mod ty;

/// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
#[derive(Debug)]
pub struct Parser {
    lexer: Lexer,
    lookahead: VecDeque<Token>,
}

impl Parser {
    /// Construct a new parser around a given [Lexer].
    pub fn new(lexer: Lexer) -> Self {
        Parser {
            lexer,
            lookahead: VecDeque::new(),
        }
    }

    /// Get the [Lexer] that's wrapped.
    pub fn lexer(&self) -> &Lexer {
        &self.lexer
    }

    /// Look ahead `k` [Token]s.
    ///
    /// If `k == 0` then this is effectively peeking at the next [Token] from the wrapped [Lexer].
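    ///
    /// A small sketch (assumes a parser with at least two tokens remaining):
    ///
    /// ```ignore
    /// let next = parser.lookahead(0);  // same as parser.peek()
    /// let after = parser.lookahead(1); // one token further; still nothing consumed
    /// ```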
    pub fn lookahead(&mut self, k: usize) -> Option<&Token> {
        while self.lookahead.len() <= k {
            self.lookahead.push_back(self.lexer.next_token()?);
        }

        // SAFETY: It's certain that if this function reaches this line, this access is infallible.
        Some(unsafe { self.lookahead.get(k).unwrap_unchecked() })
    }

    /// Peek at the next token from the [Lexer] (cached in the lookahead queue if peeked before).
    pub fn peek(&mut self) -> Option<&Token> {
        self.lookahead(0)
    }

    /// Peek the [Fragment] of the next [Token].
    pub fn peek_fragment(&mut self) -> Option<&Fragment> {
        self.peek().map(|token| &token.fragment)
    }

    /// Peek the [TokenTy] of the next [Token].
    pub fn peek_variant(&mut self) -> Option<TokenTy> {
        self.peek().map(|token| token.variant)
    }

    /// Similar to [Parser::lookahead] but instead returns a slice of `n` [Token]s, starting with the next [Token].
    ///
    /// Returns [None] if `n` is greater than the number of remaining [Token]s for this [Parser].
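    ///
    /// A sketch (the window is only `Some` when at least `n` tokens remain):
    ///
    /// ```ignore
    /// if let Some([first, second]) = parser.lookahead_window(2) {
    ///     // inspect `first` and `second` without consuming either
    /// }
    /// ```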
    pub fn lookahead_window(&mut self, n: usize) -> Option<&[Token]> {
        // Pull tokens from the lexer until the lookahead buffer holds at least `n` of them.
        while self.lookahead.len() < n {
            self.lookahead.push_back(self.lexer.next_token()?);
        }

        // Use make_contiguous here to get a unified/single slice.
        Some(&self.lookahead.make_contiguous()[..n])
    }

    /// Peek the next token that's not whitespace.
    pub fn peek_next_not_whitespace(&mut self) -> Option<&Token> {
        // There's no way to do this in safe Rust, despite the memory accesses being fine,
        // so we do it unsafely here.

        for i in 0.. {
            let peek = self.lookahead(i)?;

            if peek.variant != TokenTy::Whitespace {
                // This bit prevents the Rust compiler from thinking we're breaking
                // lifetime/aliasing rules by mutating the internal state in the next
                // iteration of the loop while still holding a reference to the peeked token.
                unsafe {
                    let const_ref = &raw const *peek;
                    let upcast = &*const_ref;
                    return Some(upcast);
                }
            }
        }

        // SAFETY: For large enough values of `i`, `self.lookahead` eventually has to return `None`,
        // so the loop above can only be exited via `?` or `return` and this line is unreachable.
        unsafe { std::hint::unreachable_unchecked() }
    }

    /// Get the number of remaining bytes on this parser. This is potentially useful for checking
    /// if a parser has advanced between two calls (or checking if a parser has reached end of input).
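    ///
    /// A sketch of the progress check described above:
    ///
    /// ```ignore
    /// let before = parser.bytes_remaining();
    /// // ... attempt to parse something ...
    /// let made_progress = parser.bytes_remaining() < before;
    /// ```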
    pub fn bytes_remaining(&self) -> usize {
        let bytes_remaining_in_lookahead_buffer = self
            .lookahead
            .iter()
            .map(|t| t.fragment.len())
            .sum::<usize>();

        let bytes_remaining_in_lexer = self.lexer.bytes_remaining();

        bytes_remaining_in_lexer + bytes_remaining_in_lookahead_buffer
    }

    /// Get the next [Token] from this [Parser]. This may be a token that's already been peeked.
    ///
    /// Skips any non-document comments encountered via the lexer implementation.
    ///
    /// Returns an error if a [Token] with [TokenTy::Unknown] is encountered.
    pub fn next_token(&mut self) -> Result<Option<Token>, ParserError> {
        let token = self
            .lookahead
            .pop_front()
            .or_else(|| self.lexer.next_token());

        // Check for unknown tokens, which should always convert to an error.
        if let Some(ref t) = token
            && t.variant == TokenTy::Unknown
        {
            // Clone here is avoidable but this code path is super unlikely and
            // probably optimized heavily.
            Err(ParserErrorKind::EncounteredUnknownToken.at(t.fragment.clone()))
        } else {
            Ok(token)
        }
    }

    /// Advance this [Parser] by `n` [Token]s.
    ///
    /// # Panics
    /// - If `n` is greater than the number of remaining tokens.
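    ///
    /// A sketch of consuming a token that was already inspected:
    ///
    /// ```ignore
    /// if parser.peek_variant() == Some(TokenTy::Whitespace) {
    ///     parser.advance(1); // consume the peeked whitespace token
    /// }
    /// ```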
    pub fn advance(&mut self, n: usize) {
        // Add tokens to the lookahead buffer until we have enough to split off.
        while self.lookahead.len() < n {
            let token = self
                .lexer
                .next_token()
                .expect("advance: `n` <= number of remaining tokens");

            self.lookahead.push_back(token);
        }

        // Split them off.
        self.lookahead = self.lookahead.split_off(n);
    }

    /// Peek the [Fragment] of the next [Token] and clone it, or return a clone of the
    /// remainder [Fragment] of the internal [Lexer]
    /// (which will be empty, since there wasn't a [Token] to peek).
    ///
    /// This is likely only useful for error reporting -- a clone of a potentially empty fragment is
    /// rarely ever useful otherwise.
    pub fn peek_fragment_or_rest_cloned(&mut self) -> Fragment {
        match self.peek() {
            Some(Token { fragment, .. }) => fragment.clone(),
            None => {
                let rest = self.lexer.remaining.clone();

                // Assert that we're making the right assumptions about the remaining fragment.
                // These are (unidiomatically) done using debug_assert -- perhaps that changes eventually;
                // however, it should be fine for now, since this can only produce logic bugs (never memory
                // or concurrency bugs).
                debug_assert!(rest.is_valid());
                debug_assert!(rest.is_empty());
                debug_assert!(rest.is_empty_at_end_of_source());

                rest
            }
        }
    }

    /// Get the next [Token] from this parser if its [Token::variant] is the given `token_ty`.
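    ///
    /// A sketch of optionally consuming a whitespace token:
    ///
    /// ```ignore
    /// if let Some(ws) = parser.next_if_is(TokenTy::Whitespace) {
    ///     // `ws` was consumed; any other variant would have been left in place
    /// }
    /// ```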
    pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> {
        // Peeking successfully first means that the lookahead vec will never be empty here.
        (self.peek_variant()? == token_ty)
            // SAFETY: We just peeked a token to check its variant so this unwrap is always ok.
            .then(|| unsafe { self.lookahead.pop_front().unwrap_unchecked() })
    }

    /// Peek at the next [Token]s of this [Parser] and determine if the [Token::variant]s match this
    /// sequence of [TokenTy]s.
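    ///
    /// A sketch of the check-then-consume pattern (the variants shown are illustrative):
    ///
    /// ```ignore
    /// if parser.matches(&[TokenTy::Identifier, TokenTy::Whitespace]) {
    ///     parser.advance(2); // consume both matched tokens
    /// }
    /// ```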
    pub fn matches(&mut self, seq: &[TokenTy]) -> bool {
        // Use the rare let-else to ensure there are, at minimum, the given number of tokens remaining.
        let Some(lookahead_window) = self.lookahead_window(seq.len()) else {
            return false;
        };

        // Use a zipped iterator to compare all the token variants.
        lookahead_window
            .iter()
            .zip(seq)
            .all(|(token, matches)| token.variant == *matches)
    }

    /// Check if the given sequence of token types matches the upcoming tokens (looking ahead
    /// through the lexer/parser) when all intervening whitespace is ignored.
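    ///
    /// A sketch (illustrative variants): unlike [Parser::matches], this returns `true` even when
    /// whitespace tokens sit between the matched tokens.
    ///
    /// ```ignore
    /// parser.matches_ignore_whitespace(&[TokenTy::Identifier, TokenTy::Identifier])
    /// ```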
    pub fn matches_ignore_whitespace(&mut self, seq: &[TokenTy]) -> bool {
        let mut non_whitespace_in_lookahead: usize = self
            .lookahead
            .iter()
            .filter(|t| t.variant != TokenTy::Whitespace)
            .count();

        while non_whitespace_in_lookahead < seq.len() {
            let Some(pop) = self.lexer.next_token() else {
                return false;
            };

            if pop.variant != TokenTy::Whitespace {
                non_whitespace_in_lookahead += 1;
            }

            self.lookahead.push_back(pop);
        }

        let lookahead_iter = self
            .lookahead
            .iter()
            .filter(|t| t.variant != TokenTy::Whitespace)
            .map(|t| t.variant);

        // Check that all the variants are the same.
        seq.iter()
            .zip(lookahead_iter)
            .all(|(seq_item, lookahead_item)| *seq_item == lookahead_item)
    }

    /// Consume & remove all whitespace tokens from the front of the parser.
    pub fn consume_optional_whitespace(&mut self) {
        // Iterate until the next token is not whitespace.
        while self.next_if_is(TokenTy::Whitespace).is_some() {}
    }

    /// Require at least one whitespace [Token] from the [Parser]. Does not advance if the next
    /// [Token] is not whitespace.
    pub fn consume_at_least_one_whitespace(&mut self) -> Result<(), ParserError> {
        if self.next_if_is(TokenTy::Whitespace).is_some() {
            self.consume_optional_whitespace();
            Ok(())
        } else {
            Err(ParserErrorKind::ExpectedWhitespace.at(self.peek_fragment_or_rest_cloned()))
        }
    }
}