• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vcfxb / wright-lang / 14051529229

25 Mar 2025 04:13AM UTC coverage: 75.076% (+20.9%) from 54.177%
14051529229

push

github

vcfxb
coveralls gh action needed a version

994 of 1324 relevant lines covered (75.08%)

29.77 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

48.28
/wright/src/source_tracking/source.rs
1
//! Structure and implementation for storing source code items used by the wright compiler, including
2
//! source files from disk, source strings used in test cases, and source strings created at
3
//! run-time by an API consumer.
4

5
use super::SourceRef;
6
use super::{filename::FileName, fragment::Fragment, immutable_string::ImmutableString};
7
use std::io;
8
use std::path::PathBuf;
9
use std::sync::atomic::{AtomicU64, Ordering};
10
use std::{fs::File, sync::Arc};
11

12
#[cfg(feature = "file_memmap")]
13
use std::{sync::mpsc, thread, time::Duration};
14

15
#[cfg(feature = "file_memmap")]
16
use fs4::fs_std::FileExt;
17

18
#[cfg(feature = "file_memmap")]
19
use memmap2::Mmap;
20

21
#[cfg(feature = "file_memmap")]
22
use crate::reporting::Diagnostic;
23

24
/// Amount of time before we should warn the user about locking the file taking too long.
25
#[cfg(feature = "file_memmap")]
26
pub const FILE_LOCK_WARNING_TIME: Duration = Duration::from_secs(5);
27

28
/// The global [Source::id] generator.
29
///
30
/// This is just a global [u64] that gets incremented everytime a new source is instantiated.
31
static SOURCE_ID_GENERATOR: AtomicU64 = AtomicU64::new(1);
32

33
/// A process-unique source id, that is atomically generated and assigned to each [Source] on creation.
34
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
35
pub struct SourceId(u64);
36

37
/// A full source. This is usually a file, but may also be passed in the form of a string for testing.  
38
#[derive(Debug)]
39
pub struct Source {
40
    /// Globally (process-wide) unique [Source] ID.
41
    ///
42
    /// It is fequently useful to have a consistient way to sort sources and check for equality between sources.
43
    /// This cannot be done with the [Source::name] since that can be [FileName::None], and checking for equality
44
    /// of content can be an expensive process.
45
    ///
46
    /// The id of a [Source] is an identifier that's globally unique for the runtime of the program, and is assigned to
47
    /// the [Source] when it is instantiated.
48
    pub id: SourceId,
49

50
    /// The name of this source file.
51
    name: FileName,
52

53
    /// The content of this source file.
54
    source: ImmutableString,
55

56
    /// A list of byte indicies into the [Source::source] indicating where lines starts.
57
    line_starts: Vec<usize>,
58
}
59

60
impl Source {
61
    /// Construct a new [Source].
62
    fn new(name: FileName, source: ImmutableString) -> Self {
51✔
63
        Source {
51✔
64
            // I believe we can use relaxed ordering here, since as long as all operations are atomic,
51✔
65
            // we're not really worried about another thread's `fetch_add` being re-ordered before this one, since
51✔
66
            // neither will get the same number.
51✔
67
            id: SourceId(SOURCE_ID_GENERATOR.fetch_add(1, Ordering::Relaxed)),
51✔
68
            name,
51✔
69
            line_starts: source.line_starts().collect(),
51✔
70
            source,
51✔
71
        }
51✔
72
    }
51✔
73

74
    /// Create a [Source] using a heap allocated [String].
75
    pub fn new_from_string(name: FileName, source: String) -> Self {
12✔
76
        Source::new(name, ImmutableString::new_owned(source.into_boxed_str()))
12✔
77
    }
12✔
78

79
    /// Create a [Source] from a [`&'static str`].
80
    ///
81
    /// [`&'static str`]: str
82
    pub fn new_from_static_str(name: FileName, source: &'static str) -> Self {
39✔
83
        Source::new(name, ImmutableString::new_static(source))
39✔
84
    }
39✔
85

86
    /// Attempt to memory map a file from the disk into a [Source].
87
    /// This will likely be faster than reading the file in some cases, and almost always more memory efficient.
88
    ///
89
    /// This requires the "file_memmap" feature.
90
    #[cfg(feature = "file_memmap")]
91
    pub fn new_mapped_from_disk(path: PathBuf) -> io::Result<Self> {
×
92
        use crate::source_tracking::SourceMap;
93

94
        // Make a one-off enum here to use for channel messages.
95
        enum ChannelMessage {
96
            /// The file was successfully locked.
97
            FileLocked(File),
98
            /// There was an error locking the file.
99
            LockingError(io::Error),
100
            /// File is taking a long time to lock.
101
            FiveSecondWarning,
102
        }
103

104
        // Open the file for reading.
105
        let file: File = File::open(&path)?;
×
106

107
        // Create two threads and a mpsc channel for warning the user if
108
        // locking the file takes longer than 5 seconds.
109
        let (tx, rx) = mpsc::sync_channel::<ChannelMessage>(0);
×
110
        let timeout_tx = tx.clone();
×
111

×
112
        // Thread to lock the file
×
113
        thread::spawn(move || match file.lock_exclusive() {
×
114
            Ok(_) => tx.send(ChannelMessage::FileLocked(file)),
×
115
            Err(err) => tx.send(ChannelMessage::LockingError(err)),
×
116
        });
×
117

×
118
        // Thread to warn user if it takes too long.
×
119
        thread::spawn(move || {
×
120
            thread::sleep(FILE_LOCK_WARNING_TIME);
×
121
            timeout_tx.send(ChannelMessage::FiveSecondWarning)
×
122
        });
×
123

124
        // Use an infinite loop to make sure we recieve all the messages from the senders.
125
        loop {
126
            match rx.recv() {
×
127
                // Emit the diagnostic for the 5-second warning.
128
                Ok(ChannelMessage::FiveSecondWarning) => {
×
129
                    // Make the diagnostic to show to the user.
×
130
                    let message = format!(
×
131
                        "Getting a file lock on {} has taken more than {} seconds.",
×
132
                        path.display(),
×
133
                        FILE_LOCK_WARNING_TIME.as_secs()
×
134
                    );
×
135

×
136
                    // Wrap the message in a warning diagnostic and print it.
×
137
                    // Add a note to describe what is going on.
×
138
                    Diagnostic::warning()
×
139
                        .with_message(message)
×
140
                        .with_notes(["This may be caused by another process holding or failing to release a lock on this file."])
×
141
                        // Create a dummy empty source map here, since this diagnostic does not have any highlights.
×
142
                        .print(&SourceMap::new())
×
143
                        // If printing a diagnostic fails, we just crash. We should never have to deal with this failing 
×
144
                        // in practice.
×
145
                        .expect("codespan-reporting error");
×
146
                }
×
147

148
                // Handle any io errors locking the file by returning them.
149
                Ok(ChannelMessage::LockingError(io_err)) => Err(io_err)?,
×
150

151
                // Handle success by finishing adding the file to the FileMap.
152
                Ok(ChannelMessage::FileLocked(file)) => {
×
153
                    // The file is now locked, we can memmory map it and add it to the vec.
154
                    // SAFETY: The file should be locked at this point so undefined behaviour from concurrent
155
                    // modification is avoided.
156
                    let mem_map: Mmap = unsafe {
×
157
                        Mmap::map(&file)
×
158
                            // Make sure we (at least try to) unlock the file if there's an issue memory mapping it.
×
159
                            .inspect_err(|_| {
×
160
                                FileExt::unlock(&file)
×
161
                                    .map_err(|err| eprintln!("Error unlocking file: {:?}", err))
×
162
                                    .ok();
×
163
                            })
×
164
                    }?;
×
165

166
                    // Double check that the file is valid utf-8. If not, return an IO error.
167
                    let raw_data: &[u8] = mem_map.as_ref();
×
168

169
                    if let Err(utf8_error) = std::str::from_utf8(raw_data) {
×
170
                        // The file is not valid for us so we should unlock it and return an error.
171
                        FileExt::unlock(&file)
×
172
                            .map_err(|err| eprintln!("Error unlocking file: {:?}", err))
×
173
                            .ok();
×
174

×
175
                        Err(io::Error::new(io::ErrorKind::InvalidData, utf8_error))?;
×
176
                    }
×
177

178
                    // If we get here, the file is valid UTF-8 -- put the memory mapped file in an Immutable string object.
179
                    return Ok(Source::new(
×
180
                        FileName::Real(path),
×
181
                        ImmutableString::new_locked_file(file, mem_map),
×
182
                    ));
×
183
                }
184

185
                Err(_) => unreachable!(
×
186
                    "The reciever should never reach a state where both senders are closed."
×
187
                ),
×
188
            }
189
        }
190
    }
×
191

192
    /// Read a file from the disk into a source. This reads the file, which may take longer than memory mapping it
193
    /// as done in [Self::new_mapped_from_disk]. This does not require the same features and dependencies as memory
194
    /// mapped operations though. This stores the whole file in memory, rather than mapping virtual memory to the disk.
195
    /// That makes this less memory efficient than [Self::new_mapped_from_disk], which may be important on systems
196
    /// where ram is constrained.
197
    ///
198
    /// Use this if the "file_memmap" is not available for some reason.
199
    pub fn new_read_from_disk(path: PathBuf) -> io::Result<Self> {
×
200
        // Open the file for reading.
201
        let file: File = File::open(&path)?;
×
202
        // Read the file to a string.
203
        let content: String = io::read_to_string(&file)?;
×
204
        // Turn that into a Source.
205
        Ok(Self::new(
×
206
            FileName::Real(path),
×
207
            ImmutableString::new_owned(content.into_boxed_str()),
×
208
        ))
×
209
    }
×
210

211
    /// Attempt to open a file using [Self::new_mapped_from_disk] if that feature is designated
212
    /// as available at compile time. If that feature is not available at compile time, or if an [io::Error] is
213
    /// returned from [memmap2], fallback to reading the file from disk using [Self::new_read_from_disk].
214
    pub fn new_mapped_or_read(path: PathBuf) -> io::Result<Self> {
×
215
        #[cfg(feature = "file_memmap")]
×
216
        match Self::new_mapped_from_disk(path.clone()) {
×
217
            ok @ Ok(_) => return ok,
×
218
            Err(e) => {
×
219
                eprintln!(
×
220
                    "warn: attempted to map file at {} and got {e}, falling back to read",
×
221
                    path.display()
×
222
                );
×
223
            }
×
224
        };
×
225

×
226
        Self::new_read_from_disk(path)
×
227
    }
×
228

229
    /// Get byte indices of where lines start in this [Source].
230
    pub fn line_starts(&self) -> &[usize] {
3✔
231
        self.line_starts.as_slice()
3✔
232
    }
3✔
233

234
    /// Get the number of lines in this [Source]. This is identical to [`Self::line_starts`] length.
235
    pub fn count_lines(&self) -> usize {
6✔
236
        self.line_starts.len()
6✔
237
    }
6✔
238

239
    /// Get the line index that a byte index is on in this [Source].
240
    ///
241
    /// If the byte index is greater than the length of the [Source] then the highest possible index will be returned.
242
    pub fn line_index(&self, byte_index: usize) -> usize {
3✔
243
        // Get a list of the byte indices that lines start on.
3✔
244
        let line_starts: &[usize] = self.line_starts();
3✔
245

3✔
246
        // We just want the exact line index if the byte index is at the beginning of a line, otherwise, give us the
3✔
247
        // index of the line-start before it.
3✔
248
        line_starts
3✔
249
            .binary_search(&byte_index)
3✔
250
            // Subtract 1 here to make sure we get the index of the line start before the byte index instead of
3✔
251
            // after.
3✔
252
            .unwrap_or_else(|not_found_index| not_found_index.saturating_sub(1))
3✔
253
    }
3✔
254

255
    /// Get a line of this [Source] as a [Fragment].
256
    /// The returned [Fragment] will contain the line terminating characters at the end of it. If you don't want those,
257
    /// use [Fragment::trim_end].
258
    ///
259
    /// *Note* that this uses `line_index` which is considered 0-indexed -- when displaying line numbers to the user,
260
    /// remember to add 1.
261
    ///
262
    /// # Panics
263
    /// - This will panic if you ask for a line index that's higher than or equal to the number returned
264
    ///     by [`Self::count_lines`].
265
    pub fn get_line(self: Arc<Source>, line_index: usize) -> Fragment {
3✔
266
        if line_index >= self.count_lines() {
3✔
267
            panic!("{} is greater than the number of lines in {}", line_index, self.name);
×
268
        }
3✔
269

3✔
270
        // Get the starting byte index of the line.
3✔
271
        let start_byte_index: usize = self.line_starts[line_index];
3✔
272

273
        // Get the ending byte index of the line / the starting index of the next line/the index of the end of the file.
274
        let end_byte_index: usize = if line_index + 1 == self.count_lines() {
3✔
275
            self.source.len()
3✔
276
        } else {
277
            self.line_starts[line_index + 1]
×
278
        };
279

280
        // Construct the resultant fragment.
281
        let frag = Fragment {
3✔
282
            source: Arc::clone(&self),
3✔
283
            range: start_byte_index..end_byte_index,
3✔
284
        };
3✔
285

3✔
286
        // Debug assert that the fragment is valid. This should always be true but might be useful for testing.
3✔
287
        debug_assert!(frag.is_valid());
3✔
288
        // Return constructed fragment.
289
        frag
3✔
290
    }
3✔
291

292
    /// Get an iterator over all the lines of this [Source]. This calls [Source::get_line] for each element of
293
    /// the returned iterator.
294
    ///
295
    /// The returned [Fragment]s will contain the line terminating characters at the end of them. If you don't want
296
    /// those, use [Iterator::map] and [Fragment::trim_end].
297
    pub fn lines(self: SourceRef) -> impl Iterator<Item = Fragment> {
×
298
        (0..self.count_lines()).map(move |line_index| self.clone().get_line(line_index))
×
299
    }
×
300

301
    /// Get the the source code stored.
302
    pub const fn source(&self) -> &ImmutableString {
631✔
303
        &self.source
631✔
304
    }
631✔
305

306
    /// Get the name of this [Source].
307
    pub const fn name(&self) -> &FileName {
1✔
308
        &self.name
1✔
309
    }
1✔
310

311
    /// Get the entire content of this [Source] as a [Fragment].
312
    pub fn as_fragment(self: SourceRef) -> Fragment {
7✔
313
        let len = self.source.len();
7✔
314
        Fragment {
7✔
315
            source: self,
7✔
316
            range: 0..len,
7✔
317
        }
7✔
318
    }
7✔
319
}
320

321
#[cfg(test)]
322
mod tests {
323
    use std::{sync::mpsc, thread};
324

325
    use crate::source_tracking::filename::FileName;
326

327
    use super::Source;
328

329
    #[test]
330
    fn dozen_threads_dont_share_gids() {
1✔
331
        let (tx, rx) = mpsc::channel();
1✔
332

333
        for i in 0..12 {
13✔
334
            let tx = tx.clone();
12✔
335
            thread::spawn(move || {
12✔
336
                let source = Source::new_from_string(FileName::None, format!("{i}"));
12✔
337
                tx.send(source.id).unwrap();
12✔
338
            });
12✔
339
        }
12✔
340

341
        let mut gids = (0..12).map(|_| rx.recv().unwrap()).collect::<Vec<_>>();
12✔
342

1✔
343
        let original_len = gids.len();
1✔
344
        println!("{gids:?}");
1✔
345
        gids.sort();
1✔
346
        gids.dedup();
1✔
347
        let dedup_len = gids.len();
1✔
348

1✔
349
        assert_eq!(original_len, dedup_len, "global ids are not duplicated");
1✔
350
    }
1✔
351
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc