• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jzombie / rust-triplets / 24927961805

25 Apr 2026 09:37AM UTC coverage: 95.473% (-0.06%) from 95.528%
24927961805

push

github

web-flow
Add optional denoise (#81)

* Bump rustls-webpki in the cargo group across 1 directory

Bumps the cargo group with 1 update in the / directory: [rustls-webpki](https://github.com/rustls/webpki).


Updates `rustls-webpki` from 0.103.12 to 0.103.13
- [Release notes](https://github.com/rustls/webpki/releases)
- [Commits](https://github.com/rustls/webpki/compare/v/0.103.12...v/0.103.13)

---
updated-dependencies:
- dependency-name: rustls-webpki
  dependency-version: 0.103.13
  dependency-type: indirect
  dependency-group: cargo
...

Signed-off-by: dependabot[bot] <support@github.com>

* Prototype denoiser implementation

Co-authored-by: Copilot <copilot@github.com>

* Add more tests

Co-authored-by: Copilot <copilot@github.com>

* Add support for linearized data

Co-authored-by: Copilot <copilot@github.com>

* cargo fmt --all

* Remove user-configurable line-level control

Co-authored-by: Copilot <copilot@github.com>

* Draft README update

* Use single-line assertion

* Draft tagline and description

Co-authored-by: Copilot <copilot@github.com>

* Add line break

* Make markdown stripping optional

Co-authored-by: Copilot <copilot@github.com>

* Add link to GFM

Co-authored-by: Copilot <copilot@github.com>

* Fix doc comment

Co-authored-by: Copilot <copilot@github.com>

* Debug flaky CI tests

* Prepare for 0.19.0-alpha

* Add denoiser example to README

Co-authored-by: Copilot <copilot@github.com>

* Improve InMemory source implementation

Co-authored-by: Copilot <copilot@github.com>

* Include links to individual sources

Co-authored-by: Copilot <copilot@github.com>

* Reorder sources

* Update default source verbiage

* Add ability to construct DataRecord from text

Co-authored-by: Copilot <copilot@github.com>

* Prototype iterative wave expansion

Co-authored-by: Copilot <copilot@github.com>

* cargo fmt --all

* Migrate to preprocessor architecture

Co-authored-by: Copilot <copilot@github.com>

* Add more tests

Co-authored-by: Copilot <copilot@gi... (continued)

1028 of 1083 new or added lines in 6 files covered. (94.92%)

1 existing line in 1 file now uncovered.

18708 of 19595 relevant lines covered (95.47%)

3961.16 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.23
/src/source/backends/in_memory_source.rs
1
use chrono::Utc;
2

3
use crate::config::SamplerConfig;
4
use crate::data::DataRecord;
5
use crate::errors::SamplerError;
6
use crate::source::{DataSource, IndexableSource, SourceCursor, SourceSnapshot};
7
use crate::types::SourceId;
8

9
/// An in-memory data source backed by a `Vec<DataRecord>`.
10
///
11
/// Useful for tests, documentation examples, and small corpora that are
12
/// constructed entirely at runtime without a file or network backend.
13
///
14
/// # Example
15
///
16
/// ```
17
/// use std::sync::Arc;
18
/// use chrono::Utc;
19
/// use triplets::{DataRecord, DeterministicSplitStore, InMemorySource, SamplerConfig, SplitRatios, TripletSampler};
20
/// use triplets::data::{RecordSection, SectionRole};
21
///
22
/// let mut source = InMemorySource::new("my_source");
23
/// source.add_record(DataRecord {
24
///     id: "rec-0".into(),
25
///     source: "my_source".into(),
26
///     created_at: Utc::now(),
27
///     updated_at: Utc::now(),
28
///     quality: Default::default(),
29
///     taxonomy: vec![],
30
///     sections: vec![RecordSection {
31
///         role: SectionRole::Context,
32
///         heading: None,
33
///         text: "The quick brown fox.".into(),
34
///         sentences: vec![],
35
///     }],
36
///     meta_prefix: None,
37
/// });
38
///
39
/// // InMemorySource implements DataSource directly — no adapter needed.
40
/// let ratios = SplitRatios { train: 0.8, validation: 0.1, test: 0.1 };
41
/// let store = Arc::new(DeterministicSplitStore::new(ratios, 42).unwrap());
42
/// let sampler = TripletSampler::new(SamplerConfig::default(), store);
43
/// sampler.register_source(Box::new(source));
44
/// ```
45
pub struct InMemorySource {
46
    id: SourceId,
47
    records: Vec<DataRecord>,
48
}
49

50
impl InMemorySource {
51
    /// Create a new empty in-memory source with the given stable identifier.
52
    pub fn new(id: impl Into<SourceId>) -> Self {
4✔
53
        Self {
4✔
54
            id: id.into(),
4✔
55
            records: Vec::new(),
4✔
56
        }
4✔
57
    }
4✔
58

59
    /// Append a single record to the source.
60
    pub fn add_record(&mut self, record: DataRecord) {
2✔
61
        self.records.push(record);
2✔
62
    }
2✔
63

64
    /// Append multiple records to the source.
65
    pub fn add_records(&mut self, records: impl IntoIterator<Item = DataRecord>) {
2✔
66
        self.records.extend(records);
2✔
67
    }
2✔
68

69
    /// Create an in-memory source pre-populated with the given records.
70
    pub fn from_records(id: impl Into<SourceId>, records: Vec<DataRecord>) -> Self {
274✔
71
        Self {
274✔
72
            id: id.into(),
274✔
73
            records,
274✔
74
        }
274✔
75
    }
274✔
76
}
77

78
impl IndexableSource for InMemorySource {
79
    fn id(&self) -> &str {
7,665✔
80
        &self.id
7,665✔
81
    }
7,665✔
82

83
    fn len_hint(&self) -> Option<usize> {
3✔
84
        Some(self.records.len())
3✔
85
    }
3✔
86

87
    fn record_at(&self, idx: usize) -> Result<Option<DataRecord>, SamplerError> {
2✔
88
        Ok(self.records.get(idx).cloned())
2✔
89
    }
2✔
90
}
91

92
impl DataSource for InMemorySource {
93
    fn id(&self) -> &str {
7,665✔
94
        IndexableSource::id(self)
7,665✔
95
    }
7,665✔
96

97
    fn refresh(
940✔
98
        &self,
940✔
99
        _config: &SamplerConfig,
940✔
100
        cursor: Option<&SourceCursor>,
940✔
101
        limit: Option<usize>,
940✔
102
    ) -> Result<SourceSnapshot, SamplerError> {
940✔
103
        let records = &self.records;
940✔
104
        let total = records.len();
940✔
105
        let mut start = cursor.map(|cursor| cursor.revision as usize).unwrap_or(0);
940✔
106
        if total > 0 && start >= total {
940✔
107
            start = 0;
1✔
108
        }
939✔
109
        let max = limit.unwrap_or(total);
940✔
110
        let mut filtered = Vec::new();
940✔
111
        for idx in 0..total {
12,173✔
112
            if filtered.len() >= max {
12,173✔
113
                break;
133✔
114
            }
12,040✔
115
            let pos = (start + idx) % total;
12,040✔
116
            filtered.push(records[pos].clone());
12,040✔
117
        }
118
        let last_seen = filtered
940✔
119
            .iter()
940✔
120
            .map(|record| record.updated_at)
940✔
121
            .max()
940✔
122
            .unwrap_or_else(Utc::now);
940✔
123
        let next_start = if total == 0 {
940✔
NEW
124
            0
×
125
        } else {
126
            (start + filtered.len()) % total
940✔
127
        };
128
        Ok(SourceSnapshot {
940✔
129
            records: filtered,
940✔
130
            cursor: SourceCursor {
940✔
131
                last_seen,
940✔
132
                revision: next_start as u64,
940✔
133
            },
940✔
134
        })
940✔
135
    }
940✔
136

137
    fn reported_record_count(&self, _config: &SamplerConfig) -> Result<u128, SamplerError> {
1✔
138
        Ok(self.records.len() as u128)
1✔
139
    }
1✔
140
}
141

142
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::{QualityScore, RecordSection, SectionRole};
    use chrono::{Duration, Utc};

    /// Builds a minimal record whose id doubles as its text, stamped with `ts`
    /// for both `created_at` and `updated_at`.
    fn make_record(id: &str, ts: chrono::DateTime<Utc>) -> DataRecord {
        DataRecord {
            id: id.to_string(),
            source: "mem".to_string(),
            created_at: ts,
            updated_at: ts,
            quality: QualityScore { trust: 1.0 },
            taxonomy: Vec::new(),
            sections: vec![RecordSection {
                role: SectionRole::Anchor,
                heading: None,
                text: id.to_string(),
                sentences: vec![id.to_string()],
            }],
            meta_prefix: None,
        }
    }

    #[test]
    fn in_memory_source_refresh_wraps_cursor_and_uses_latest_timestamp() {
        let now = Utc::now();
        let older = now - Duration::seconds(5);
        let newer = now + Duration::seconds(5);

        let mut source = InMemorySource::new("mem");
        source.add_records([make_record("a", older), make_record("b", newer)]);

        // Revision 7 is past the end of a two-record source, so reading restarts at 0.
        let cursor = SourceCursor {
            last_seen: now,
            revision: 7,
        };

        let snapshot = source
            .refresh(&SamplerConfig::default(), Some(&cursor), Some(1))
            .unwrap();
        assert_eq!(snapshot.records.len(), 1);
        assert_eq!(snapshot.records[0].id, "a");
        assert_eq!(snapshot.cursor.revision, 1);
        assert_eq!(snapshot.cursor.last_seen, older);
    }

    #[test]
    fn refresh_on_empty_source_yields_empty_snapshot_and_zero_revision() {
        // Exercises the `total == 0` branch: nothing to read, and the stale
        // cursor position must not leak into the returned revision.
        let source = InMemorySource::new("empty");
        let stale_cursor = SourceCursor {
            last_seen: Utc::now(),
            revision: 3,
        };

        let snapshot = source
            .refresh(&SamplerConfig::default(), Some(&stale_cursor), None)
            .unwrap();
        assert!(snapshot.records.is_empty());
        assert_eq!(snapshot.cursor.revision, 0);
    }

    #[test]
    fn refresh_without_limit_returns_every_record_starting_at_cursor() {
        let now = Utc::now();
        let later = now + Duration::seconds(5);
        let source = InMemorySource::from_records(
            "mem",
            vec![make_record("a", now), make_record("b", later)],
        );

        let cursor = SourceCursor {
            last_seen: now,
            revision: 1,
        };

        let snapshot = source
            .refresh(&SamplerConfig::default(), Some(&cursor), None)
            .unwrap();
        // One full circular pass beginning at index 1 ends back at index 1.
        assert_eq!(snapshot.records.len(), 2);
        assert_eq!(snapshot.records[0].id, "b");
        assert_eq!(snapshot.records[1].id, "a");
        assert_eq!(snapshot.cursor.revision, 1);
        assert_eq!(snapshot.cursor.last_seen, later);
    }

    #[test]
    fn source_id_and_reported_count_are_exposed() {
        let memory = InMemorySource::new("mem_id");
        assert_eq!(DataSource::id(&memory), "mem_id");
        assert_eq!(
            memory
                .reported_record_count(&SamplerConfig::default())
                .unwrap(),
            0
        );
    }

    #[test]
    fn add_record_and_add_records_increase_len() {
        let mut source = InMemorySource::new("s");
        assert_eq!(source.len_hint(), Some(0));

        let now = Utc::now();
        source.add_record(make_record("r0", now));
        assert_eq!(source.len_hint(), Some(1));

        source.add_records([make_record("r1", now), make_record("r2", now)]);
        assert_eq!(source.len_hint(), Some(3));
    }

    #[test]
    fn record_at_returns_correct_record_and_none_out_of_bounds() {
        let now = Utc::now();
        let mut source = InMemorySource::new("s");
        source.add_record(make_record("only", now));

        let found = source.record_at(0).unwrap();
        assert_eq!(found.unwrap().id, "only");

        let oob = source.record_at(1).unwrap();
        assert!(oob.is_none());
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc