• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tamada / oinkie / 18367171473

09 Oct 2025 06:08AM UTC coverage: 20.36%. First build
18367171473

Pull #3

github

tamada
Fix quoting in environment variable setup for LLVM installation on Windows and separate build steps for macOS and Windows
Pull Request #3: introduce extraction mode

144 of 436 new or added lines in 6 files covered. (33.03%)

181 of 889 relevant lines covered (20.36%)

0.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/lib/src/comparators.rs
1
use std::collections::{HashMap, HashSet};
2
use std::fmt::Display;
3
use std::iter::zip;
4
use clap::{Parser, ValueEnum};
5
use serde::{Serialize, Deserialize};
6

7
use crate::birthmarks::{Birthmark, BirthmarkType, Element, Info};
8
use crate::Result;
9

10
#[derive(Serialize, Deserialize, Parser, Debug, Clone, PartialEq, Eq, Hash, ValueEnum)]
11
pub enum Type {
12
    /// Simpson's coefficient
13
    Simpson,
14
    /// Jaccard index
15
    Jaccard,
16
    /// Dice's coefficient
17
    Dice,
18
    /// Cosine similarity
19
    Cosine,
20
    /// Longest common subsequence
21
    LCS,
22
    /// Levenshtein distance (Edit distance)
23
    Levenshtein,
24
}
25

26
impl Display for Type {
27
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
×
28
        write!(f, "{:?}", self)
×
29
    }
×
30
}
31

32
pub fn comparator(t: &Type) -> Box<dyn Comparator> {
×
33
    match t {
×
34
        Type::Simpson => Box::new(Simpson{}),
×
35
        Type::Jaccard => Box::new(Jaccard{}),
×
36
        Type::Dice => Box::new(Dice{}),
×
37
        Type::Cosine => Box::new(Cosine{}),
×
38
        Type::LCS => Box::new(LCS{}),
×
39
        Type::Levenshtein => Box::new(Levenshtein{}),
×
40
    }
41
}
×
42

43
#[derive(Serialize, Deserialize, Debug)]
44
pub struct Similarity {
45
    pub btype: BirthmarkType,
46
    pub a_info: Info,
47
    pub b_info: Info,
48
    pub ctype: Type,
49
    pub score: f64,
50
    pub elapsed_ms: Option<f64>,
51
}
52

53
pub trait Comparator {
54
    fn name(&self) -> String {
×
55
        self.ctype().to_string()
×
56
    }
×
57

58
    fn ctype(&self) -> Type;
59

60
    fn compare(&self, a: &Birthmark, b: &Birthmark) -> Result<Similarity> {
×
61
        let start = std::time::Instant::now();
×
62
        let s = match (a.len(), b.len()) {
×
63
            (0, 0) => Ok(1.0),
×
64
            (0, _) | (_, 0) => Ok(0.0),
×
65
            _ => self.compare_impl(a, b),
×
66
        };
67
        let elapsed = start.elapsed();
×
68
        s.map(|score| Similarity {
×
NEW
69
            btype: a.info.btype.clone(),
×
NEW
70
            a_info: a.info.clone(),
×
NEW
71
            b_info: b.info.clone(),
×
72
            ctype: self.ctype(),
×
73
            score,
×
74
            elapsed_ms: Some(elapsed.as_secs_f64() * 1000.0),
×
75
        })
×
76
    }
×
77

78
    fn compare_impl(&self, a: &Birthmark, b: &Birthmark) -> Result<f64>;
79
}
80

81
/// Comparator for [`Type::Simpson`] (Simpson's coefficient).
struct Simpson {}

/// Comparator for [`Type::Jaccard`] (Jaccard index).
struct Jaccard {}

/// Comparator for [`Type::Dice`] (Dice's coefficient).
struct Dice {}

/// Comparator for [`Type::Cosine`] (cosine similarity).
struct Cosine {}

/// Comparator for [`Type::LCS`] (longest common subsequence).
struct LCS {}

/// Comparator for [`Type::Levenshtein`] (edit distance).
struct Levenshtein {}
98

99
impl Comparator for Simpson {
100
    fn ctype(&self) -> Type {
×
101
        Type::Simpson
×
102
    }
×
103

104
    fn compare_impl(&self, a: &Birthmark, b: &Birthmark) -> Result<f64> {
×
105
        Ok(zip(a.iter(), b.iter())
×
106
                .filter(|(a, b)| a.is_same(b))
×
107
                .count() as f64
×
108
                / a.len().min(b.len()) as f64)
×
109
    }
×
110
}
111

112
impl Comparator for Jaccard {
113
    fn ctype(&self) -> Type {
×
NEW
114
        Type::Jaccard
×
115
    }
×
116

117
    fn compare_impl(&self, a: &Birthmark, b: &Birthmark) -> Result<f64> {
×
118
        Ok(zip(a.iter(), b.iter())
×
119
                .filter(|(a, b)| a.is_same(b))
×
120
                .count() as f64
×
121
                / (a.len() + b.len() - zip(a.iter(), b.iter())
×
122
                    .filter(|(a, b)| a.is_same(b))
×
123
                    .count()) as f64)
×
124
    }
×
125
}
126

127
impl Comparator for Dice{
128
    fn ctype(&self) -> Type {
×
129
        Type::Dice
×
130
    }
×
131

132
    fn compare_impl(&self, a: &Birthmark, b: &Birthmark) -> Result<f64> {
×
133
        Ok(2.0 * zip(a.iter(), b.iter())
×
134
                .filter(|(a, b)| a.is_same(b))
×
135
                .count() as f64
×
136
                / (a.len() + b.len()) as f64)
×
137
    }
×
138
}
139

140
impl Comparator for Cosine {
141
    fn ctype(&self) -> Type {
×
142
        Type::Cosine
×
143
    }
×
144

145
    fn compare_impl(&self, a: &Birthmark, b: &Birthmark) -> Result<f64> {
×
146
        let m1 = a.freq();
×
147
        let m2 = b.freq();
×
148
        let keys = merge_keys(&m1, &m2);
×
149

150
        let dot_product = keys.iter()
×
151
            .map(|k| m1.get(k).unwrap_or(&0) * m2.get(k).unwrap_or(&0))
×
152
            .sum::<usize>() as f64;
×
153
        let magnitude_a = (m1.values().map(|v| v * v).sum::<usize>() as f64).sqrt();
×
154
        let magnitude_b = (m2.values().map(|v| v * v).sum::<usize>() as f64).sqrt();
×
155
        Ok(dot_product / (magnitude_a * magnitude_b))
×
156
    }
×
157
}
158

159
fn merge_keys<'a>(m1: &'a HashMap<&'a Element, usize>, m2: &'a HashMap<&'a Element, usize>) -> HashSet<&'a Element> {
×
160
    let mut keys = HashSet::new();
×
161
    for k in m1.keys() {
×
162
        keys.insert(*k);
×
163
    }
×
164
    for k in m2.keys() {
×
165
        keys.insert(*k);
×
166
    }
×
167
    keys
×
168
}
×
169

170
impl Comparator for LCS {
171
    fn ctype(&self) -> Type {
×
172
        Type::LCS
×
173
    }
×
174

175
    fn compare_impl(&self, a: &Birthmark, b: &Birthmark) -> Result<f64> {
×
176
        let len_a = a.len();
×
177
        let len_b = b.len();
×
178
        let lcs_len = {
×
179
            let mut dp = vec![vec![0; len_b + 1]; len_a + 1];
×
180
            for i in 1..=len_a {
×
181
                for j in 1..=len_b {
×
182
                    if a.iter().nth(i - 1).unwrap().is_same(b.iter().nth(j - 1).unwrap()) {
×
183
                        dp[i][j] = dp[i - 1][j - 1] + 1;
×
184
                    } else {
×
185
                        dp[i][j] = dp[i - 1][j].max(dp[i][j - 1]);
×
186
                    }
×
187
                }
188
            }
189
            dp[len_a][len_b]
×
190
        };
191
        Ok(lcs_len as f64 / len_a.max(len_b) as f64)
×
192
    }
×
193
}
194

195
impl Comparator for Levenshtein {
196
    fn ctype(&self) -> Type {
×
197
        Type::Levenshtein
×
198
    }
×
199

200
    fn compare_impl(&self, a: &Birthmark, b: &Birthmark) -> Result<f64> {
×
201
        let len_a = a.len();
×
202
        let len_b = b.len();
×
203
        let dist = edit_distance(a, b);
×
204
        Ok(1.0 - dist as f64 / len_a.max(len_b) as f64)
×
205
    }
×
206
}
207

208
fn edit_distance(a: &Birthmark, b: &Birthmark) -> usize {
×
209
    let len_a = a.len();
×
210
    let len_b = b.len();
×
211
    let mut dp = vec![vec![0; len_b + 1]; len_a + 1];
×
212

213
    for i in 0..=len_a {
×
214
        dp[i][0] = i;
×
215
    }
×
216
    for j in 0..=len_b {
×
217
        dp[0][j] = j;
×
218
    }
×
219

220
    for i in 1..=len_a {
×
221
        for j in 1..=len_b {
×
222
            let cost = if a.iter().nth(i - 1).unwrap().is_same(b.iter().nth(j - 1).unwrap()) {
×
223
                0
×
224
            } else {
225
                1
×
226
            };
227
            dp[i][j] = *[
×
228
                dp[i - 1][j] + 1,     // Deletion
×
229
                dp[i][j - 1] + 1,     // Insertion
×
230
                dp[i - 1][j - 1] + cost, // Substitution
×
231
            ]
×
232
            .iter()
×
233
            .min()
×
234
            .unwrap();
×
235
        }
236
    }
237

238
    dp[len_a][len_b]
×
239
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc