• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moeyensj / difi / 24811108497

23 Apr 2026 01:10AM UTC coverage: 90.359% (+1.4%) from 88.939%
24811108497

Pull #62

github

web-flow
Merge 237b38a4e into d680bfac1
Pull Request #62: Add multi-partition DIFI, CIFI-output reuse, and IgnoredLinkages

740 of 780 new or added lines in 7 files covered. (94.87%)

3 existing lines in 2 files now uncovered.

2868 of 3174 relevant lines covered (90.36%)

552.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/python/mod.rs
1
//! Python bindings for difi via PyO3.
2
//!
3
//! Exposes CIFI and DIFI analysis as Python functions.
4
//! Inputs are Parquet file paths plus a JSON metric config; results are returned as Python dicts.
5
//!
6
//! Gated behind the `python` feature flag.
7

8
use pyo3::exceptions::PyRuntimeError;
9
use pyo3::prelude::*;
10
use pyo3::types::PyDict;
11

12
use crate::cifi;
13
use crate::difi as difi_mod;
14
use crate::io;
15
use crate::metrics::FindabilityMetric;
16
use crate::metrics::singleton::SingletonMetric;
17
use crate::metrics::tracklet::TrackletMetric;
18

19
/// The difi Python module.
20
#[pymodule]
21
fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
×
22
    m.add_function(wrap_pyfunction!(version, m)?)?;
×
23
    m.add_function(wrap_pyfunction!(analyze_observations, m)?)?;
×
24
    m.add_function(wrap_pyfunction!(analyze_linkages, m)?)?;
×
25
    Ok(())
×
26
}
×
27

28
/// Return the difi version string.
29
#[pyfunction]
30
fn version() -> &'static str {
×
31
    env!("CARGO_PKG_VERSION")
×
32
}
×
33

34
/// Parse a JSON metric config string into a FindabilityMetric.
35
///
36
/// Expected format:
37
///   {"type": "singletons", "min_obs": 6, "min_nights": 3, "min_nightly_obs_in_min_nights": 1}
38
///   {"type": "tracklets", "tracklet_min_obs": 2, "max_obs_separation": 0.0625, ...}
39
fn parse_metric(metric_json: &str) -> PyResult<Box<dyn FindabilityMetric>> {
×
40
    let config: serde_json::Value = serde_json::from_str(metric_json)
×
41
        .map_err(|e| PyRuntimeError::new_err(format!("Invalid metric JSON: {e}")))?;
×
42

43
    let metric_type = config
×
44
        .get("type")
×
45
        .and_then(|v| v.as_str())
×
46
        .ok_or_else(|| PyRuntimeError::new_err("Metric JSON must have a 'type' field"))?;
×
47

48
    match metric_type {
×
49
        "singletons" => {
×
50
            let min_obs = config.get("min_obs").and_then(|v| v.as_u64()).unwrap_or(6) as usize;
×
51
            let min_nights = config
×
52
                .get("min_nights")
×
53
                .and_then(|v| v.as_u64())
×
54
                .unwrap_or(3) as usize;
×
55
            let min_nightly = config
×
56
                .get("min_nightly_obs_in_min_nights")
×
57
                .and_then(|v| v.as_u64())
×
58
                .unwrap_or(1) as usize;
×
59
            Ok(Box::new(SingletonMetric {
×
60
                min_obs,
×
61
                min_nights,
×
62
                min_nightly_obs_in_min_nights: min_nightly,
×
63
            }))
×
64
        }
65
        "tracklets" => {
×
66
            let tracklet_min_obs = config
×
67
                .get("tracklet_min_obs")
×
68
                .and_then(|v| v.as_u64())
×
69
                .unwrap_or(2) as usize;
×
70
            let max_obs_separation = config
×
71
                .get("max_obs_separation")
×
72
                .and_then(|v| v.as_f64())
×
73
                .unwrap_or(1.5 / 24.0);
×
74
            let min_linkage_nights = config
×
75
                .get("min_linkage_nights")
×
76
                .and_then(|v| v.as_u64())
×
77
                .unwrap_or(3) as usize;
×
78
            let min_angular = config
×
79
                .get("min_obs_angular_separation")
×
80
                .and_then(|v| v.as_f64())
×
81
                .unwrap_or(1.0);
×
82
            Ok(Box::new(TrackletMetric {
×
83
                tracklet_min_obs,
×
84
                max_obs_separation,
×
85
                min_linkage_nights,
×
86
                min_obs_angular_separation: min_angular,
×
87
            }))
×
88
        }
89
        _ => Err(PyRuntimeError::new_err(format!(
×
90
            "Unknown metric type: {metric_type}"
×
91
        ))),
×
92
    }
93
}
×
94

95
/// Can I Find It? — Determine findability of objects in observations.
96
///
97
/// Arguments:
98
///   observations_path: Path to observations Parquet file.
99
///   metric_json: JSON string with metric type and parameters.
100
///
101
/// Returns: dict with object/findable counts and per-object details.
102
#[pyfunction]
103
#[pyo3(signature = (observations_path, metric_json))]
104
fn analyze_observations(
×
105
    py: Python<'_>,
×
106
    observations_path: &str,
×
107
    metric_json: &str,
×
108
) -> PyResult<Py<PyDict>> {
×
109
    let obs_path = std::path::Path::new(observations_path);
×
110

111
    let (obs, id_interner, _) = io::read_observations(obs_path)
×
112
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to read observations: {e}")))?;
×
113

114
    let metric_impl = parse_metric(metric_json)?;
×
115

116
    let (all_objects, findable, summaries) =
×
117
        cifi::analyze_observations(&obs, None, metric_impl.as_ref())
×
118
            .map_err(|e| PyRuntimeError::new_err(format!("CIFI failed: {e}")))?;
×
119

120
    let dict = PyDict::new(py);
×
121
    dict.set_item("num_objects", all_objects.len())?;
×
122
    dict.set_item("num_findable", findable.len())?;
×
123
    dict.set_item("num_partitions", summaries.len())?;
×
124

125
    if !summaries.is_empty() {
×
126
        dict.set_item("findable", summaries[0].findable)?;
×
127
    }
×
128

129
    // Include per-object details
130
    let objects_list: Vec<_> = (0..all_objects.len())
×
131
        .map(|i| {
×
132
            let d = PyDict::new(py);
×
133
            let _ = d.set_item(
×
134
                "object_id",
×
135
                id_interner.resolve(all_objects.object_id[i]).unwrap_or(""),
×
136
            );
×
137
            let _ = d.set_item("num_obs", all_objects.num_obs[i]);
×
138
            let _ = d.set_item("findable", all_objects.findable[i]);
×
139
            let _ = d.set_item("arc_length", all_objects.arc_length[i]);
×
140
            d
×
141
        })
×
142
        .collect();
×
143
    dict.set_item("objects", objects_list)?;
×
144

145
    Ok(dict.unbind())
×
146
}
×
147

148
/// Did I Find It? — Classify linkages and compute completeness.
149
///
150
/// Arguments:
151
///   observations_path: Path to observations Parquet file.
152
///   linkage_members_path: Path to linkage members Parquet file.
153
///   metric_json: JSON string with metric type and parameters.
154
///   min_obs: Minimum observations for "found" (default: 6).
155
///   contamination_percentage: Max contamination % (default: 20.0).
156
///
157
/// Returns: dict with classification counts and completeness.
158
#[pyfunction]
159
#[pyo3(signature = (observations_path, linkage_members_path, metric_json, min_obs=6, contamination_percentage=20.0))]
160
fn analyze_linkages(
×
161
    py: Python<'_>,
×
162
    observations_path: &str,
×
163
    linkage_members_path: &str,
×
164
    metric_json: &str,
×
165
    min_obs: usize,
×
166
    contamination_percentage: f64,
×
167
) -> PyResult<Py<PyDict>> {
×
168
    let obs_path = std::path::Path::new(observations_path);
×
169
    let lm_path = std::path::Path::new(linkage_members_path);
×
170

171
    let (obs, id_interner, _) = io::read_observations(obs_path)
×
172
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to read observations: {e}")))?;
×
173
    let mut id_interner2 = id_interner.clone();
×
174
    let lm = io::read_linkage_members(lm_path, &mut id_interner2)
×
175
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to read linkage members: {e}")))?;
×
176

177
    // CIFI
178
    let metric_impl = parse_metric(metric_json)?;
×
179

180
    let (mut all_objects, _findable, mut summaries) =
×
181
        cifi::analyze_observations(&obs, None, metric_impl.as_ref())
×
182
            .map_err(|e| PyRuntimeError::new_err(format!("CIFI failed: {e}")))?;
×
183

184
    if summaries.is_empty() {
×
185
        return Err(PyRuntimeError::new_err("No partitions created"));
×
186
    }
×
187

188
    // DIFI
NEW
189
    let (all_linkages, ignored_linkages) = difi_mod::analyze_linkages(
×
190
        &obs,
×
191
        &lm,
×
192
        &mut all_objects,
×
193
        &mut summaries[0],
×
194
        min_obs,
×
195
        contamination_percentage,
×
196
    )
197
    .map_err(|e| PyRuntimeError::new_err(format!("DIFI failed: {e}")))?;
×
198

199
    let n_pure: usize = all_linkages.pure.iter().filter(|&&p| p).count();
×
200
    let n_contaminated: usize = all_linkages.contaminated.iter().filter(|&&c| c).count();
×
201
    let n_mixed: usize = all_linkages.mixed.iter().filter(|&&m| m).count();
×
202

203
    let dict = PyDict::new(py);
×
204
    dict.set_item("num_linkages", all_linkages.len())?;
×
205
    dict.set_item("num_pure", n_pure)?;
×
206
    dict.set_item("num_contaminated", n_contaminated)?;
×
207
    dict.set_item("num_mixed", n_mixed)?;
×
NEW
208
    dict.set_item("num_ignored_linkages", ignored_linkages.len())?;
×
209
    dict.set_item("completeness", summaries[0].completeness)?;
×
210
    dict.set_item("found", summaries[0].found)?;
×
211
    dict.set_item("findable", summaries[0].findable)?;
×
212

213
    // Per-linkage details
214
    let linkages_list: Vec<_> = (0..all_linkages.len())
×
215
        .map(|i| {
×
216
            let d = PyDict::new(py);
×
217
            let _ = d.set_item(
×
218
                "linkage_id",
×
219
                id_interner2
×
220
                    .resolve(all_linkages.linkage_id[i])
×
221
                    .unwrap_or(""),
×
222
            );
×
223
            let _ = d.set_item(
×
224
                "linked_object_id",
×
225
                id_interner2
×
226
                    .resolve(all_linkages.linked_object_id[i])
×
227
                    .unwrap_or(""),
×
228
            );
×
229
            let _ = d.set_item("num_obs", all_linkages.num_obs[i]);
×
230
            let _ = d.set_item("contamination", all_linkages.contamination[i]);
×
231
            let _ = d.set_item("pure", all_linkages.pure[i]);
×
232
            let _ = d.set_item("contaminated", all_linkages.contaminated[i]);
×
233
            let _ = d.set_item("mixed", all_linkages.mixed[i]);
×
234
            let _ = d.set_item("pure_complete", all_linkages.pure_complete[i]);
×
235
            d
×
236
        })
×
237
        .collect();
×
238
    dict.set_item("linkages", linkages_list)?;
×
239

240
    Ok(dict.unbind())
×
241
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc