• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

geo-engine / geoengine / 9516064831

14 Jun 2024 12:24PM UTC coverage: 90.665% (+0.05%) from 90.619%
9516064831

push

github

web-flow
Merge pull request #961 from dbrandenstein/gbif-time

GBIF time and metadata

653 of 660 new or added lines in 3 files covered. (98.94%)

15 existing lines in 9 files now uncovered.

132839 of 146516 relevant lines covered (90.67%)

52802.09 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.83
/operators/src/source/ogr_source/dataset_iterator.rs
1
// generated code of `_OgrDatasetIterator` needs this lint for the `peeked` field
2
#![allow(clippy::option_option)]
3

4
use super::{AttributeFilter, CsvHeader, FeaturesProvider, FormatSpecifics, OgrSourceDataset};
5
use crate::error::{self};
6
use crate::util::gdal::gdal_open_dataset_ex;
7
use crate::util::Result;
8
use gdal::vector::sql::Dialect;
9
use gdal::vector::{Feature, LayerAccess};
10
use gdal::{Dataset, DatasetOptions, GdalOpenFlags};
11
use geoengine_datatypes::primitives::VectorQueryRectangle;
12
use log::debug;
13
use ouroboros::self_referencing;
14
use std::cell::Cell;
15
use std::collections::HashMap;
16
use std::ffi::OsStr;
17
use std::iter::FusedIterator;
18

19
/// An iterator over features from a OGR dataset.
20
/// This iterator contains the dataset and one of its layers.
21
pub struct OgrDatasetIterator {
22
    dataset_iterator: _OgrDatasetIterator,
23
    // must be cell since we borrow self in the iterator for emitting the output value
24
    // and thus cannot mutably borrow this value
25
    has_ended: Cell<bool>,
26
    was_spatial_filtered_by_ogr: bool,
27
    was_time_filtered_by_ogr: bool,
28
}
29

30
// We can implement `Send` for the combination of OGR dataset and layer
31
// as long as we have a one-to-one relation. The layer mutates the dataset.
32
// So it is not `Send` if there is more than one layer.
33
// SAFETY: NOTE(review) - this relies on the dataset being reachable only through
// the single features provider stored in this iterator; confirm whenever the
// struct gains another way to access the dataset.
unsafe impl Send for OgrDatasetIterator {}
34

35
/// Store a dataset and one of its layers.
36
/// Allows to iterate over features via accessing the layer only.
37
/// We must ensure to not access it from the outside.
38
#[self_referencing]
106✔
39
struct _OgrDatasetIterator {
40
    dataset: gdal::Dataset,
41
    #[borrows(mut dataset)]
42
    #[covariant]
43
    features_provider: FeaturesProvider<'this>,
44
}
45

46
impl OgrDatasetIterator {
47
    #[allow(clippy::needless_pass_by_value)]
48
    pub fn new(
54✔
49
        dataset_information: &OgrSourceDataset,
54✔
50
        query_rectangle: &VectorQueryRectangle,
54✔
51
        attribute_filters: Vec<AttributeFilter>,
54✔
52
    ) -> Result<OgrDatasetIterator> {
54✔
53
        let adjusted_filters =
54✔
54
            Self::adjust_filters_to_column_renaming(dataset_information, attribute_filters);
54✔
55

54✔
56
        let mut was_time_filtered_by_ogr = false;
54✔
57

58
        let dataset_iterator = _OgrDatasetIteratorTryBuilder {
53✔
59
            dataset: Self::open_gdal_dataset(dataset_information)?,
54✔
60
            features_provider_builder: |dataset| {
53✔
61
                Self::create_features_provider(
53✔
62
                    dataset,
53✔
63
                    dataset_information,
53✔
64
                    query_rectangle,
53✔
65
                    &adjusted_filters,
53✔
66
                )
53✔
67
                .map(|(provider, filtered)| {
53✔
68
                    was_time_filtered_by_ogr = filtered;
53✔
69

53✔
70
                    provider
53✔
71
                })
53✔
72
            },
53✔
73
        }
74
        .try_build()?;
53✔
75

76
        let was_spatial_filtered_by_ogr = dataset_information.force_ogr_spatial_filter
53✔
77
            || dataset_iterator
51✔
78
                .borrow_features_provider()
51✔
79
                .has_gdal_capability(gdal::vector::LayerCaps::OLCFastSpatialFilter);
51✔
80

81
        Ok(Self {
53✔
82
            dataset_iterator,
53✔
83
            has_ended: Cell::new(false),
53✔
84
            was_spatial_filtered_by_ogr,
53✔
85
            was_time_filtered_by_ogr,
53✔
86
        })
53✔
87
    }
54✔
88

89
    /// Undo the column renaming to let OGR apply the filters
90
    fn adjust_filters_to_column_renaming(
54✔
91
        dataset_information: &OgrSourceDataset,
54✔
92
        attribute_filters: Vec<AttributeFilter>,
54✔
93
    ) -> Vec<AttributeFilter> {
54✔
94
        match &dataset_information.columns {
54✔
95
            Some(cspec) => {
42✔
96
                match &cspec.rename {
42✔
97
                    Some(mapping) => {
7✔
98
                        // Build reverse mapping
7✔
99
                        let r_mapping = mapping
7✔
100
                            .iter()
7✔
101
                            .map(|(k, v)| (v.to_string(), k.to_string()))
191✔
102
                            .collect::<HashMap<_, _>>();
7✔
103

7✔
104
                        attribute_filters
7✔
105
                            .into_iter()
7✔
106
                            .map(|f| match r_mapping.get(&f.attribute) {
7✔
107
                                Some(name) => AttributeFilter {
1✔
108
                                    attribute: name.to_string(),
1✔
109
                                    ranges: f.ranges,
1✔
110
                                    keep_nulls: f.keep_nulls,
1✔
111
                                },
1✔
112
                                None => f,
×
113
                            })
7✔
114
                            .collect::<Vec<_>>()
7✔
115
                    }
116
                    // No renaming
117
                    None => attribute_filters,
35✔
118
                }
119
            }
120
            // No column spec
121
            None => attribute_filters,
12✔
122
        }
123
    }
54✔
124

125
    fn create_features_provider<'d>(
53✔
126
        dataset: &'d Dataset,
53✔
127
        dataset_information: &OgrSourceDataset,
53✔
128
        query_rectangle: &VectorQueryRectangle,
53✔
129
        attribute_filters: &[AttributeFilter],
53✔
130
    ) -> Result<(FeaturesProvider<'d>, bool)> {
53✔
131
        let filter_string = if dataset.driver().short_name() == "CSV" {
53✔
132
            FeaturesProvider::create_attribute_filter_string_cast(attribute_filters)
21✔
133
        } else {
134
            FeaturesProvider::create_attribute_filter_string(attribute_filters)
32✔
135
        };
136

137
        let time_filter = if dataset_information.force_ogr_time_filter {
53✔
138
            debug!(
7✔
NEW
139
                "using time filter {:?} for layer {:?}",
×
NEW
140
                query_rectangle.time_interval, &dataset_information.layer_name
×
141
            );
142
            FeaturesProvider::create_time_filter_string(
7✔
143
                dataset_information.time.clone(),
7✔
144
                query_rectangle.time_interval,
7✔
145
                &dataset.driver().short_name(),
7✔
146
            )
7✔
147
        } else {
148
            None
46✔
149
        };
150

151
        let final_filter = filter_string
53✔
152
            .map(|f| match &dataset_information.attribute_query {
53✔
153
                Some(a) => format!("({a}) AND {f}"),
2✔
154
                None => f,
11✔
155
            })
53✔
156
            .or_else(|| dataset_information.attribute_query.clone())
53✔
157
            .map(|f| match &time_filter {
53✔
158
                None => f,
14✔
NEW
159
                Some(t) => format!("({t}) AND {f}"),
×
160
            })
53✔
161
            .or_else(|| time_filter.clone());
53✔
162

163
        let mut features_provider = if let Some(sql) = dataset_information.sql_query.as_ref() {
53✔
164
            let query = if let Some(filter) = final_filter {
7✔
165
                debug!(
4✔
NEW
166
                    "using attribute filter {:?} for layer {:?}",
×
NEW
167
                    &filter, &dataset_information.layer_name
×
168
                );
169

170
                // This is necessary because otherwise the GDAL postgres driver does not perform a filter-pushdown in case an explicit SQL query is given
171
                format!("SELECT * FROM ({sql}) q WHERE {filter}")
4✔
172
            } else {
173
                sql.clone()
3✔
174
            };
175

176
            FeaturesProvider::ResultSet(
177
                dataset
7✔
178
                    .execute_sql(query, None, Dialect::DEFAULT)?
7✔
179
                    .ok_or(error::Error::OgrSqlQuery)?,
7✔
180
            )
181
        } else {
182
            let mut features_provider =
46✔
183
                FeaturesProvider::Layer(dataset.layer_by_name(&dataset_information.layer_name)?);
46✔
184

185
            if let Some(filter) = final_filter {
46✔
186
                debug!(
14✔
NEW
187
                    "using attribute filter {:?} for layer {:?}",
×
NEW
188
                    &filter, &dataset_information.layer_name
×
189
                );
190
                features_provider.set_attribute_filter(filter.as_str())?;
14✔
191
            }
32✔
192

193
            features_provider
46✔
194
        };
195

196
        let use_ogr_spatial_filter = dataset_information.force_ogr_spatial_filter
53✔
197
            || features_provider.has_gdal_capability(gdal::vector::LayerCaps::OLCFastSpatialFilter);
51✔
198

199
        if use_ogr_spatial_filter {
53✔
200
            debug!(
11✔
201
                "using spatial filter {:?} for layer {:?}",
×
202
                query_rectangle.spatial_bounds, &dataset_information.layer_name
×
203
            );
204
            // NOTE: the OGR-filter may be inaccurately allowing more features that should be returned in a "strict" fashion.
205
            features_provider.set_spatial_filter(&query_rectangle.spatial_bounds);
11✔
206
        }
42✔
207

208
        Ok((features_provider, time_filter.is_some()))
53✔
209
    }
53✔
210

211
    fn open_gdal_dataset(dataset_info: &OgrSourceDataset) -> Result<Dataset> {
54✔
212
        if Self::is_csv(dataset_info) {
54✔
213
            Self::open_csv_dataset(dataset_info)
21✔
214
        } else {
215
            gdal_open_dataset_ex(
33✔
216
                &dataset_info.file_name,
33✔
217
                DatasetOptions {
33✔
218
                    open_flags: GdalOpenFlags::GDAL_OF_VECTOR,
33✔
219
                    ..Default::default()
33✔
220
                },
33✔
221
            )
33✔
222
        }
223
    }
54✔
224

225
    fn open_csv_dataset(dataset_info: &OgrSourceDataset) -> Result<Dataset> {
21✔
226
        let columns = dataset_info
21✔
227
            .columns
21✔
228
            .as_ref()
21✔
229
            .ok_or(error::Error::OgrSourceColumnsSpecMissing)?;
21✔
230

231
        let allowed_drivers = Some(vec!["CSV"]);
21✔
232

21✔
233
        let mut dataset_options = DatasetOptions {
21✔
234
            open_flags: GdalOpenFlags::GDAL_OF_VECTOR,
21✔
235
            allowed_drivers: allowed_drivers.as_deref(),
21✔
236
            ..DatasetOptions::default()
21✔
237
        };
21✔
238

239
        let headers = if let Some(FormatSpecifics::Csv { header }) = &columns.format_specifics {
21✔
240
            header.as_gdal_param()
21✔
241
        } else {
242
            CsvHeader::Auto.as_gdal_param()
×
243
        };
244

245
        // TODO: make column x optional or allow other indication for data collection
246
        if columns.x.is_empty() {
21✔
247
            let open_opts = &[
13✔
248
                headers.as_str(),
13✔
249
                // "AUTODETECT_TYPE=YES", // This breaks tests
13✔
250
            ];
13✔
251
            dataset_options.open_options = Some(open_opts);
13✔
252
            return gdal_open_dataset_ex(&dataset_info.file_name, dataset_options);
13✔
253
        }
8✔
254

255
        if let Some(y) = &columns.y {
8✔
256
            let open_opts = &[
8✔
257
                &format!("X_POSSIBLE_NAMES={}", columns.x),
8✔
258
                &format!("Y_POSSIBLE_NAMES={y}"),
8✔
259
                headers.as_str(),
8✔
260
                "AUTODETECT_TYPE=YES",
8✔
261
            ];
8✔
262
            dataset_options.open_options = Some(open_opts);
8✔
263
            return gdal_open_dataset_ex(&dataset_info.file_name, dataset_options);
8✔
264
        }
×
265

×
266
        let open_opts = &[
×
267
            &format!("GEOM_POSSIBLE_NAMES={}", columns.x),
×
268
            headers.as_str(),
×
269
            "AUTODETECT_TYPE=YES",
×
270
        ];
×
271
        dataset_options.open_options = Some(open_opts);
×
272
        gdal_open_dataset_ex(&dataset_info.file_name, dataset_options)
×
273
    }
21✔
274

275
    fn is_csv(dataset_info: &OgrSourceDataset) -> bool {
54✔
276
        if let Some("csv" | "tsv") = dataset_info.file_name.extension().and_then(OsStr::to_str) {
54✔
277
            return true;
17✔
278
        }
37✔
279

37✔
280
        dataset_info.file_name.as_path().starts_with("CSV:")
37✔
281
    }
54✔
282

283
    pub fn was_spatial_filtered_by_ogr(&self) -> bool {
248✔
284
        self.was_spatial_filtered_by_ogr
248✔
285
    }
248✔
286

287
    pub fn was_time_filtered_by_ogr(&self) -> bool {
248✔
288
        self.was_time_filtered_by_ogr
248✔
289
    }
248✔
290
}
291

292
#[allow(clippy::copy_iterator)]
293
impl<'f> Iterator for &'f mut OgrDatasetIterator {
294
    type Item = Feature<'f>;
295

296
    fn next(&mut self) -> Option<Self::Item> {
9,816✔
297
        // fuse
9,816✔
298
        if self.has_ended.get() {
9,816✔
299
            return None;
47✔
300
        }
9,769✔
301

9,769✔
302
        let features_provider = self.dataset_iterator.borrow_features_provider();
9,769✔
303

9,769✔
304
        // We somehow have to tell the reference to adhere to the lifetime `'f`
9,769✔
305
        // On the other hand, we could implement this for `&'f _` instead of `&'f mut _` and get rid of the transmute.
9,769✔
306
        // However, it makes more sense to require a mutable reference here.
9,769✔
307
        let features_provider = unsafe { std::mem::transmute::<&'_ _, &'f _>(features_provider) };
9,769✔
308

9,769✔
309
        let next = feature_iterator_next(features_provider);
9,769✔
310

9,769✔
311
        if next.is_none() {
9,769✔
312
            self.has_ended.set(true);
51✔
313
        }
9,718✔
314

315
        next
9,769✔
316
    }
9,816✔
317
}
318

319
// `next` keeps returning `None` after the first `None` (see the `has_ended` flag).
impl FusedIterator for &mut OgrDatasetIterator {}
320

321
// TODO: add this to the `gdal` crate
322
#[inline]
323
fn feature_iterator_next<'f>(features_provider: &'f FeaturesProvider) -> Option<Feature<'f>> {
9,769✔
324
    let layer_ref = features_provider.layer_ref();
9,769✔
325

9,769✔
326
    let c_feature = unsafe { gdal_sys::OGR_L_GetNextFeature(layer_ref.c_layer()) };
9,769✔
327
    if c_feature.is_null() {
9,769✔
328
        None
51✔
329
    } else {
330
        Some(unsafe { Feature::from_c_feature(layer_ref.defn(), c_feature) })
9,718✔
331
    }
332
}
9,769✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc