• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 9457158389

10 Jun 2024 11:45PM UTC coverage: 75.174% (+0.05%) from 75.121%
9457158389

push

github

web-flow
Add constructing TinyAsciiStr from utf16 (#5025)

Introduces TinyAsciiStr constructors from utf16 and converges on the
consensus from #4931.

---------

Co-authored-by: Robert Bastian <4706271+robertbastian@users.noreply.github.com>

65 of 82 new or added lines in 14 files covered. (79.27%)

3441 existing lines in 141 files now uncovered.

52850 of 70304 relevant lines covered (75.17%)

563298.06 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.97
/tools/bakeddata-scripts/src/main.rs
1
// This file is part of ICU4X. For terms of use, please see the file
1✔
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
extern crate icu_datagen;
6

7
use icu_datagen::baked_exporter;
8
use icu_datagen::fs_exporter;
9
use icu_datagen::fs_exporter::serializers::AbstractSerializer;
10
use icu_datagen::prelude::*;
11
use icu_provider::datagen::*;
12
use icu_provider::prelude::*;
13
use std::collections::BTreeMap;
14
use std::fs::File;
15
use std::io::Write;
16
use std::path::Path;
17
use std::sync::Mutex;
18

19
/// Version of this crate's Cargo package, captured at compile time via
/// `CARGO_PKG_VERSION`. Used as the version of every non-experimental component.
const REPO_VERSION: &str = env!("CARGO_PKG_VERSION");
20
/// Version string used for the `experimental` component instead of
/// [`REPO_VERSION`].
const EXPERIMENTAL_VERSION: &str = "0.1.0";
21
/// Table of components that can be generated.
///
/// Each entry is `(component name, data markers to export, version string)`.
/// In `main` the name selects the output directory under `provider/baked` and
/// is substituted for `_component_` in the templates; the version string is
/// substituted for `_version_`.
const COMPONENTS: &[(&str, &[DataMarkerInfo], &str)] = &[
22
    ("calendar", icu::calendar::provider::MARKERS, REPO_VERSION),
23
    ("casemap", icu::casemap::provider::MARKERS, REPO_VERSION),
24
    ("collator", icu::collator::provider::MARKERS, REPO_VERSION),
25
    ("datetime", icu::datetime::provider::MARKERS, REPO_VERSION),
26
    ("decimal", icu::decimal::provider::MARKERS, REPO_VERSION),
27
    ("list", icu::list::provider::MARKERS, REPO_VERSION),
28
    ("locale", icu::locale::provider::MARKERS, REPO_VERSION),
29
    (
30
        "normalizer",
31
        icu::normalizer::provider::MARKERS,
32
        REPO_VERSION,
33
    ),
34
    ("plurals", icu::plurals::provider::MARKERS, REPO_VERSION),
35
    (
36
        "properties",
37
        icu::properties::provider::MARKERS,
38
        REPO_VERSION,
39
    ),
40
    ("segmenter", icu::segmenter::provider::MARKERS, REPO_VERSION),
41
    ("timezone", icu::timezone::provider::MARKERS, REPO_VERSION),
42
    // The experimental component carries its own version number.
    (
43
        "experimental",
44
        icu::experimental::provider::MARKERS,
45
        EXPERIMENTAL_VERSION,
46
    ),
47
];
48

UNCOV
49
fn main() {
×
50
    simple_logger::SimpleLogger::new()
×
51
        .env()
UNCOV
52
        .with_level(log::LevelFilter::Info)
×
53
        .init()
54
        .unwrap();
55

56
    let args = std::env::args().skip(1).collect::<Vec<_>>();
×
57

UNCOV
58
    let components = if args.is_empty() {
×
59
        COMPONENTS
×
60
            .iter()
61
            .map(|(krate, markers, version)| (krate.to_string(), *markers, *version))
×
62
            .collect::<Vec<_>>()
63
    } else {
64
        let map =
65
            std::collections::HashMap::<&str, (&'static [DataMarkerInfo], &'static str)>::from_iter(
×
UNCOV
66
                COMPONENTS
×
67
                    .iter()
UNCOV
68
                    .map(|(krate, markers, version)| (*krate, (*markers, *version))),
×
69
            );
UNCOV
70
        args.into_iter()
×
71
            .filter_map(|krate| {
×
UNCOV
72
                map.get(krate.as_str())
×
73
                    .map(|(markers, version)| (krate, *markers, *version))
×
74
            })
×
75
            .collect()
76
    };
×
77

UNCOV
78
    let source = DatagenProvider::new_latest_tested();
×
79

UNCOV
80
    let driver = DatagenDriver::new()
×
81
        .with_locales_and_fallback(
82
            source
×
83
                .locales_for_coverage_levels([
×
UNCOV
84
                    CoverageLevel::Modern,
×
UNCOV
85
                    CoverageLevel::Moderate,
×
86
                    CoverageLevel::Basic,
×
87
                ])
88
                .unwrap()
89
                .into_iter()
90
                .map(LocaleFamily::with_descendants),
×
91
            Default::default(),
×
UNCOV
92
        )
×
93
        .with_recommended_segmenter_models();
94

95
    let mut options = baked_exporter::Options::default();
×
UNCOV
96
    options.overwrite = true;
×
97
    options.pretty = true;
×
98

99
    for (component, markers, version) in &components {
×
UNCOV
100
        let path = Path::new("provider/baked").join(component);
×
101

UNCOV
102
        let _ = std::fs::remove_dir_all(&path);
×
103
        for dir in ["", "src", "data"] {
×
104
            std::fs::create_dir(&path.join(dir)).unwrap();
×
105
        }
UNCOV
106
        for (file, template) in [
×
UNCOV
107
            ("build.rs", include_str!("../template/build.rs.template")),
×
UNCOV
108
            (
×
109
                "Cargo.toml",
110
                include_str!("../template/Cargo.toml.template"),
111
            ),
112
            ("LICENSE", include_str!("../LICENSE")),
×
113
            ("README.md", include_str!("../template/README.md.template")),
×
114
            (
×
115
                "src/lib.rs",
116
                include_str!("../template/src/lib.rs.template"),
117
            ),
118
        ] {
UNCOV
119
            std::fs::write(
×
UNCOV
120
                path.join(file),
×
UNCOV
121
                template
×
UNCOV
122
                    .replace("_component_", component)
×
UNCOV
123
                    .replace("_version_", version)
×
124
                    .replace("_cldr_tag_", DatagenProvider::LATEST_TESTED_CLDR_TAG)
125
                    .replace(
126
                        "_icuexport_tag_",
127
                        DatagenProvider::LATEST_TESTED_ICUEXPORT_TAG,
128
                    )
129
                    .replace(
130
                        "_segmenter_lstm_tag_",
131
                        DatagenProvider::LATEST_TESTED_SEGMENTER_LSTM_TAG,
132
                    ),
133
            )
×
UNCOV
134
            .unwrap();
×
135
        }
136

137
        let baked_exporter =
138
            baked_exporter::BakedExporter::new(path.join("data"), options).unwrap();
×
139
        let fingerprinter = PostcardFingerprintExporter {
×
UNCOV
140
            size_hash: Default::default(),
×
UNCOV
141
            fingerprints: crlify::BufWriterWithLineEndingFix::new(
×
142
                File::create(path.join("fingerprints.csv")).unwrap(),
×
143
            ),
UNCOV
144
        };
×
145

146
        driver
×
147
            .clone()
×
UNCOV
148
            .with_markers(markers.iter().copied())
×
149
            .export(
150
                &source,
UNCOV
151
                MultiExporter::new(vec![Box::new(baked_exporter), Box::new(fingerprinter)]),
×
UNCOV
152
            )
×
153
            .unwrap();
154

UNCOV
155
        for file in ["data/any.rs", "data/mod.rs"] {
×
UNCOV
156
            std::fs::remove_file(path.join(file)).unwrap();
×
157
        }
158
    }
×
UNCOV
159
}
×
160

161
/// Exporter that records a size/hash fingerprint for every payload it is given
/// and writes them out through `fingerprints` when closed.
struct PostcardFingerprintExporter<F> {
162
    /// Maps `(marker, request string)` to `(serialized size in bytes, hash)`.
    /// Wrapped in a `Mutex` because `put_payload` only receives `&self`.
    size_hash: Mutex<BTreeMap<(DataMarkerInfo, String), (usize, u64)>>,
163
    /// Destination the fingerprint lines are written to in `close`.
    fingerprints: F,
164
}
165

166
impl<F: Write + Send + Sync> DataExporter for PostcardFingerprintExporter<F> {
167
    fn put_payload(
×
168
        &self,
169
        marker: DataMarkerInfo,
170
        locale: &DataLocale,
171
        marker_attributes: &DataMarkerAttributes,
172
        payload_before: &DataPayload<ExportMarker>,
173
    ) -> Result<(), DataError> {
UNCOV
174
        let mut serialized = vec![];
×
175

UNCOV
176
        fs_exporter::serializers::Postcard::new(Default::default())
×
177
            .serialize(payload_before, &mut serialized)?;
×
178

179
        let size = serialized.len();
×
180

181
        // We're using SipHash, which is deprecated, but we want a stable hasher
182
        // (we're fine with it not being cryptographically secure since we're just using it to track diffs)
183
        #[allow(deprecated)]
184
        use std::hash::{Hash, Hasher, SipHasher};
185
        #[allow(deprecated)]
UNCOV
186
        let mut hasher = SipHasher::new();
×
187
        serialized.iter().for_each(|b| b.hash(&mut hasher));
×
UNCOV
188
        let hash = hasher.finish();
×
189

UNCOV
190
        self.size_hash.lock().expect("poison").insert(
×
191
            (
×
192
                marker,
UNCOV
193
                if marker.is_singleton && locale.is_und() {
×
194
                    "<singleton>".to_string()
×
195
                } else if !marker_attributes.is_empty() {
×
UNCOV
196
                    format!(
×
197
                        "{locale}/{marker_attributes}",
198
                        marker_attributes = marker_attributes as &str
×
199
                    )
200
                } else {
201
                    locale.to_string()
×
202
                },
203
            ),
204
            (size, hash),
×
UNCOV
205
        );
×
206

207
        Ok(())
×
UNCOV
208
    }
×
209

210
    fn flush(&self, _marker: DataMarkerInfo) -> Result<(), DataError> {
×
211
        Ok(())
×
UNCOV
212
    }
×
213

214
    fn flush_with_built_in_fallback(
×
215
        &self,
216
        _marker: DataMarkerInfo,
217
        _fallback_mode: BuiltInFallbackMode,
×
218
    ) -> Result<(), DataError> {
UNCOV
219
        Ok(())
×
UNCOV
220
    }
×
221

UNCOV
222
    fn close(&mut self) -> Result<(), DataError> {
×
UNCOV
223
        let mut seen = std::collections::HashMap::new();
×
UNCOV
224
        for ((marker, req), (size, hash)) in self.size_hash.get_mut().expect("poison").iter() {
×
UNCOV
225
            if let Some(deduped_req) = seen.get(hash) {
×
UNCOV
226
                writeln!(
×
UNCOV
227
                    &mut self.fingerprints,
×
228
                    "{marker}, {req}, {size}B, -> {deduped_req}",
UNCOV
229
                )?;
×
230
            } else {
UNCOV
231
                writeln!(&mut self.fingerprints, "{marker}, {req}, {size}B, {hash:x}",)?;
×
UNCOV
232
                seen.insert(hash, req);
×
233
            }
234
        }
UNCOV
235
        Ok(())
×
UNCOV
236
    }
×
UNCOV
237
    fn supports_built_in_fallback(&self) -> bool {
×
238
        true
UNCOV
239
    }
×
240
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc