• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 6815798908

09 Nov 2023 05:17PM UTC coverage: 72.607% (-2.4%) from 75.01%
6815798908

push

github

web-flow
Implement `Any/BufferProvider` for some smart pointers (#4255)

Allows storing them as a `Box<dyn Any/BufferProvider>` without using a
wrapper type that implements the trait.

44281 of 60987 relevant lines covered (72.61%)

201375.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

12.12
/components/segmenter/src/provider/mod.rs
1
// This file is part of ICU4X. For terms of use, please see the file
×
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
6
//!
7
//! <div class="stab unstable">
8
//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
9
//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
10
//! to be stable, their Rust representation might not be. Use with caution.
11
//! </div>
12
//!
13
//! Read more about data providers: [`icu_provider`]
14

15
// Provider structs must be stable
16
#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
17

18
mod lstm;
19
pub use lstm::*;
20

21
// Re-export this from the provider module because it is needed by datagen
22
#[cfg(feature = "datagen")]
23
pub use crate::rule_segmenter::RuleStatusType;
24

25
use icu_collections::codepointtrie::CodePointTrie;
26
use icu_provider::prelude::*;
27
use zerovec::ZeroVec;
28

29
#[cfg(feature = "compiled_data")]
30
#[derive(Debug)]
×
31
/// Baked data
32
///
33
/// <div class="stab unstable">
34
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
35
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
36
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
37
/// </div>
38
pub struct Baked;
39

40
#[cfg(feature = "compiled_data")]
41
const _: () = {
    // Anonymous constant: items declared inside this block cannot be named from
    // the rest of the module, so the helper module below stays private to it.
42
    // Aliases this crate as `icu::segmenter` and `icu_collections` as
    // `icu::collections`. NOTE(review): presumably these are the paths the
    // macros below expand to — confirm against `icu_segmenter_data`.
    pub mod icu {
43
        pub use crate as segmenter;
44
        pub use icu_collections as collections;
45
    }
46
    // Every macro below is invoked on `Baked`. Their expansions are not visible
    // here; per the docs on `Baked` they presumably supply its `DataProvider`
    // implementations — TODO confirm.
    icu_segmenter_data::make_provider!(Baked);
47
    icu_segmenter_data::impl_segmenter_dictionary_w_auto_v1!(Baked);
48
    icu_segmenter_data::impl_segmenter_dictionary_wl_ext_v1!(Baked);
49
    icu_segmenter_data::impl_segmenter_grapheme_v1!(Baked);
50
    icu_segmenter_data::impl_segmenter_line_v1!(Baked);
51
    // The LSTM macro is only invoked when the `lstm` feature is enabled.
    #[cfg(feature = "lstm")]
52
    icu_segmenter_data::impl_segmenter_lstm_wl_auto_v1!(Baked);
53
    icu_segmenter_data::impl_segmenter_sentence_v1!(Baked);
54
    icu_segmenter_data::impl_segmenter_word_v1!(Baked);
55
};
56

57
#[cfg(feature = "datagen")]
58
/// The latest minimum set of keys required by this component.
///
/// Only compiled when the `datagen` feature is enabled.
59
pub const KEYS: &[DataKey] = &[
60
    DictionaryForWordLineExtendedV1Marker::KEY,
61
    DictionaryForWordOnlyAutoV1Marker::KEY,
62
    GraphemeClusterBreakDataV1Marker::KEY,
63
    LineBreakDataV1Marker::KEY,
64
    // NOTE(review): this key is listed unconditionally, whereas the baked LSTM
    // macro invocation in this file is gated on the `lstm` feature — confirm
    // the difference is intended.
    LstmForWordLineAutoV1Marker::KEY,
65
    SentenceBreakDataV1Marker::KEY,
66
    WordBreakDataV1Marker::KEY,
67
];
68

69
/// Pre-processed Unicode data in the form of tables to be used for rule-based breaking.
70
///
71
/// <div class="stab unstable">
72
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
73
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
74
/// to be stable, their Rust representation might not be. Use with caution.
75
/// </div>
76
#[icu_provider::data_struct(
23,265✔
77
    marker(LineBreakDataV1Marker, "segmenter/line@1", singleton),
78
    marker(WordBreakDataV1Marker, "segmenter/word@1", singleton),
79
    marker(GraphemeClusterBreakDataV1Marker, "segmenter/grapheme@1", singleton),
80
    marker(SentenceBreakDataV1Marker, "segmenter/sentence@1", singleton)
81
)]
23,265✔
82
#[derive(Debug, PartialEq, Clone)]
×
83
#[cfg_attr(
84
    feature = "datagen",
85
    derive(serde::Serialize,databake::Bake),
×
86
    databake(path = icu_segmenter::provider),
87
)]
88
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
×
89
pub struct RuleBreakDataV1<'data> {
90
    /// Property table for rule-based breaking.
91
    #[cfg_attr(feature = "serde", serde(borrow))]
92
    pub property_table: RuleBreakPropertyTable<'data>,
×
93

94
    /// Break state table for rule-based breaking.
95
    #[cfg_attr(feature = "serde", serde(borrow))]
96
    pub break_state_table: RuleBreakStateTable<'data>,
×
97

98
    /// Rule status table for rule-based breaking.
99
    #[cfg_attr(feature = "serde", serde(borrow))]
100
    pub rule_status_table: RuleStatusTable<'data>,
×
101

102
    /// Number of properties; should be the square root of the length of [`Self::break_state_table`].
103
    pub property_count: u8,
×
104

105
    /// The index of the last simple state for [`Self::break_state_table`]. (A simple state has
106
    /// neither `left` nor `right` in SegmenterProperty.)
107
    pub last_codepoint_property: i8,
×
108

109
    /// The index of SOT (start of text) state for [`Self::break_state_table`].
110
    pub sot_property: u8,
×
111

112
    /// The index of EOT (end of text) state for [`Self::break_state_table`].
113
    pub eot_property: u8,
×
114

115
    /// The index of "SA" state (or 127 if the complex language isn't handled) for
116
    /// [`Self::break_state_table`].
117
    pub complex_property: u8,
×
118
}
119

120
/// Property table for rule-based breaking.
121
///
122
/// <div class="stab unstable">
123
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
124
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
125
/// to be stable, their Rust representation might not be. Use with caution.
126
/// </div>
127
#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
×
128
#[cfg_attr(
129
    feature = "datagen",
130
    derive(serde::Serialize,databake::Bake),
×
131
    databake(path = icu_segmenter::provider),
132
)]
133
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
×
134
pub struct RuleBreakPropertyTable<'data>(
135
    #[cfg_attr(feature = "serde", serde(borrow))] pub CodePointTrie<'data, u8>,
×
136
);
137

138
/// Break state table for rule-based breaking.
139
///
140
/// <div class="stab unstable">
141
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
142
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
143
/// to be stable, their Rust representation might not be. Use with caution.
144
/// </div>
145
#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
×
146
#[cfg_attr(
147
    feature = "datagen",
148
    derive(serde::Serialize,databake::Bake),
×
149
    databake(path = icu_segmenter::provider),
150
)]
151
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
×
152
pub struct RuleBreakStateTable<'data>(
153
    #[cfg_attr(feature = "serde", serde(borrow))] pub ZeroVec<'data, i8>,
×
154
);
155

156
/// Rule status data for `rule_status` and `is_word_like` of the word segmenter.
157
///
158
/// <div class="stab unstable">
159
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
160
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
161
/// to be stable, their Rust representation might not be. Use with caution.
162
/// </div>
163
#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
×
164
#[cfg_attr(
165
    feature = "datagen",
166
    derive(serde::Serialize,databake::Bake),
×
167
    databake(path = icu_segmenter::provider),
168
)]
169
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
×
170
pub struct RuleStatusTable<'data>(
171
    #[cfg_attr(feature = "serde", serde(borrow))] pub ZeroVec<'data, u8>,
×
172
);
173

174
/// char16trie data for dictionary break
175
///
176
/// <div class="stab unstable">
177
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
178
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
179
/// to be stable, their Rust representation might not be. Use with caution.
180
/// </div>
181
#[icu_provider::data_struct(
31✔
182
    DictionaryForWordOnlyAutoV1Marker = "segmenter/dictionary/w_auto@1",
183
    DictionaryForWordLineExtendedV1Marker = "segmenter/dictionary/wl_ext@1"
184
)]
31✔
185
#[derive(Debug, PartialEq, Clone)]
×
186
#[cfg_attr(
187
    feature = "datagen",
188
    derive(serde::Serialize,databake::Bake),
×
189
    databake(path = icu_segmenter::provider),
190
)]
191
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
×
192
pub struct UCharDictionaryBreakDataV1<'data> {
193
    /// Dictionary data of char16trie.
194
    #[cfg_attr(feature = "serde", serde(borrow))]
195
    pub trie_data: ZeroVec<'data, u16>,
×
196
}
197

198
/// Crate-private marker type whose [`DataMarker`] implementation names
/// [`UCharDictionaryBreakDataV1`] as its yokeable data struct.
pub(crate) struct UCharDictionaryBreakDataV1Marker;
199

200
// Ties the crate-private marker to the `'static` form of the dictionary data
// struct via the `DataMarker::Yokeable` associated type.
impl DataMarker for UCharDictionaryBreakDataV1Marker {
201
    type Yokeable = UCharDictionaryBreakDataV1<'static>;
202
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc