• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OISF / suricata / 22550902417

01 Mar 2026 07:32PM UTC coverage: 68.401% (-5.3%) from 73.687%
22550902417

Pull #14922

github

web-flow
github-actions: bump actions/upload-artifact from 6.0.0 to 7.0.0

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 6.0.0 to 7.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v6...v7)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: 7.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Pull Request #14922: github-actions: bump actions/upload-artifact from 6.0.0 to 7.0.0

218243 of 319063 relevant lines covered (68.4%)

3284926.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.26
/rust/htp/src/utf8_decoder.rs
1
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
4
// and associated documentation files (the "Software"), to deal in the Software without restriction,
5
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
6
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
7
// furnished to do so, subject to the following conditions:
8
//
9
// The above copyright notice and this permission notice shall be included in all copies or
10
// substantial portions of the Software.
11
//
12
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
13
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
14
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
15
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17
//
18
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
19
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
20
use crate::{
21
    bstr::Bstr,
22
    config::{DecoderConfig, HtpUnwanted},
23
    unicode_bestfit_map::UnicodeBestfitMap,
24
    util::{FlagOperations, HtpFlags},
25
};
26

27
/// Lookup table for the strict UTF-8 decoding automaton (see the Hoehrmann
/// reference in the file header).
///
/// Layout:
/// * entries `0..256` give the character class of each possible input byte;
/// * entries `256..400` hold nine rows of sixteen next-state values, addressed
///   as `256 + state * 16 + class`.
///
/// A next-state value of 0 means a character was completed, 1 means the input
/// was rejected, and anything else means more bytes are needed.
///
/// The decoder in this file only reads the transition rows of this table; the
/// byte classes it uses come from `utf8d_allow_overlong`.
#[rustfmt::skip]
static utf8d: [u8; 400] = [
    // Byte classes, sixteen input bytes per row.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..=0x0f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..=0x1f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20..=0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30..=0x3f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40..=0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50..=0x5f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60..=0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70..=0x7f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80..=0x8f
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x90..=0x9f
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xa0..=0xaf
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xb0..=0xbf
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0..=0xcf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0..=0xdf
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // 0xe0..=0xef
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0xf0..=0xff
    // State transitions, one row per state, one column per class (0..=15).
    0, 1, 2, 3, 5, 8, 7, 1, 1, 1, 4, 6, 1, 1, 1, 1, // state 0
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // state 1
    1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // state 2
    1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, // state 3
    1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // state 4
    1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, // state 5
    1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // state 6
    1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // state 7
    1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // state 8
];
43
/// Variant of `utf8d` whose byte classes let overlong encodings through.
///
/// The layout is the same: entries `0..256` are byte classes and entries
/// `256..400` are next-state rows addressed as `256 + state * 16 + class`.
/// Compared with `utf8d`, the lead bytes 0xc0 and 0xc1 are class 2 rather
/// than 8, 0xe0 is class 3 rather than 10, and 0xf0 is class 6 rather than 11;
/// every other entry is identical.
#[rustfmt::skip]
static utf8d_allow_overlong: [u8; 400] = [
    // Byte classes, sixteen input bytes per row.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..=0x0f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..=0x1f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20..=0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30..=0x3f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40..=0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50..=0x5f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60..=0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70..=0x7f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80..=0x8f
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x90..=0x9f
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xa0..=0xaf
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xb0..=0xbf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0..=0xcf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0..=0xdf
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // 0xe0..=0xef
    6, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0xf0..=0xff
    // State transitions, one row per state, one column per class (0..=15).
    0, 1, 2, 3, 5, 8, 7, 1, 1, 1, 4, 6, 1, 1, 1, 1, // state 0
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // state 1
    1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // state 2
    1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, // state 3
    1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // state 4
    1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, // state 5
    1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // state 6
    1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // state 7
    1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // state 8
];
59

60
/// Incremental UTF-8 path decoder that turns a byte stream into a
/// single-byte-per-character stream using a best-fit map.
#[derive(Clone)]
pub(crate) struct Utf8Decoder {
    /// Map used to convert codepoints of 0x100 and above to one byte; it also
    /// supplies the replacement byte emitted for invalid input.
    bestfit_map: UnicodeBestfitMap,
    /// Current automaton state: 0 when a character was accepted, 1 when the
    /// input was rejected, any other value while a character is still being formed.
    state: u32,
    /// Number of bytes consumed so far for the character currently being decoded.
    seq: u32,
    /// Codepoint accumulated from the bytes of the current character.
    codepoint: u32,
    /// `HtpFlags` bits raised while decoding the most recent input.
    pub(crate) flags: u64,
    /// Whether at least one valid multi-byte character was decoded.
    pub(crate) seen_valid: bool,
    /// Output of the most recent decode: one byte per decoded character.
    pub(crate) decoded_bytes: Vec<u8>,
}
70

71
impl Utf8Decoder {
72
    /// Make a new owned Utf8Decoder
73
    pub(crate) fn new(bestfit_map: UnicodeBestfitMap) -> Self {
2,644✔
74
        Self {
2,644✔
75
            bestfit_map,
2,644✔
76
            state: 0,
2,644✔
77
            seq: 0,
2,644✔
78
            codepoint: 0,
2,644✔
79
            flags: 0,
2,644✔
80
            seen_valid: false,
2,644✔
81
            decoded_bytes: Vec::new(),
2,644✔
82
        }
2,644✔
83
    }
2,644✔
84

85
    /// Decode utf8 byte using best-fit map.
86
    fn decode_byte(&mut self, encoded_byte: u8, is_last_byte: bool) {
65,876✔
87
        self.seq = self.seq.wrapping_add(1);
65,876✔
88
        self.decode_byte_allow_overlong(encoded_byte as u32);
65,876✔
89
        match self.state {
65,876✔
90
            0 => {
91
                if self.seq == 1 {
65,846✔
92
                    // ASCII character, which we just copy.
65,842✔
93
                    self.decoded_bytes.push(self.codepoint as u8);
65,842✔
94
                } else {
65,842✔
95
                    // A valid UTF-8 character, which we need to convert.
96
                    self.seen_valid = true;
4✔
97
                    // Check for overlong characters and set the flag accordingly.
4✔
98
                    if (self.seq == 2 && self.codepoint < 0x80)
4✔
99
                        || (self.seq == 3 && self.codepoint < 0x800)
4✔
100
                        || (self.seq == 4 && self.codepoint < 0x10000)
4✔
101
                    {
×
102
                        self.flags.set(HtpFlags::PATH_UTF8_OVERLONG);
×
103
                    }
4✔
104
                    // Special flag for half-width/full-width evasion.
105
                    if self.codepoint >= 0xff00 && self.codepoint <= 0xffef {
4✔
106
                        self.flags.set(HtpFlags::PATH_HALF_FULL_RANGE)
×
107
                    }
4✔
108
                    // Use best-fit mapping to convert to a single byte.
109
                    self.decoded_bytes.push(self.bestfit_codepoint());
4✔
110
                }
111
                self.seq = 0;
65,846✔
112
            }
113
            1 => {
114
                // Invalid UTF-8 character.
115
                self.flags.set(HtpFlags::PATH_UTF8_INVALID);
15✔
116
                // Output the replacement byte, replacing one or more invalid bytes.
15✔
117
                // If the invalid byte was first in a sequence, consume it. Otherwise,
15✔
118
                // assume it's the starting byte of the next character.
15✔
119
                self.state = 0;
15✔
120
                self.codepoint = 0;
15✔
121
                self.decoded_bytes.push(self.bestfit_map.replacement_byte);
15✔
122
                if self.seq != 1 {
15✔
123
                    self.seq = 0;
10✔
124
                    self.decode_byte(encoded_byte, is_last_byte);
10✔
125
                } else {
10✔
126
                    self.seq = 0;
5✔
127
                }
5✔
128
            }
129
            _ => {
130
                // The character is not yet formed.
131
                if is_last_byte {
15✔
132
                    // If the last input chunk ended with an incomplete byte sequence for a code point,
133
                    // this is an error and a replacement character is emitted hence starting from 1 not 0
134
                    for _ in 1..self.seq {
1✔
135
                        self.decoded_bytes.push(self.bestfit_map.replacement_byte);
×
136
                    }
×
137
                }
14✔
138
            }
139
        }
140
    }
65,876✔
141

142
    /// Decode a UTF-8 encoded path. Replaces a possibly-invalid utf8 byte stream
143
    /// with an ascii stream, storing the result in self.decoded_bytes. Overlong
144
    /// characters will be decoded and invalid characters will be replaced with
145
    /// the replacement byte specified in the bestfit_map. Best-fit mapping will be used
146
    /// to convert UTF-8 into a single-byte stream.
147
    fn decode_and_validate(&mut self, input: &[u8]) {
2,644✔
148
        //Reset all internals
2,644✔
149
        self.state = 0;
2,644✔
150
        self.seq = 0;
2,644✔
151
        self.codepoint = 0;
2,644✔
152
        self.flags = 0;
2,644✔
153
        self.decoded_bytes.clear();
2,644✔
154
        self.decoded_bytes.reserve(input.len());
2,644✔
155
        self.seen_valid = false;
2,644✔
156
        for (byte, is_last) in input
65,866✔
157
            .iter()
2,644✔
158
            .enumerate()
2,644✔
159
            .map(|(i, b)| (b, i + 1 == input.len()))
65,866✔
160
        {
65,866✔
161
            self.decode_byte(*byte, is_last);
65,866✔
162
        }
65,866✔
163
        // Did the input stream seem like a valid UTF-8 string?
164
        if self.seen_valid && !self.flags.is_set(HtpFlags::PATH_UTF8_INVALID) {
2,644✔
165
            self.flags.set(HtpFlags::PATH_UTF8_VALID)
4✔
166
        }
2,640✔
167
    }
2,644✔
168

169
    /// Process one byte of UTF-8 data and set the code point if one is available. Allows
170
    /// overlong characters in input.
171
    ///
172
    /// Sets the state to ACCEPT(0) for a valid character, REJECT(1) for an invalid character,
173
    ///         or OTHER(u32) if the character has not yet been formed
174
    fn decode_byte_allow_overlong(&mut self, byte: u32) {
65,876✔
175
        let type_0: u32 = utf8d_allow_overlong[byte as usize] as u32;
65,876✔
176
        self.codepoint = if self.state != 0 {
65,876✔
177
            (byte & 0x3f) | (self.codepoint << 6)
14✔
178
        } else {
179
            (0xff >> type_0) & byte
65,862✔
180
        };
181
        self.state = utf8d[(256u32)
65,876✔
182
            .wrapping_add((self.state).wrapping_mul(16))
65,876✔
183
            .wrapping_add(type_0) as usize] as u32;
65,876✔
184
    }
65,876✔
185

186
    /// Convert a Unicode codepoint into a single-byte, using best-fit
187
    /// mapping (as specified in the provided configuration structure).
188
    ///
189
    /// Returns converted single byte
190
    fn bestfit_codepoint(&self) -> u8 {
4✔
191
        // Is it a single-byte codepoint?
4✔
192
        if self.codepoint < 0x100 {
4✔
193
            return self.codepoint as u8;
4✔
194
        }
×
195
        self.bestfit_map.get(self.codepoint)
×
196
    }
4✔
197
}
198

199
/// Decode a UTF-8 encoded path. Replaces a possibly-invalid utf8 byte stream with
200
/// an ascii stream. Overlong characters will be decoded and invalid characters will
201
/// be replaced with the replacement byte specified in the cfg. Best-fit mapping will
202
/// be used to convert UTF-8 into a single-byte stream. The resulting decoded path will
203
/// be stored in the input path if the transaction cfg indicates it
204
pub(crate) fn decode_and_validate_inplace(
2,644✔
205
    cfg: &DecoderConfig, flags: &mut u64, status: &mut HtpUnwanted, path: &mut Bstr,
2,644✔
206
) {
2,644✔
207
    let mut decoder = Utf8Decoder::new(cfg.bestfit_map);
2,644✔
208
    decoder.decode_and_validate(path.as_slice());
2,644✔
209
    if cfg.utf8_convert_bestfit {
2,644✔
210
        path.clear();
2,161✔
211
        path.add(decoder.decoded_bytes.as_slice());
2,161✔
212
    }
2,624✔
213
    flags.set(decoder.flags);
2,644✔
214

2,644✔
215
    if flags.is_set(HtpFlags::PATH_UTF8_INVALID) && cfg.utf8_invalid_unwanted != HtpUnwanted::Ignore
2,644✔
216
    {
×
217
        *status = cfg.utf8_invalid_unwanted;
×
218
    }
2,644✔
219
}
2,644✔
220
#[cfg(test)]
mod tests {
    use crate::{
        bstr::Bstr, config::Config, config::HtpUnwanted, utf8_decoder::decode_and_validate_inplace,
    };
    use rstest::rstest;

    /// Decodes each input with best-fit conversion enabled and checks the
    /// rewritten path. The bit patterns above the cases spell out the input bytes.
    #[rstest]
    #[case(b"\xf1.\xf1\xef\xbd\x9dabcd", "?.?}abcd")]
    // 1111 0000 1001 0000 1000 1101 1111 1111
    #[case::invalid_incomplete_seq(b"\xf0\x90\x8d\xff", "??")]
    // 1110 0010 1000 0010
    #[case::invalid_incomplete_seq(b"\xe2\x82", "?")]
    // 1100 0010 1111 1111 1111 0000
    #[case::invalid_incomplete_seq(b"\xc2\xff\xf0", "??")]
    // 1111 0000 1001 0000 0010 1000 1011 1100
    #[case::invalid_incomplete_seq(b"\xf0\x90\x28\xbc", "?(?")]
    fn test_decode_and_validate_inplace(#[case] input: &[u8], #[case] expected: &str) {
        let mut cfg = Config::default();
        cfg.set_utf8_convert_bestfit(true);

        let mut path = Bstr::from(input);
        let mut flags = 0;
        let mut status = HtpUnwanted::Ignore;
        decode_and_validate_inplace(&cfg.decoder_cfg, &mut flags, &mut status, &mut path);

        assert_eq!(path, Bstr::from(expected));
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc