• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonbitlang / x / 383

08 Mar 2025 12:31AM UTC coverage: 88.643% (-0.9%) from 89.592%
383

Pull #112

github

web-flow
Merge 1bab852be into 4fce8878f
Pull Request #112: perf(encoding): optimize char accumulation

10 of 29 new or added lines in 2 files covered. (34.48%)

28 existing lines in 1 file now uncovered.

1280 of 1444 relevant lines covered (88.64%)

428.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.99
/encoding/decoding.mbt
1
// Copyright 2024 International Digital Economy Academy
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
///|
/// The Unicode Replacement Character (`U+FFFD`), written in place of invalid
/// or unrecognized sequences during lossy decoding.
///
/// See https://unicode.org/charts/nameslist/n_FFF0.html
pub const U_REP : Char = '\u{FFFD}'
18

19
///|
/// UTF-8 sequence length indexed by the value of the leading byte (256
/// entries, one row per 16 byte values). An entry of `0` marks a byte that
/// `decode_utf_8` reports as malformed when it appears in leading position.
let utf_8_len = [
  // 0x00..0x7F: single-byte sequences
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // 0x80..0xBF: never valid as a leading byte
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0xC0..0xC1 rejected, 0xC2..0xCF: two-byte sequences
  0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  // 0xD0..0xDF: two-byte sequences
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  // 0xE0..0xEF: three-byte sequences
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  // 0xF0..0xF4: four-byte sequences, 0xF5..0xFF rejected
  4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
]
32

33
///|
/// Compute a size estimate for decoding the UTF-8 bytes in `utf_8_bs`.
///
/// Every byte contributes one to the result and every ASCII byte (`<= 0x7F`)
/// contributes one more, so the result is `length + ascii_count`.
/// The callers in this file pass the result to `StringBuilder::new` as
/// `size_hint`.
fn estimate_utf_16_len(utf_8_bs : FixedArray[Byte]) -> Int {
  // Start from the raw byte count, then add one extra per ASCII byte.
  let mut estimate = utf_8_bs.length()
  for byte in utf_8_bs {
    if byte <= 0x7F {
      estimate += 1
    }
  }
  estimate
}
45

46
///|
/// Create and return a `Decoder` for the specified character encoding.
///
/// The `Decoder` consumes byte sequences and decodes them into the original string format.
///
/// # Parameters
///
/// - `encoding`: The character encoding format to be used for decoding the input byte sequences.
///   `UTF16` is decoded with the same routine as `UTF16LE`.
///
/// # Returns
///
/// A `Decoder` instance that can be used to decode byte sequences into strings.
/// It starts with an empty input buffer, a 4-byte scratch buffer and no
/// pending partial sequence.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = @encoding.decoder(UTF8)
/// inspect!(decoder.consume!(inputs[0]), content="abc")
/// inspect!(decoder.consume!(inputs[1]), content="")
/// inspect!(decoder.consume!(inputs[2]), content="🐰")
/// assert_true!(decoder.finish!().is_empty())
/// ```
pub fn decoder(encoding : Encoding) -> Decoder {
  // Pick the initial continuation for the requested encoding.
  let k = match encoding {
    UTF8 => decode_utf_8
    UTF16 | UTF16LE => decode_utf_16le
    UTF16BE => decode_utf_16be
  }
  {
    i: FixedArray::default(),
    i_pos: 0,
    // Scratch buffer for a sequence split across inputs (at most 4 bytes).
    t: FixedArray::make(4, Byte::default()),
    t_len: 0,
    t_need: 0,
    k,
    encoding,
  }
}
82

83
///|
/// Decode `input` with this `Decoder` and return the decoded text.
///
/// The decoder can be fed incrementally. Pass `stream=true` while more bytes
/// will follow; pass `stream=false` (the default) to signal the end of input.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded. An empty `input` leaves the
///   internal buffer unchanged.
/// - `stream~`: Whether more bytes will be supplied later. Defaults to `false`.
///
/// # Returns
///
/// A `String` with the characters decoded by this call. It is empty when no
/// bytes remain to be decoded.
///
/// # Errors
///
/// `MalformedError`: when a byte sequence is not valid for the encoding. In
/// streaming mode with a pending partial sequence (`t_need > 0`) the text
/// decoded so far is returned instead.
/// `TruncatedError`: when `stream` is `false` and the input ends in the middle
/// of a sequence.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = @encoding.decoder(UTF8)
/// inspect!(decoder.decode!(inputs[0], stream=true), content="abc")
/// inspect!(decoder.decode!(inputs[1], stream=true), content="")
/// inspect!(decoder.decode!(inputs[2], stream=false), content="🐰")
/// ```
pub fn decode!(self : Decoder, input : Bytes, stream~ : Bool = false) -> String {
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return String::default()
  }
  // Pre-size the output buffer from the buffered input.
  let capacity = match self.encoding {
    UTF8 => estimate_utf_16_len(self.i)
    UTF16 | UTF16LE | UTF16BE => self.i_rem()
  }
  let out = StringBuilder::new(size_hint=capacity)

  // Pull decode events until one of them ends this call.
  loop self.decode_() {
    Uchar(ch) => {
      out.write_char(ch)
      continue self.decode_()
    }
    End => out.to_string()
    Refill(pending) =>
      if not(stream) {
        raise TruncatedError(pending)
      } else {
        out.to_string()
      }
    Malformed(bad) =>
      if stream && self.t_need > 0 {
        out.to_string()
      } else {
        raise MalformedError(bad)
      }
  }
}
147

148
///|
/// Feed `input` to the `Decoder` in streaming mode and return what could be
/// decoded so far.
///
/// Equivalent to `decode!(input, stream=true)`: the caller promises that more
/// bytes will follow.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to consume the byte sequence.
/// - `input`: The byte sequence to be consumed and decoded incrementally.
///
/// # Returns
///
/// A `String` with the characters decoded from the bytes received so far.
///
/// # Errors
///
/// Propagates any error raised by `decode!` (`MalformedError`,
/// `TruncatedError`).
pub fn consume!(self : Decoder, input : Bytes) -> String {
  let chunk = self.decode!(input, stream=true)
  chunk
}
169

170
///|
/// Signal the end of input and return the remaining decoded text.
///
/// Equivalent to `decode!(b"", stream=false)`: no new bytes are supplied and
/// the decoder performs its final decoding step.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to finalize the decoding process.
///
/// # Returns
///
/// A `String` with whatever could still be decoded from buffered bytes.
///
/// # Errors
///
/// `MalformedError`: raised if the remaining bytes are not valid for the encoding.
/// `TruncatedError`: raised if the remaining bytes end in the middle of a sequence.
pub fn finish!(self : Decoder) -> String {
  let tail = self.decode!(b"", stream=false)
  tail
}
191

192
///|
/// Decode `input` with this `Decoder`, writing the Unicode Replacement
/// Character (`U+FFFD`) wherever an invalid sequence is found instead of
/// raising an error.
///
/// The decoder can be fed incrementally. Pass `stream=true` while more bytes
/// will follow; pass `stream=false` (the default) to signal the end of input.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded. An empty `input` leaves the
///   internal buffer unchanged.
/// - `stream~`: Whether more bytes will be supplied later. Defaults to `false`.
///
/// # Returns
///
/// A `String` with the characters decoded by this call, with invalid
/// sequences replaced by `U+FFFD`. It is empty when no bytes remain to be
/// decoded.
pub fn decode_lossy(
  self : Decoder,
  input : Bytes,
  stream~ : Bool = false
) -> String {
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return String::default()
  }
  // Pre-size the output buffer from the buffered input.
  let capacity = match self.encoding {
    UTF8 => estimate_utf_16_len(self.i)
    UTF16 | UTF16LE | UTF16BE => self.i_rem()
  }
  let out = StringBuilder::new(size_hint=capacity)

  // Pull decode events until one of them ends this call.
  loop self.decode_() {
    Uchar(ch) => {
      out.write_char(ch)
      continue self.decode_()
    }
    End => out.to_string()
    Refill(_) =>
      // At end of input, keep driving the decoder instead of stopping here.
      if not(stream) {
        continue self.decode_()
      } else {
        out.to_string()
      }
    Malformed(_) =>
      // Streaming with a pending partial sequence: return what we have.
      if stream && self.t_need > 0 {
        out.to_string()
      } else {
        out.write_char(U_REP)
        continue self.decode_()
      }
  }
}
246

247
///|
/// Feed `input` to the `Decoder` in streaming mode and return what could be
/// decoded so far, with invalid sequences replaced by `U+FFFD`.
///
/// Equivalent to `decode_lossy(input, stream=true)`: the caller promises that
/// more bytes will follow.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to consume and decode the byte sequence.
/// - `input`: The byte sequence to be consumed and decoded incrementally.
///
/// # Returns
///
/// A `String` with the characters decoded from the bytes received so far.
pub fn lossy_consume(self : Decoder, input : Bytes) -> String {
  let chunk = self.decode_lossy(input, stream=true)
  chunk
}
263

264
///|
/// Signal the end of input for lossy decoding and return the remaining
/// decoded text, with invalid sequences replaced by `U+FFFD`.
///
/// Equivalent to `decode_lossy(b"", stream=false)`: no new bytes are supplied
/// and the decoder performs its final decoding step.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to finalize the lossy decoding process.
///
/// # Returns
///
/// A `String` with whatever could still be decoded from buffered bytes.
pub fn lossy_finish(self : Decoder) -> String {
  let tail = self.decode_lossy(b"", stream=false)
  tail
}
280

281
///|
/// Replace the input buffer `i` with the not-yet-decoded tail of the old
/// buffer followed by `input`, and rewind `i_pos` to 0.
fn i_cont(self : Decoder, input : Bytes) -> Unit {
  // `i_rem()` can be negative; clamp it so it is a valid length.
  let carried = @math.maximum(self.i_rem(), 0)
  let added = input.length()
  let merged = FixedArray::make(carried + added, Byte::default())
  if carried > 0 {
    // Keep the unread bytes of the previous buffer at the front.
    self.i.blit_to(merged, len=carried, src_offset=self.i_pos)
  }
  // Append the new bytes right after the carried-over ones.
  merged.blit_from_bytes(carried, input, 0, added)
  self.i = merged
  self.i_pos = 0
}
298

299
// Implementations
300

301
///|
/// Run one decoding step by invoking the continuation currently stored in `k`.
fn decode_(self : Decoder) -> Decode {
  let step = self.k
  step(self)
}
305

306
///|
/// Store `k` as the continuation for the next decoding step and hand back `v`
/// unchanged as the result of the current step.
fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
  self.k = k
  v
}
311

312
///|
/// Number of bytes of `i` that have not been read yet, i.e. the buffer length
/// minus `i_pos`. The value is negative when `i_pos` exceeds the buffer length
/// (for instance after `eoi` swapped in an empty buffer).
fn i_rem(self : Decoder) -> Int {
  let consumed = self.i_pos
  self.i.length() - consumed
}
316

317
///|
/// Start collecting a new sequence in the scratch buffer: record that `need`
/// bytes are required and mark the scratch buffer as empty.
fn t_need(self : Decoder, need : Int) -> Unit {
  self.t_need = need
  self.t_len = 0
}
322

323
///|
/// Drop the input buffer by replacing `i` with an empty array. `i_pos` is left
/// untouched.
fn eoi(self : Decoder) -> Unit {
  let empty : FixedArray[Byte] = FixedArray::default()
  self.i = empty
}
327

328
///|
/// Ask the caller for more input: discard the exhausted input buffer, install
/// `k` as the next continuation and return `Refill` carrying a copy of the
/// scratch buffer `t`.
///
/// NOTE(review): no length is passed to `Bytes::from_fixedarray`, so the
/// payload presumably covers the whole scratch buffer rather than only its
/// first `t_len` bytes — confirm against the `Bytes` API.
fn refill(self : Decoder, k : Cont) -> Decode {
  self.eoi()
  let pending = Bytes::from_fixedarray(self.t)
  self.ret(k, Decode::Refill(pending))
}
333

334
///|
/// Copy bytes from the input buffer `i` into the scratch buffer `t` until
/// `t_need` bytes have been collected, then resume with `k`.
///
/// - If the remaining input count is negative (end of input), `k` is called
///   right away with whatever `t` holds.
/// - If the input cannot complete the sequence, all remaining bytes are moved
///   to `t` and a `Refill` is returned; the stored continuation re-enters
///   `t_fill` with the same `k`.
/// - Otherwise exactly the missing bytes are moved and `k` is called.
fn t_fill(k : Cont, decoder : Decoder) -> Decode {
  // Move `count` bytes from `i` to the end of `t`, advancing both cursors.
  fn stash(d : Decoder, count : Int) -> Unit {
    d.i.blit_to(d.t, len=count, dst_offset=d.t_len, src_offset=d.i_pos)
    d.i_pos += count
    d.t_len += count
  }

  let rem = decoder.i_rem()
  if rem < 0 {
    // eoi
    return k(decoder)
  }
  let missing = decoder.t_need - decoder.t_len
  if rem < missing {
    stash(decoder, rem)
    return decoder.refill(@tuple.curry(t_fill)(k))
  }
  stash(decoder, missing)
  k(decoder)
}
361

362
// UTF8
363

364
///|
/// Decode the next UTF-8 sequence from the input buffer.
///
/// Returns `End` when no input remains. When the buffer holds fewer bytes than
/// the leading byte requires, the bytes are collected through `t_fill`. A
/// leading byte whose table entry is `0` is consumed alone and reported as
/// malformed. Otherwise the whole sequence is consumed and decoded by `r_utf_8`.
fn decode_utf_8(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  let lead = self.i[self.i_pos].to_int()
  let need = utf_8_len[lead]
  if rem < need {
    // Sequence is split across inputs: gather it in the scratch buffer.
    self.t_need(need)
    return t_fill(t_decode_utf_8, self)
  }
  let start = self.i_pos
  if need == 0 {
    self.i_pos += 1
    self.ret(decode_utf_8, malformed(self.i, start, 1))
  } else {
    self.i_pos += need
    self.ret(decode_utf_8, r_utf_8(self.i, start, need))
  }
}
387

388
///|
/// Continuation run after `t_fill` for UTF-8: decode the sequence gathered in
/// the scratch buffer, or report it as malformed when fewer than `t_need`
/// bytes were collected. Decoding then resumes with `decode_utf_8`.
fn t_decode_utf_8(self : Decoder) -> Decode {
  let outcome = if self.t_len >= self.t_need {
    r_utf_8(self.t, 0, self.t_len)
  } else {
    malformed(self.t, 0, self.t_len)
  }
  self.ret(decode_utf_8, outcome)
}
396

397
///|
/// Decode one UTF-8 sequence of `length` bytes starting at `bytes[offset]`.
///
/// # Parameters
///
/// - `bytes`: buffer holding the sequence.
/// - `offset`: index of the leading byte.
/// - `length`: sequence length, 1 to 4; any other value panics.
///
/// # Returns
///
/// `Uchar` with the decoded character, or the result of `malformed` for the
/// same span when a trailing byte is not a continuation byte (`10xxxxxx`) or
/// the second byte is outside the range allowed for leading bytes `0xE0`,
/// `0xED`, `0xF0` and `0xF4`.
fn r_utf_8(bytes : FixedArray[Byte], offset : Int, length : Int) -> Decode {
  fn byte_at(i : Int) -> Int {
    bytes[offset + i].to_int()
  }

  fn is_cont(b : Int) -> Bool {
    (b >> 6) == 0b10
  }

  // Produce the character when the sequence passed validation.
  fn accept(ok : Bool, scalar : Int) -> Decode {
    if ok {
      Uchar(Char::from_int(scalar))
    } else {
      malformed(bytes, offset, length)
    }
  }

  match length {
    1 => accept(true, byte_at(0))
    2 => {
      let b0 = byte_at(0)
      let b1 = byte_at(1)
      accept(is_cont(b1), ((b0 & 0x1F) << 6) | (b1 & 0x3F))
    }
    3 => {
      let b0 = byte_at(0)
      let b1 = byte_at(1)
      let b2 = byte_at(2)
      let scalar = ((b0 & 0x0F) << 12) | (((b1 & 0x3F) << 6) | (b2 & 0x3F))
      // The second byte has a narrower valid range after 0xE0 and 0xED.
      let b1_ok = match b0 {
        0xE0 => 0xA0 <= b1 && b1 <= 0xBF
        0xED => 0x80 <= b1 && b1 <= 0x9F
        _ => is_cont(b1)
      }
      accept(is_cont(b2) && b1_ok, scalar)
    }
    4 => {
      let b0 = byte_at(0)
      let b1 = byte_at(1)
      let b2 = byte_at(2)
      let b3 = byte_at(3)
      let scalar = ((b0 & 0x07) << 18) |
        ((b1 & 0x3F) << 12) |
        ((b2 & 0x3F) << 6) |
        (b3 & 0x3F)
      // The second byte has a narrower valid range after 0xF0 and 0xF4.
      let b1_ok = match b0 {
        0xF0 => 0x90 <= b1 && b1 <= 0xBF
        0xF4 => 0x80 <= b1 && b1 <= 0x8F
        _ => is_cont(b1)
      }
      accept(is_cont(b3) && is_cont(b2) && b1_ok, scalar)
    }
    _ => panic()
  }
}
481

482
// UTF16LE
483

484
///|
/// Result of reading a single UTF-16 code unit with `r_utf_16`.
priv enum UTF16Decode {
  /// A high surrogate (`0xD800..=0xDBFF`); a low surrogate must follow.
  Hi(Int)
  /// A low surrogate (`0xDC00..=0xDFFF`) found where none was expected,
  /// carrying the two offending bytes.
  UTF16Malformed(Bytes)
  /// A complete character outside the surrogate range.
  UTF16Uchar(Char)
}
490

491
///|
/// Decode the next UTF-16LE code unit from the input buffer.
///
/// Returns `End` when no input remains. With a single byte left, the unit is
/// gathered through `t_fill`. Otherwise two bytes are consumed and the unit is
/// passed on to `decode_utf_16le_lo`.
fn decode_utf_16le(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  if rem < 2 {
    self.t_need(2)
    return t_fill(t_decode_utf_16le, self)
  }
  let pos = self.i_pos
  self.i_pos += 2
  // Little endian: the high byte is the second one.
  self.decode_utf_16le_lo(r_utf_16(self.i, pos + 1, pos))
}
505

506
///|
/// Continuation run after `t_fill` for a UTF-16LE code unit: decode the unit
/// held in the scratch buffer, or report it as malformed when fewer than
/// `t_need` bytes were collected.
fn t_decode_utf_16le(self : Decoder) -> Decode {
  if self.t_len >= self.t_need {
    self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0))
  } else {
    self.ret(decode_utf_16le, malformed(self.t, 0, self.t_len))
  }
}
514

515
///|
/// Finish a UTF-16LE decoding step given the first code unit `v`.
///
/// A complete character or a malformed unit is returned directly. For a high
/// surrogate the next unit is read: through `t_fill` when fewer than two bytes
/// remain, otherwise straight from the input buffer. The two bytes of the
/// second unit are consumed only when the pair decodes to a character.
fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
  match v {
    Hi(hi) => {
      if self.i_rem() < 2 {
        self.t_need(2)
        return t_fill(@tuple.curry(t_decode_utf_16le_lo)(hi), self)
      }
      let pos = self.i_pos
      let paired = r_utf_16_lo(hi, self.i, pos + 1, pos)
      match paired {
        Uchar(_) => self.i_pos += 2
        // Leave the unit in place so the next step decodes it again.
        _ => ()
      }
      self.ret(decode_utf_16le, paired)
    }
    UTF16Malformed(raw) => self.ret(decode_utf_16le, Malformed(raw))
    UTF16Uchar(ch) => self.ret(decode_utf_16le, Uchar(ch))
  }
}
537

538
///|
/// Continuation run after `t_fill` for the second unit of a UTF-16LE
/// surrogate pair whose high surrogate is `hi`: combine `hi` with the unit in
/// the scratch buffer, or report a malformed pair when fewer than `t_need`
/// bytes were collected.
fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
  let outcome = if decoder.t_len >= decoder.t_need {
    r_utf_16_lo(hi, decoder.t, 1, 0)
  } else {
    malformed_pair(false, hi, decoder.t, 0, decoder.t_len)
  }
  decoder.ret(decode_utf_16le, outcome)
}
549

550
///|
/// Combine a previously read high surrogate with the UTF-16 code unit that
/// follows it in `bytes`.
///
/// # Parameters
///
/// - `hi`: the high surrogate code unit (`r_utf_16` only yields `Hi` for
///   values in `0xD800..=0xDBFF`).
/// - `bytes`: buffer holding the following code unit.
/// - `offset0`: index of the most significant byte of that unit.
/// - `offset1`: index of the least significant byte of that unit.
///
/// # Returns
///
/// `Uchar` with the supplementary-plane character when the unit is a low
/// surrogate (`0xDC00..=0xDFFF`), otherwise `Malformed` carrying the two bytes
/// at `offset0` and `offset1`.
fn r_utf_16_lo(
  hi : Int,
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int
) -> Decode {
  let b0 = bytes[offset0].to_int()
  let b1 = bytes[offset1].to_int()
  let lo = (b0 << 8) | b1
  if lo < 0xDC00 || lo > 0xDFFF {
    // NOTE(jinser): only hi malformed, skip lo if lo is illegal
    //
    // For example, b"\xD8\x00\x00\x48" (BE)
    // Since \xD8\x00 is *legal* hi, here will try to parse lo next,
    // however the whole \xD8\x00\x00\x48 is *illegal* so the result will be a `Malformed[b"\xD8\x00\x00\x48"]`
    //
    // But \x00\x48 itself is a *legal* UTF16 code point with a value of `H`,
    // the ideal result should be: `[Malformed(b"\xD8\x00"), Uchar('H')]`
    //
    // > printf '\xD8\x00\x00\x48' | uconv --from-code UTF16BE --to-code UTF8 --from-callback substitute
    // �H
    Malformed([bytes[offset0], bytes[offset1]])
  } else {
    // The 0x10000 bias must be ADDED to the combined 20-bit value. OR-ing it
    // into the low half loses the carry whenever bit 16 of the shifted high
    // half is already set, e.g. D840 DC00 must yield U+20000, not U+10000.
    let payload = ((hi & 0x3FF) << 10) | (lo & 0x3FF)
    Uchar(Char::from_int(0x10000 + payload))
  }
}
577

578
///|
/// Read one UTF-16 code unit from `bytes` and classify it.
///
/// # Parameters
///
/// - `bytes`: buffer holding the code unit.
/// - `offset0`: index of the most significant byte.
/// - `offset1`: index of the least significant byte.
///
/// # Returns
///
/// `Hi` for a high surrogate (`0xD800..=0xDBFF`), `UTF16Malformed` with the
/// two bytes (in buffer order) for a low surrogate (`0xDC00..=0xDFFF`), and
/// `UTF16Uchar` for every other value.
fn r_utf_16(
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int
) -> UTF16Decode {
  let msb = bytes[offset0].to_int()
  let lsb = bytes[offset1].to_int()
  let unit = (msb << 8) | lsb
  if 0xD800 <= unit && unit <= 0xDBFF {
    Hi(unit)
  } else if 0xDC00 <= unit && unit <= 0xDFFF {
    // A low surrogate cannot start a pair.
    UTF16Malformed(slice(bytes, @math.minimum(offset0, offset1), 2))
  } else {
    UTF16Uchar(Char::from_int(unit))
  }
}
595

596
// UTF16BE
597

598
///|
/// Decode the next UTF-16BE code unit from the input buffer.
///
/// Returns `End` when no input remains. With a single byte left, the unit is
/// gathered through `t_fill`. Otherwise two bytes are consumed and the unit is
/// passed on to `decode_utf_16be_lo`.
fn decode_utf_16be(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  if rem < 2 {
    self.t_need(2)
    return t_fill(t_decode_utf_16be, self)
  }
  let pos = self.i_pos
  self.i_pos += 2
  // Big endian: the high byte comes first.
  self.decode_utf_16be_lo(r_utf_16(self.i, pos, pos + 1))
}
612

613
///|
/// Continuation run after `t_fill` for a UTF-16BE code unit: decode the unit
/// held in the scratch buffer, or report it as malformed when fewer than
/// `t_need` bytes were collected.
fn t_decode_utf_16be(self : Decoder) -> Decode {
  if self.t_len >= self.t_need {
    self.decode_utf_16be_lo(r_utf_16(self.t, 0, 1))
  } else {
    self.ret(decode_utf_16be, malformed(self.t, 0, self.t_len))
  }
}
621

622
///|
/// Finish a UTF-16BE decoding step given the first code unit `decode`.
///
/// A complete character or a malformed unit is returned directly. For a high
/// surrogate the next unit is read: through `t_fill` when fewer than two bytes
/// remain, otherwise straight from the input buffer. The two bytes of the
/// second unit are consumed only when the pair decodes to a character.
fn decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
  match decode {
    Hi(hi) => {
      if self.i_rem() < 2 {
        self.t_need(2)
        return t_fill(@tuple.curry(t_decode_utf_16be_lo)(hi), self)
      }
      let pos = self.i_pos
      let paired = r_utf_16_lo(hi, self.i, pos, pos + 1)
      match paired {
        Uchar(_) => self.i_pos += 2
        // Leave the unit in place so the next step decodes it again.
        _ => ()
      }
      self.ret(decode_utf_16be, paired)
    }
    UTF16Malformed(raw) => self.ret(decode_utf_16be, Malformed(raw))
    UTF16Uchar(ch) => self.ret(decode_utf_16be, Uchar(ch))
  }
}
644

645
///|
/// Continuation run after `t_fill` for the second unit of a UTF-16BE
/// surrogate pair whose high surrogate is `hi`: combine `hi` with the unit in
/// the scratch buffer, or report a malformed pair when fewer than `t_need`
/// bytes were collected.
fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode {
  let outcome = if self.t_len >= self.t_need {
    r_utf_16_lo(hi, self.t, 0, 1)
  } else {
    malformed_pair(true, hi, self.t, 0, self.t_len)
  }
  self.ret(decode_utf_16be, outcome)
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc