• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonbitlang / x / 489

06 Jun 2025 10:13AM UTC coverage: 92.072% (+1.9%) from 90.218%
489

Pull #147

github

web-flow
Merge bea124531 into ae19a4d88
Pull Request #147: minor: adapt latest toolchain

23 of 29 new or added lines in 5 files covered. (79.31%)

29 existing lines in 8 files now uncovered.

1835 of 1993 relevant lines covered (92.07%)

407.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.95
/encoding/decoding.mbt
1
// Copyright 2024 International Digital Economy Academy
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
///|
/// The Unicode Replacement Character (U+FFFD).
///
/// The lossy decoding functions in this file emit it in place of every
/// malformed byte sequence reported by the decoder.
/// See https://unicode.org/charts/nameslist/n_FFF0.html
pub const U_REP = '\u{FFFD}'
18

19
///|
/// Lookup table indexed by the first byte of a UTF-8 sequence.
///
/// The entry is the total number of bytes the sequence occupies (1 to 4), or
/// `0` when the byte cannot start a sequence. `decode_utf_8` reports a `0`
/// entry as a one-byte malformed sequence.
let utf_8_len = [
  // 0x00..0x7F: single-byte sequences
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // 0x80..0xBF: continuation bytes, never a valid first byte
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0xC0..0xC1: rejected; 0xC2..0xCF: two-byte sequences
  0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  // 0xD0..0xDF: two-byte sequences
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  // 0xE0..0xEF: three-byte sequences
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  // 0xF0..0xF4: four-byte sequences; 0xF5..0xFF: rejected
  4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
]
32

33
///|
/// Creates a fresh `Decoder` for the given character encoding.
///
/// The returned decoder starts with an empty input buffer, an empty 4-byte
/// scratch buffer for sequences split across inputs, and the decoding routine
/// matching `encoding`. `UTF16` is decoded as little-endian, exactly like
/// `UTF16LE`.
///
/// # Parameters
///
/// - `encoding`: The character encoding of the byte sequences to be decoded.
///
/// # Returns
///
/// A `Decoder` that can be fed byte sequences and turns them into strings.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = @encoding.decoder(UTF8)
/// inspect!(decoder.consume!(inputs[0]), content="abc")
/// inspect!(decoder.consume!(inputs[1]), content="")
/// inspect!(decoder.consume!(inputs[2]), content="🐰")
/// assert_true!(decoder.finish!().is_empty())
/// ```
pub fn decoder(encoding : Encoding) -> Decoder {
  // Select the initial continuation, i.e. the routine that decodes the next
  // character for this encoding.
  let k = match encoding {
    UTF8 => Decoder::decode_utf_8
    UTF16 | UTF16LE => Decoder::decode_utf_16le
    UTF16BE => Decoder::decode_utf_16be
  }
  {
    i: FixedArray::default(),
    i_pos: 0,
    t: FixedArray::make(4, Byte::default()),
    t_len: 0,
    t_need: 0,
    k,
  }
}
69

70
///|
/// Decodes `input` with this `Decoder` and returns the decoded text.
///
/// The decoder is stateful: bytes left over from earlier calls are decoded
/// together with `input`. With `stream=true` an incomplete trailing sequence
/// is kept for the next call; with `stream=false` the input is treated as
/// final.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `stream~`: Whether more bytes will be supplied later. Defaults to `false`.
///
/// # Returns
///
/// A `String` with the characters decoded by this call.
///
/// # Errors
///
/// `MalformedError`: when the bytes are not valid in the decoder's encoding.
/// `TruncatedError`: when `stream` is `false` and the input ends in the middle
/// of a sequence.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = @encoding.decoder(UTF8)
/// inspect!(decoder.decode!(inputs[0], stream=true), content="abc")
/// inspect!(decoder.decode!(inputs[1], stream=true), content="")
/// inspect!(decoder.decode!(inputs[2], stream=false), content="🐰")
/// ```
pub fn decode!(
  self : Decoder,
  input : @bytes.View,
  stream~ : Bool = false
) -> String {
  // TODO: Estimate size_hint based on input and encoding more accurately
  let builder = StringBuilder::new(size_hint=input.length())
  // `Decoder::decode_to` runs the same decoding loop against a caller-supplied
  // builder; when there is nothing to decode it writes nothing, so the result
  // is the empty string.
  self.decode_to!(input, builder, stream=stream)
  builder.to_string()
}
136

137
///|
/// Decodes `input` with this `Decoder` and appends the decoded characters to
/// `output` instead of building a new `String`.
///
/// Characters decoded before an error is raised have already been written to
/// `output`.
///
/// Parameters:
///
/// * `self` : The decoder instance used to decode the byte sequence.
/// * `input` : The byte sequence to be decoded.
/// * `output` : The StringBuilder the decoded characters are written to.
/// * `stream~` : Whether more bytes will be supplied later. Defaults to `false`.
///
/// Throws a `MalformedError` when the byte sequence is not properly formatted
/// according to the decoder's encoding, and a `TruncatedError` when `stream`
/// is `false` and the input ends in the middle of a sequence.
///
/// Example:
///
/// ```moonbit
/// test "decode_to" {
///   let decoder = decoder(UTF8)
///   let buf = StringBuilder::new()
///   decoder.decode_to!(b"Hello", buf)
///   inspect!(buf.to_string(), content="Hello")
/// }
/// ```
pub fn Decoder::decode_to(
  self : Decoder,
  input : @bytes.View,
  output : StringBuilder,
  stream~ : Bool = false
) -> Unit! {
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return
  }
  // Pull decoding steps until the decoder reports the end of the input, asks
  // for more bytes, or hits an error.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      End => return
      Refill(pending) =>
        if stream {
          return
        } else {
          raise TruncatedError(pending)
        }
      Malformed(bad) =>
        // While streaming with a sequence still awaited (`t_need > 0`) the
        // error is not raised.
        if stream && self.t_need > 0 {
          return
        } else {
          raise MalformedError(bad)
        }
    }
  }
}
190

191
///|
/// One-shot convenience wrapper: creates a fresh decoder for `encoding` and
/// decodes the whole of `input` into `output` in non-streaming mode.
///
/// Raises whatever `Decoder::decode_to` raises for the same input.
pub fn decode_to(
  input : @bytes.View,
  output : StringBuilder,
  encoding~ : Encoding
) -> Unit! {
  let one_shot = decoder(encoding)
  one_shot.decode_to!(input, output)
}
199

200
///|
/// Feeds `input` to the `Decoder` in streaming mode and returns the text that
/// could be decoded so far.
///
/// This is `decode!` with `stream=true`: an incomplete trailing sequence is
/// kept inside the decoder until more bytes arrive.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to consume the byte sequence.
/// - `input`: The byte sequence to be consumed and decoded incrementally.
///
/// # Returns
///
/// A `String` with the characters decoded by this call.
///
/// # Errors
///
/// `MalformedError`: when the byte sequence is not properly formatted according to the specified encoding.
///
/// Truncated input is not an error here: in streaming mode `decode!` returns
/// what it has instead of raising `TruncatedError`.
pub fn consume!(self : Decoder, input : @bytes.View) -> String {
  let decoded = self.decode!(input, stream=true)
  decoded
}
221

222
///|
/// Ends the decoding session and returns whatever text remains.
///
/// This is `decode!` with an empty input and `stream=false`, which tells the
/// decoder that no more bytes will follow.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to finalize the decoding process.
///
/// # Returns
///
/// A `String` with the final part of the decoded content; empty when nothing
/// is pending.
///
/// # Errors
///
/// Any error `decode!` raises for the bytes still held by the decoder
/// (`MalformedError` or `TruncatedError`).
pub fn finish!(self : Decoder) -> String {
  let tail = self.decode!(b"", stream=false)
  tail
}
243

244
///|
/// Decodes `input` with this `Decoder`, substituting the Unicode Replacement
/// Character (`U+FFFD`) for every invalid sequence instead of raising.
///
/// The decoder is stateful: bytes left over from earlier calls are decoded
/// together with `input`. With `stream=true` an incomplete trailing sequence
/// is kept for the next call; with `stream=false` the input is treated as
/// final.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `stream~`: Whether more bytes will be supplied later. Defaults to `false`.
///
/// # Returns
///
/// A `String` with the decoded content, invalid sequences replaced by `U+FFFD`.
pub fn decode_lossy(
  self : Decoder,
  input : @bytes.View,
  stream~ : Bool = false
) -> String {
  // `Decoder::decode_lossy_to` runs the same lossy loop against a builder; when
  // there is nothing to decode it writes nothing and the result is empty.
  let sink = StringBuilder::new()
  self.decode_lossy_to(input, sink, stream=stream)
  sink.to_string()
}
294

295
///|
/// Lossy counterpart of `Decoder::decode_to`: decodes `input` and appends the
/// result to `output`, writing the Unicode Replacement Character (`U+FFFD`)
/// for every invalid sequence instead of raising.
///
/// Parameters:
///
/// * `self` : The decoder instance used to decode the byte sequence.
/// * `input` : The byte sequence to be decoded.
/// * `output` : The StringBuilder the decoded characters are written to.
/// * `stream~` : Whether more bytes will be supplied later. Defaults to `false`.
pub fn Decoder::decode_lossy_to(
  self : Decoder,
  input : @bytes.View,
  output : StringBuilder,
  stream~ : Bool = false
) -> Unit {
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return
  }

  // Pull decoding steps until the decoder reports the end of the input or,
  // while streaming, asks for more bytes.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      End => return
      // Not streaming: keep driving so the decoder can report the pending
      // bytes on a later step.
      Refill(_) => if stream { return }
      Malformed(_) => {
        // While streaming with a sequence still awaited (`t_need > 0`) nothing
        // is substituted and the call ends here.
        if stream && self.t_need > 0 {
          return
        }
        output.write_char(U_REP)
      }
    }
  }
}
326

327
///|
/// Feeds `input` to the `Decoder` in streaming mode and returns the text
/// decoded so far, with invalid sequences replaced by the Unicode Replacement
/// Character (`U+FFFD`).
///
/// This is `decode_lossy` with `stream=true`, indicating that more bytes will
/// follow.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to consume and decode the byte sequence.
/// - `input`: The byte sequence to be consumed and decoded incrementally.
///
/// # Returns
///
/// A `String` with the characters decoded by this call.
pub fn lossy_consume(self : Decoder, input : @bytes.View) -> String {
  let decoded = self.decode_lossy(input, stream=true)
  decoded
}
343

344
///|
/// Ends a lossy decoding session and returns whatever text remains, with
/// invalid sequences replaced by the Unicode Replacement Character (`U+FFFD`).
///
/// This is `decode_lossy` with an empty input and `stream=false`, which tells
/// the decoder that no more bytes will follow.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to finalize the lossy decoding process.
///
/// # Returns
///
/// A `String` with the final part of the decoded content; empty when nothing
/// is pending.
pub fn lossy_finish(self : Decoder) -> String {
  let tail = self.decode_lossy(b"", stream=false)
  tail
}
360

361
///|
/// Replaces the input buffer `i` with the not-yet-consumed tail of the old
/// buffer followed by `input`, and rewinds `i_pos` to the start.
fn i_cont(self : Decoder, input : @bytes.View) -> Unit {
  // `i_rem()` can be negative (buffer cleared by `eoi` while `i_pos` is still
  // advanced); treat that as "nothing left to keep".
  let pending = self.i_rem()
  let kept = if pending > 0 { pending } else { 0 }
  let merged = FixedArray::make(kept + input.length(), Byte::default())
  if kept > 0 {
    // carry over the unread bytes of the old buffer
    self.i.blit_to(merged, len=kept, src_offset=self.i_pos)
  }
  // append the new bytes right after the carried-over ones
  merged.blit_from_bytesview(kept, input)
  self.i = merged
  self.i_pos = 0
}
378

379
// Implementations
380

381
///|
/// Performs one decoding step by invoking the decoder's current continuation
/// `k` on the decoder itself.
fn decode_(self : Decoder) -> Decode {
  let step = self.k
  step(self)
}
385

386
///|
/// Stores `k` as the continuation to run on the next decoding step and
/// returns `v` unchanged as the result of the current step.
fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
  self.k = k
  v
}
391

392
///|
/// Number of bytes of the input buffer `i` that have not been consumed yet.
///
/// The result is negative when the buffer has been cleared by `eoi` while
/// `i_pos` is still advanced; `t_fill` treats that as end of input.
fn i_rem(self : Decoder) -> Int {
  let buffered = self.i.length()
  buffered - self.i_pos
}
396

397
///|
/// Prepares the scratch buffer `t` to collect a sequence of `need` bytes:
/// records the target length and marks the buffer as empty.
fn t_need(self : Decoder, need : Int) -> Unit {
  self.t_need = need
  self.t_len = 0
}
402

403
///|
/// Drops the current input buffer by replacing `i` with an empty array.
/// `i_pos` is left untouched.
fn eoi(self : Decoder) -> Unit {
  let empty : FixedArray[Byte] = FixedArray::default()
  self.i = empty
}
407

408
///|
/// Signals that the input is exhausted in the middle of a sequence.
///
/// Clears the input buffer, installs `k` as the continuation to resume with,
/// and returns `Refill` carrying a copy of the scratch buffer `t`.
fn refill(self : Decoder, k : Cont) -> Decode {
  self.eoi()
  let scratch = Bytes::from_fixedarray(self.t)
  self.ret(k, Decode::Refill(scratch))
}
413

414
///|
/// Moves bytes from the input buffer `i` into the scratch buffer `t` until `t`
/// holds `t_need` bytes, then hands over to `k`.
///
/// - Negative remainder (input cleared by `eoi`): `k` runs immediately and
///   sees whatever `t` currently holds.
/// - Not enough bytes available: everything available is copied and a
///   `Refill` is returned whose continuation resumes this same filling step.
/// - Enough bytes available: exactly the missing bytes are copied and `k` runs.
fn t_fill(k : Cont, decoder : Decoder) -> Decode {
  let available = decoder.i_rem()
  if available < 0 {
    // eoi
    return k(decoder)
  }
  let missing = decoder.t_need - decoder.t_len
  let starved = available < missing
  // copy as much as we can, but never more than what is still missing
  let take = if starved { available } else { missing }
  decoder.i.blit_to(
    decoder.t,
    len=take,
    dst_offset=decoder.t_len,
    src_offset=decoder.i_pos,
  )
  decoder.i_pos += take
  decoder.t_len += take
  if starved {
    decoder.refill(@tuple.curry(t_fill)(k))
  } else {
    k(decoder)
  }
}
441

442
// UTF8
443

444
///|
/// Decodes the next UTF-8 sequence from the input buffer.
///
/// Returns `End` when no input is left. When the sequence announced by the
/// first byte is longer than the remaining input, collection continues in the
/// scratch buffer via `t_fill`. Otherwise the sequence is consumed and decoded
/// in place; a first byte whose `utf_8_len` entry is `0` is consumed alone and
/// reported as malformed.
fn decode_utf_8(self : Decoder) -> Decode {
  let available = self.i_rem()
  if available <= 0 {
    return Decode::End
  }
  let start = self.i_pos
  let need = utf_8_len[self.i[start].to_int()]
  if available < need {
    // the sequence is split across inputs: gather it in `t`
    self.t_need(need)
    return t_fill(Decoder::t_decode_utf_8, self)
  }
  match need {
    0 => {
      self.i_pos += 1
      self.ret(Decoder::decode_utf_8, malformed(self.i, start, 1))
    }
    _ => {
      self.i_pos += need
      self.ret(Decoder::decode_utf_8, r_utf_8(self.i, start, need))
    }
  }
}
467

468
///|
/// Continuation run once `t_fill` has finished collecting a split UTF-8
/// sequence in the scratch buffer `t`.
///
/// If fewer than `t_need` bytes were collected, the collected bytes are
/// reported as malformed; otherwise they are decoded. Either way decoding
/// resumes with `decode_utf_8`.
fn t_decode_utf_8(self : Decoder) -> Decode {
  let outcome = if self.t_len < self.t_need {
    malformed(self.t, 0, self.t_len)
  } else {
    r_utf_8(self.t, 0, self.t_len)
  }
  self.ret(Decoder::decode_utf_8, outcome)
}
476

477
///|
/// Decodes one UTF-8 sequence of `length` bytes starting at `bytes[offset]`.
///
/// Returns `Uchar` for a valid sequence and the result of `malformed` for the
/// whole `length`-byte span otherwise. Besides requiring continuation bytes
/// of the form `10xxxxxx`, the second byte is range-checked for the first
/// bytes 0xE0, 0xED, 0xF0 and 0xF4.
///
/// `length` must be 1, 2, 3 or 4; any other value panics.
fn r_utf_8(bytes : FixedArray[Byte], offset : Int, length : Int) -> Decode {
  // true when `b` has the continuation-byte form 10xxxxxx
  fn is_cont(b : Int) -> Bool {
    (b >> 6) == 0b10
  }

  // true when `lo <= b <= hi`
  fn within(b : Int, lo : Int, hi : Int) -> Bool {
    lo <= b && b <= hi
  }

  fn accept(code : Int) -> Decode {
    Uchar(Int::unsafe_to_char(code))
  }

  fn reject() -> Decode {
    malformed(bytes, offset, length)
  }

  match length {
    1 => accept(bytes[offset].to_int())
    2 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      if is_cont(c1) {
        accept(((lead & 0x1F) << 6) | (c1 & 0x3F))
      } else {
        reject()
      }
    }
    3 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      let c2 = bytes[offset + 2].to_int()
      // the allowed range of the second byte depends on the first byte
      let second_ok = match lead {
        0xE0 => within(c1, 0xA0, 0xBF)
        0xED => within(c1, 0x80, 0x9F)
        _ => is_cont(c1)
      }
      if is_cont(c2) && second_ok {
        accept(((lead & 0x0F) << 12) | (((c1 & 0x3F) << 6) | (c2 & 0x3F)))
      } else {
        reject()
      }
    }
    4 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      let c2 = bytes[offset + 2].to_int()
      let c3 = bytes[offset + 3].to_int()
      // the allowed range of the second byte depends on the first byte
      let second_ok = match lead {
        0xF0 => within(c1, 0x90, 0xBF)
        0xF4 => within(c1, 0x80, 0x8F)
        _ => is_cont(c1)
      }
      if is_cont(c3) && is_cont(c2) && second_ok {
        accept(
          ((lead & 0x07) << 18) |
          ((c1 & 0x3F) << 12) |
          ((c2 & 0x3F) << 6) |
          (c3 & 0x3F),
        )
      } else {
        reject()
      }
    }
    _ => panic()
  }
}
561

562
// UTF16LE
563

564
///|
/// Result of reading a single 16-bit UTF-16 code unit (produced by `r_utf_16`).
priv enum UTF16Decode {
  // A code unit in 0xD800..0xDBFF; a second unit is needed to finish the character.
  Hi(Int)
  // A code unit in 0xDC00..0xDFFF met where a character should start; carries its two bytes.
  UTF16Malformed(Bytes)
  // A code unit outside 0xD800..0xDFFF, which is a complete character on its own.
  UTF16Uchar(Char)
}
570

571
///|
/// Decodes the next character from little-endian UTF-16 input.
///
/// Returns `End` when no input is left. With a single byte left, the code unit
/// is collected in the scratch buffer via `t_fill`. Otherwise two bytes are
/// consumed (high byte at the higher offset) and passed to
/// `decode_utf_16le_lo`.
fn decode_utf_16le(self : Decoder) -> Decode {
  let available = self.i_rem()
  if available <= 0 {
    return Decode::End
  }
  if available < 2 {
    self.t_need(2)
    return t_fill(Decoder::t_decode_utf_16le, self)
  }
  let start = self.i_pos
  let first = r_utf_16(self.i, start + 1, start)
  self.i_pos = start + 2
  self.decode_utf_16le_lo(first)
}
585

586
///|
/// Continuation run once `t_fill` has finished collecting a split
/// little-endian code unit in the scratch buffer `t`.
///
/// If fewer than `t_need` bytes were collected they are reported as
/// malformed; otherwise the code unit is read from `t` and processed by
/// `decode_utf_16le_lo`.
fn t_decode_utf_16le(self : Decoder) -> Decode {
  if self.t_len < self.t_need {
    return self.ret(Decoder::decode_utf_16le, malformed(self.t, 0, self.t_len))
  }
  let first = r_utf_16(self.t, 1, 0)
  self.decode_utf_16le_lo(first)
}
594

595
///|
/// Finishes decoding a little-endian UTF-16 character whose first code unit
/// has been classified as `v`.
///
/// A complete character or a malformed unit is returned as is. For a high
/// surrogate the following code unit is read: from the input buffer when two
/// bytes are available, otherwise through the scratch buffer via `t_fill`.
/// The two bytes of the following unit are consumed only when the pair
/// decodes to a character, so after a malformed pair they are decoded again
/// on their own.
fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
  let outcome : Decode = match v {
    UTF16Uchar(ch) => Uchar(ch)
    UTF16Malformed(raw) => Malformed(raw)
    Hi(hi) => {
      if self.i_rem() < 2 {
        self.t_need(2)
        return t_fill(@tuple.curry(t_decode_utf_16le_lo)(hi), self)
      }
      let at = self.i_pos
      let pair = r_utf_16_lo(hi, self.i, at + 1, at)
      match pair {
        Uchar(_) => self.i_pos = at + 2
        _ => ()
      }
      pair
    }
  }
  self.ret(Decoder::decode_utf_16le, outcome)
}
617

618
///|
/// Continuation run once `t_fill` has finished collecting the code unit that
/// follows the high surrogate `hi` (little-endian).
///
/// If fewer than `t_need` bytes were collected the result comes from
/// `malformed_pair`; otherwise the unit in `t` is combined with `hi` by
/// `r_utf_16_lo`. Decoding then resumes with `decode_utf_16le`.
fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
  let outcome = if decoder.t_len < decoder.t_need {
    malformed_pair(false, hi, decoder.t, 0, decoder.t_len)
  } else {
    r_utf_16_lo(hi, decoder.t, 1, 0)
  }
  decoder.ret(Decoder::decode_utf_16le, outcome)
}
629

630
///|
/// Combines the high surrogate `hi` with the code unit that follows it.
///
/// The following unit is read as `(bytes[offset0] << 8) | bytes[offset1]`, so
/// `offset0` addresses its high byte and `offset1` its low byte.
///
/// Returns `Uchar` with the supplementary-plane character when the unit is a
/// low surrogate (0xDC00..0xDFFF). Otherwise returns `Malformed` carrying the
/// two bytes of that unit, high byte first.
fn r_utf_16_lo(
  hi : Int,
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int
) -> Decode {
  let b0 = bytes[offset0].to_int()
  let b1 = bytes[offset1].to_int()
  let lo = (b0 << 8) | b1
  if lo < 0xDC00 || lo > 0xDFFF {
    // NOTE(jinser): only hi malformed, skip lo if lo is illegal
    //
    // For example, b"\xD8\x00\x00\x48" (BE)
    // Since \xD8\x00 is *legal* hi, here will try to parse lo next,
    // however the whole \xD8\x00\x00\x48 is *illegal* so the result will be a `Malformed[b"\xD8\x00\x00\x48"]`
    //
    // But \x00\x48 itself is a *legal* UTF16 code point with a value of `H`,
    // the ideal result should be: `[Malformed(b"\xD8\x00"), Uchar('H')]`
    //
    // > printf '\xD8\x00\x00\x48' | uconv --from-code UTF16BE --to-code UTF8 --from-callback substitute
    // �H
    Malformed([bytes[offset0], bytes[offset1]])
  } else {
    // Join the two 10-bit payloads first, then add the 0x10000 offset.
    // OR-ing 0x10000 in instead would lose it whenever bit 6 of the
    // high-surrogate payload is set (e.g. D840 DC00 must give U+20000).
    let payload = ((hi & 0x3FF) << 10) | (lo & 0x3FF)
    Uchar(Int::unsafe_to_char(payload + 0x10000))
  }
}
657

658
///|
/// Reads one 16-bit code unit as `(bytes[offset0] << 8) | bytes[offset1]` and
/// classifies it.
///
/// - 0xD800..0xDBFF: `Hi`, a second code unit is required.
/// - 0xDC00..0xDFFF: `UTF16Malformed` with the two bytes taken from the lower
///   of the two offsets.
/// - anything else: `UTF16Uchar`, a complete character.
fn r_utf_16(
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int
) -> UTF16Decode {
  let high_byte = bytes[offset0].to_int()
  let low_byte = bytes[offset1].to_int()
  let unit = (high_byte << 8) | low_byte
  if 0xD800 <= unit && unit <= 0xDBFF {
    Hi(unit)
  } else if 0xDC00 <= unit && unit <= 0xDFFF {
    UTF16Malformed(slice(bytes, @math.minimum(offset0, offset1), 2))
  } else {
    UTF16Uchar(Int::unsafe_to_char(unit))
  }
}
675

676
// UTF16BE
677

678
///|
/// Decodes the next character from big-endian UTF-16 input.
///
/// Returns `End` when no input is left. With a single byte left, the code unit
/// is collected in the scratch buffer via `t_fill`. Otherwise two bytes are
/// consumed (high byte at the lower offset) and passed to
/// `decode_utf_16be_lo`.
fn decode_utf_16be(self : Decoder) -> Decode {
  let available = self.i_rem()
  if available <= 0 {
    return Decode::End
  }
  if available < 2 {
    self.t_need(2)
    return t_fill(Decoder::t_decode_utf_16be, self)
  }
  let start = self.i_pos
  let first = r_utf_16(self.i, start, start + 1)
  self.i_pos = start + 2
  self.decode_utf_16be_lo(first)
}
692

693
///|
/// Continuation run once `t_fill` has finished collecting a split big-endian
/// code unit in the scratch buffer `t`.
///
/// If fewer than `t_need` bytes were collected they are reported as
/// malformed; otherwise the code unit is read from `t` and processed by
/// `decode_utf_16be_lo`.
fn t_decode_utf_16be(self : Decoder) -> Decode {
  if self.t_len < self.t_need {
    return self.ret(Decoder::decode_utf_16be, malformed(self.t, 0, self.t_len))
  }
  let first = r_utf_16(self.t, 0, 1)
  self.decode_utf_16be_lo(first)
}
701

702
///|
/// Finishes decoding a big-endian UTF-16 character whose first code unit has
/// been classified as `decode`.
///
/// A complete character or a malformed unit is returned as is. For a high
/// surrogate the following code unit is read: from the input buffer when two
/// bytes are available, otherwise through the scratch buffer via `t_fill`.
/// The two bytes of the following unit are consumed only when the pair
/// decodes to a character, so after a malformed pair they are decoded again
/// on their own.
fn decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
  let outcome : Decode = match decode {
    UTF16Uchar(ch) => Uchar(ch)
    UTF16Malformed(raw) => Malformed(raw)
    Hi(hi) => {
      if self.i_rem() < 2 {
        self.t_need(2)
        return t_fill(@tuple.curry(t_decode_utf_16be_lo)(hi), self)
      }
      let at = self.i_pos
      let pair = r_utf_16_lo(hi, self.i, at, at + 1)
      match pair {
        Uchar(_) => self.i_pos = at + 2
        _ => ()
      }
      pair
    }
  }
  self.ret(Decoder::decode_utf_16be, outcome)
}
724

725
///|
/// Continuation run once `t_fill` has finished collecting the code unit that
/// follows the high surrogate `hi` (big-endian).
///
/// If fewer than `t_need` bytes were collected the result comes from
/// `malformed_pair`; otherwise the unit in `t` is combined with `hi` by
/// `r_utf_16_lo`. Decoding then resumes with `decode_utf_16be`.
fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode {
  let outcome = if self.t_len < self.t_need {
    malformed_pair(true, hi, self.t, 0, self.t_len)
  } else {
    r_utf_16_lo(hi, self.t, 0, 1)
  }
  self.ret(Decoder::decode_utf_16be, outcome)
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc