• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonbitlang / x / 608

04 Sep 2025 06:43AM UTC coverage: 90.99%. Remained the same
608

push

github

peter-jerry-ye
chore: update changelog and bump version

1939 of 2131 relevant lines covered (90.99%)

411.64 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.02
/encoding/decoding.mbt
1
// Copyright 2025 International Digital Economy Academy
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
///|
/// The Unicode Replacement Character (U+FFFD).
///
/// The lossy decoding functions in this file emit this character in place of
/// every sequence the decoder reports as malformed.
/// See https://unicode.org/charts/nameslist/n_FFF0.html
pub const U_REP = '\u{FFFD}'
19

20
///|
/// Expected length of a UTF-8 sequence, indexed by the value of its first byte.
///
/// An entry of `0` marks a byte that is not accepted as the start of a
/// sequence. The table has one entry for each of the 256 byte values.
let utf_8_len = [
  // 0x00..=0x7F: one-byte sequences
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // 0x80..=0xBF: not accepted as a first byte
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0xC0..=0xC1: not accepted; 0xC2..=0xDF: two-byte sequences
  0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  // 0xE0..=0xEF: three-byte sequences
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  // 0xF0..=0xF4: four-byte sequences; 0xF5..=0xFF: not accepted
  4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
]
33

34
///|
/// Create and return a `Decoder` for the specified character encoding.
///
/// The new decoder starts with an empty input buffer, a 4-byte scratch buffer
/// for sequences that are split across inputs, and the decoding routine that
/// matches `encoding` installed as its first step.
///
/// # Parameters
///
/// - `encoding`: The character encoding of the bytes that will be fed to the
///   decoder. `UTF16` selects the same routine as `UTF16LE`.
///
/// # Returns
///
/// A `Decoder` instance that can be used to decode byte sequences into strings.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = decoder(UTF8)
/// inspect(decoder.consume(inputs[0]), content="abc")
/// inspect(decoder.consume(inputs[1]), content="")
/// inspect(decoder.consume(inputs[2]), content="🐰")
/// assert_true(decoder.finish().is_empty())
/// ```
pub fn decoder(encoding : Encoding) -> Decoder {
  // Pick the routine that decodes the first code point.
  let start = match encoding {
    UTF8 => Decoder::decode_utf_8
    UTF16 | UTF16LE => Decoder::decode_utf_16le
    UTF16BE => Decoder::decode_utf_16be
  }
  {
    i: FixedArray::default(),
    i_pos: 0,
    t: FixedArray::make(4, Byte::default()),
    t_len: 0,
    t_need: 0,
    k: start,
  }
}
70

71
///|
/// Decode `input` with this `Decoder` and return the decoded text.
///
/// The decoder can be used incrementally: pass `stream=true` while more bytes
/// are still to come, and `stream=false` (the default) for the last piece.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `stream?`: Whether more bytes will be supplied later. Defaults to `false`.
///
/// # Returns
///
/// A `String` with the characters decoded by this call. The result is empty
/// when there is nothing buffered to decode.
///
/// # Errors
///
/// - `MalformedError`: the decoder reported a malformed sequence. In streaming
///   mode, once the decoder has buffered a split sequence (`t_need > 0`), the
///   text decoded so far is returned instead.
/// - `TruncatedError`: the input ended in the middle of a sequence and
///   `stream` is `false`.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = @encoding.decoder(UTF8)
/// inspect(decoder.decode(inputs[0], stream=true), content="abc")
/// inspect(decoder.decode(inputs[1], stream=true), content="")
/// inspect(decoder.decode(inputs[2], stream=false), content="🐰")
/// ```
pub fn decode(
  self : Decoder,
  input : @bytes.View,
  stream? : Bool = false,
) -> String raise Error {
  // TODO: Estimate size_hint based on input and encoding more accurately
  let builder = StringBuilder::new(size_hint=input.length())
  // `Decoder::decode_to` runs the same decoding loop; collect its output here.
  self.decode_to(input, builder, stream=stream)
  builder.to_string()
}
137

138
///|
/// Decode `input` with this decoder and append the decoded characters to
/// `output`.
///
/// This is the counterpart of `decode` that writes into an existing
/// `StringBuilder` instead of returning a new `String`.
///
/// Parameters:
///
/// * `self` : The decoder instance used to decode the byte sequence.
/// * `input` : The byte sequence to be decoded.
/// * `output` : The StringBuilder that receives the decoded characters.
/// * `stream?` : Whether more bytes will be supplied later. Defaults to `false`.
///
/// Raises `MalformedError` when the decoder reports a malformed sequence
/// (unless `stream` is set and a split sequence has been buffered, in which
/// case the call simply returns), and `TruncatedError` when the input ends in
/// the middle of a sequence while `stream` is `false`.
///
/// Example:
///
/// ```moonbit
///   let decoder = decoder(UTF8)
///   let buf = StringBuilder::new()
///   decoder.decode_to(b"Hello", buf)
///   inspect(buf.to_string(), content="Hello")
/// ```
pub fn Decoder::decode_to(
  self : Decoder,
  input : @bytes.View,
  output : StringBuilder,
  stream? : Bool = false,
) -> Unit raise {
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return
  }
  // Pull decoding steps until the decoder stops producing characters.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      Malformed(bad) => {
        if stream && self.t_need > 0 {
          return
        }
        raise MalformedError(bad)
      }
      End => return
      Refill(pending) => {
        if stream {
          return
        }
        raise TruncatedError(pending)
      }
    }
  }
}
189

190
///|
/// Decode `input` as `encoding` in one shot and append the result to `output`.
///
/// A fresh decoder is created for the call and used in non-streaming mode, so
/// errors raised by `Decoder::decode_to` propagate to the caller.
pub fn decode_to(
  input : @bytes.View,
  output : StringBuilder,
  encoding~ : Encoding,
) -> Unit raise {
  let one_shot = decoder(encoding)
  one_shot.decode_to(input, output)
}
198

199
///|
/// Feed `input` to the decoder in streaming mode and return what could be
/// decoded so far.
///
/// Equivalent to calling `decode` with `stream=true`, i.e. the caller promises
/// that more bytes may follow.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance that consumes the bytes.
/// - `input`: The next piece of the byte sequence.
///
/// # Returns
///
/// A `String` holding the characters decoded by this call.
///
/// # Errors
///
/// Whatever `decode` raises for `stream=true`.
pub fn consume(self : Decoder, input : @bytes.View) -> String raise Error {
  self.decode(input, stream=true)
}
220

221
///|
/// Signal the end of the input and return any text that is still pending.
///
/// Equivalent to calling `decode` with an empty input and `stream=false`.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance to finalize.
///
/// # Returns
///
/// A `String` with the final part of the decoded content.
///
/// # Errors
///
/// Whatever `decode` raises for `stream=false`, for example when the bytes
/// consumed so far stop in the middle of a sequence.
pub fn finish(self : Decoder) -> String raise Error {
  self.decode(b"", stream=false)
}
242

243
///|
/// Decode `input` with this `Decoder`, substituting the Unicode Replacement
/// Character (`U+FFFD`) for sequences that cannot be decoded.
///
/// The decoder can be used incrementally: pass `stream=true` while more bytes
/// are still to come, and `stream=false` (the default) for the last piece.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `stream?`: Whether more bytes will be supplied later. Defaults to `false`.
///
/// # Returns
///
/// A `String` with the characters decoded by this call; it is empty when there
/// is nothing buffered to decode.
pub fn decode_lossy(
  self : Decoder,
  input : @bytes.View,
  stream? : Bool = false,
) -> String {
  // `Decoder::decode_lossy_to` runs the same loop; collect its output here.
  let collected = StringBuilder::new()
  self.decode_lossy_to(input, collected, stream=stream)
  collected.to_string()
}
293

294
///|
/// Decode `input` with this decoder and append the result to `output`,
/// writing `U_REP` for every sequence the decoder reports as malformed.
///
/// Parameters:
///
/// * `self` : The decoder instance used to decode the byte sequence.
/// * `input` : The byte sequence to be decoded.
/// * `output` : The StringBuilder that receives the decoded characters.
/// * `stream?` : Whether more bytes will be supplied later. Defaults to `false`.
///
/// In streaming mode the call returns as soon as the decoder asks for more
/// input. Otherwise decoding continues until the decoder reports the end.
pub fn Decoder::decode_lossy_to(
  self : Decoder,
  input : @bytes.View,
  output : StringBuilder,
  stream? : Bool = false,
) -> Unit {
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return
  }
  // Pull decoding steps until the decoder stops producing characters.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      Malformed(_) => {
        if stream && self.t_need > 0 {
          return
        }
        output.write_char(U_REP)
      }
      End => return
      // Without `stream`, keep going: the next step resolves the leftover bytes.
      Refill(_) => if stream { return }
    }
  }
}
325

326
///|
/// Feed `input` to the decoder in streaming mode and return what could be
/// decoded so far, with `U+FFFD` standing in for invalid sequences.
///
/// Equivalent to calling `decode_lossy` with `stream=true`.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance that consumes the bytes.
/// - `input`: The next piece of the byte sequence.
///
/// # Returns
///
/// A `String` holding the characters decoded by this call.
pub fn lossy_consume(self : Decoder, input : @bytes.View) -> String {
  self.decode_lossy(input, stream=true)
}
342

343
///|
/// Signal the end of the input and return any text that is still pending,
/// with `U+FFFD` standing in for invalid sequences.
///
/// Equivalent to calling `decode_lossy` with an empty input and `stream=false`.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance to finalize.
///
/// # Returns
///
/// A `String` with the final part of the decoded content.
pub fn lossy_finish(self : Decoder) -> String {
  self.decode_lossy(b"", stream=false)
}
359

360
///|
/// Replace the input buffer with the still-unread bytes of the old buffer
/// followed by `input`, and rewind the read position to 0.
fn i_cont(self : Decoder, input : @bytes.View) -> Unit {
  // `i_rem` can be negative; treat that as "nothing left to carry over".
  let pending = self.i_rem()
  let carry = if pending > 0 { pending } else { 0 }
  let merged = FixedArray::make(carry + input.length(), Byte::default())
  if carry > 0 {
    // keep the unread tail of the previous buffer at the front
    self.i.blit_to(merged, len=carry, src_offset=self.i_pos)
  }
  // the new bytes go right after the carried-over tail
  merged.blit_from_bytesview(carry, input)
  self.i = merged
  self.i_pos = 0
}
377

378
// Implementations
379

380
///|
/// Run one decoding step by invoking the continuation currently stored in `k`.
fn decode_(self : Decoder) -> Decode {
  let step = self.k
  step(self)
}
384

385
///|
/// Store `k` as the continuation for the next decoding step and hand back `v`
/// unchanged.
fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
  self.k = k
  v
}
390

391
///|
/// Number of bytes between the read position and the end of the input buffer.
///
/// The value is negative when the read position lies past the buffer length;
/// callers in this file check for that case.
fn i_rem(self : Decoder) -> Int {
  let consumed = self.i_pos
  self.i.length() - consumed
}
395

396
///|
/// Start collecting a new sequence in the scratch buffer: record that `need`
/// bytes are required and mark the buffer as empty.
fn t_need(self : Decoder, need : Int) -> Unit {
  self.t_need = need
  self.t_len = 0
}
401

402
///|
/// Drop the input buffer by replacing it with the default `FixedArray`.
/// The read position `i_pos` is left untouched.
fn eoi(self : Decoder) -> Unit {
  self.i = FixedArray::default()
}
406

407
///|
/// Ask the caller for more input: drop the input buffer, store `k` as the
/// next step, and return `Refill` carrying a copy of the scratch buffer.
fn refill(self : Decoder, k : Cont) -> Decode {
  self.eoi()
  let scratch = Bytes::from_fixedarray(self.t)
  self.ret(k, Decode::Refill(scratch))
}
412

413
///|
/// Move bytes from the input buffer into the scratch buffer until `t_need`
/// bytes have been collected, then continue with `k`.
///
/// When the input cannot satisfy the request, whatever is available is copied
/// and a refill is requested, with this function (bound to the same `k`) as
/// the next step. A negative remainder is treated as end of input: `k` runs
/// immediately on the partially filled scratch buffer.
fn t_fill(k : Cont, decoder : Decoder) -> Decode {
  let rem = decoder.i_rem()
  if rem < 0 {
    // eoi
    return k(decoder)
  }
  let missing = decoder.t_need - decoder.t_len
  let enough = rem >= missing
  let take = if enough { missing } else { rem }
  decoder.i.blit_to(
    decoder.t,
    len=take,
    dst_offset=decoder.t_len,
    src_offset=decoder.i_pos,
  )
  decoder.i_pos += take
  decoder.t_len += take
  if enough {
    k(decoder)
  } else {
    decoder.refill(curry(t_fill)(k))
  }
}
440

441
// UTF8
442

443
///|
/// Decode the next UTF-8 sequence from the input buffer.
///
/// Returns `End` when no input is left. If the sequence announced by the lead
/// byte is longer than the remaining input, the bytes are collected through
/// the scratch buffer instead. A lead byte whose table entry is `0` is
/// reported as a one-byte malformed sequence.
fn decode_utf_8(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  let start = self.i_pos
  let need = utf_8_len[self.i[start].to_int()]
  if rem < need {
    // sequence is split across inputs: gather it in the scratch buffer
    self.t_need(need)
    return t_fill(Decoder::t_decode_utf_8, self)
  }
  if need == 0 {
    self.i_pos += 1
    self.ret(Decoder::decode_utf_8, malformed(self.i, start, 1))
  } else {
    self.i_pos += need
    self.ret(Decoder::decode_utf_8, r_utf_8(self.i, start, need))
  }
}
466

467
///|
/// Decode the UTF-8 sequence gathered in the scratch buffer.
///
/// If fewer than `t_need` bytes were collected, the collected bytes are
/// reported as malformed. Either way the next step is `decode_utf_8`.
fn t_decode_utf_8(self : Decoder) -> Decode {
  let outcome = if self.t_len >= self.t_need {
    r_utf_8(self.t, 0, self.t_len)
  } else {
    malformed(self.t, 0, self.t_len)
  }
  self.ret(Decoder::decode_utf_8, outcome)
}
475

476
///|
/// Decode one UTF-8 sequence of `length` bytes starting at `bytes[offset]`.
///
/// Returns `Uchar` for a valid sequence, or the result of `malformed` for the
/// same byte range otherwise. Trailing bytes must have the form `10xxxxxx`;
/// the byte after the lead bytes 0xE0, 0xED, 0xF0 and 0xF4 is further limited
/// to a narrower range. Panics if `length` is not in `1..=4`.
fn r_utf_8(bytes : FixedArray[Byte], offset : Int, length : Int) -> Decode {
  fn uchar(c : Int) -> Decode {
    Uchar(Int::unsafe_to_char(c))
  }

  // true when `b` has the continuation-byte form 10xxxxxx
  fn is_cont(b : Int) -> Bool {
    (b >> 6) == 0b10
  }

  match length {
    1 => uchar(bytes[offset].to_int())
    2 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      if is_cont(c1) {
        uchar(((lead & 0x1F) << 6) | (c1 & 0x3F))
      } else {
        malformed(bytes, offset, length)
      }
    }
    3 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      let c2 = bytes[offset + 2].to_int()
      // the allowed range of the second byte depends on the lead byte
      let c1_ok = match lead {
        0xE0 => 0xA0 <= c1 && c1 <= 0xBF
        0xED => 0x80 <= c1 && c1 <= 0x9F
        _ => is_cont(c1)
      }
      if is_cont(c2) && c1_ok {
        uchar(((lead & 0x0F) << 12) | ((c1 & 0x3F) << 6) | (c2 & 0x3F))
      } else {
        malformed(bytes, offset, length)
      }
    }
    4 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      let c2 = bytes[offset + 2].to_int()
      let c3 = bytes[offset + 3].to_int()
      // the allowed range of the second byte depends on the lead byte
      let c1_ok = match lead {
        0xF0 => 0x90 <= c1 && c1 <= 0xBF
        0xF4 => 0x80 <= c1 && c1 <= 0x8F
        _ => is_cont(c1)
      }
      if is_cont(c3) && is_cont(c2) && c1_ok {
        uchar(
          ((lead & 0x07) << 18) |
          ((c1 & 0x3F) << 12) |
          ((c2 & 0x3F) << 6) |
          (c3 & 0x3F),
        )
      } else {
        malformed(bytes, offset, length)
      }
    }
    _ => panic()
  }
}
560

561
// UTF16LE
562

563
///|
/// Result of reading a single UTF-16 code unit (see `r_utf_16`).
priv enum UTF16Decode {
  /// A code unit in 0xD800..=0xDBFF; a second code unit is needed.
  Hi(Int)
  /// A code unit in 0xDC00..=0xDFFF with no preceding high unit; carries its bytes.
  UTF16Malformed(Bytes)
  /// A code unit outside 0xD800..=0xDFFF, which is a character on its own.
  UTF16Uchar(Char)
}
569

570
///|
/// Decode the next UTF-16LE code unit (and its pair, if any) from the input
/// buffer.
///
/// Returns `End` when no input is left. With only one byte available the code
/// unit is collected through the scratch buffer.
fn decode_utf_16le(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  if rem < 2 {
    self.t_need(2)
    return t_fill(Decoder::t_decode_utf_16le, self)
  }
  let at = self.i_pos
  self.i_pos = at + 2
  // little endian: the high-order byte is the second one
  self.decode_utf_16le_lo(r_utf_16(self.i, at + 1, at))
}
584

585
///|
/// Decode the UTF-16LE code unit gathered in the scratch buffer.
///
/// If fewer than `t_need` bytes were collected, the collected bytes are
/// reported as malformed and the next step is `decode_utf_16le`.
fn t_decode_utf_16le(self : Decoder) -> Decode {
  if self.t_len >= self.t_need {
    self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0))
  } else {
    self.ret(Decoder::decode_utf_16le, malformed(self.t, 0, self.t_len))
  }
}
593

594
///|
/// Finish a UTF-16LE step given the outcome `v` of reading the first code unit.
///
/// Characters and malformed units are returned directly. For a high unit the
/// following code unit is read from the input buffer, or through the scratch
/// buffer when fewer than two bytes remain. The read position only moves past
/// the second unit when the pair decodes to a character.
fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
  let hi = match v {
    UTF16Uchar(ch) => return self.ret(Decoder::decode_utf_16le, Uchar(ch))
    UTF16Malformed(bad) =>
      return self.ret(Decoder::decode_utf_16le, Malformed(bad))
    Hi(unit) => unit
  }
  if self.i_rem() < 2 {
    self.t_need(2)
    return t_fill(curry(t_decode_utf_16le_lo)(hi), self)
  }
  let at = self.i_pos
  let paired = r_utf_16_lo(hi, self.i, at + 1, at)
  match paired {
    Uchar(_) => self.i_pos += 2
    _ => ()
  }
  self.ret(Decoder::decode_utf_16le, paired)
}
616

617
///|
/// Pair the high unit `hi` with the UTF-16LE code unit gathered in the scratch
/// buffer.
///
/// If fewer than `t_need` bytes were collected, the result of `malformed_pair`
/// is returned instead. Either way the next step is `decode_utf_16le`.
fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
  let outcome = if decoder.t_len >= decoder.t_need {
    r_utf_16_lo(hi, decoder.t, 1, 0)
  } else {
    malformed_pair(false, hi, decoder.t, 0, decoder.t_len)
  }
  decoder.ret(Decoder::decode_utf_16le, outcome)
}
628

629
///|
/// Combine the high surrogate `hi` with the code unit that follows it.
///
/// # Parameters
///
/// - `hi`: The high surrogate that was read before this code unit.
/// - `bytes`: The buffer that holds the second code unit.
/// - `offset0`: Index of the high-order byte of the second code unit.
/// - `offset1`: Index of the low-order byte of the second code unit.
///
/// # Returns
///
/// `Uchar` with the supplementary-plane character when the second code unit
/// is a low surrogate (0xDC00..=0xDFFF); otherwise `Malformed` carrying the two
/// bytes of the second code unit, high-order byte first.
fn r_utf_16_lo(
  hi : Int,
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int,
) -> Decode {
  let b0 = bytes[offset0].to_int()
  let b1 = bytes[offset1].to_int()
  let lo = (b0 << 8) | b1
  if lo < 0xDC00 || lo > 0xDFFF {
    // NOTE(jinser): only hi malformed, skip lo if lo is illegal
    //
    // For example, b"\xD8\x00\x00\x48" (BE)
    // Since \xD8\x00 is *legal* hi, here will try to parse lo next,
    // however the whole \xD8\x00\x00\x48 is *illegal* so the result will be a `Malformed[b"\xD8\x00\x00\x48"]`
    //
    // But \x00\x48 itself is a *legal* UTF16 code point with a value of `H`,
    // the ideal result should be: `[Malformed(b"\xD8\x00"), Uchar('H')]`
    //
    // > printf '\xD8\x00\x00\x48' | uconv --from-code UTF16BE --to-code UTF8 --from-callback substitute
    // �H
    Malformed([bytes[offset0], bytes[offset1]])
  } else {
    // The 0x10000 offset must be ADDED to the combined 20-bit value. OR-ing it
    // in is only correct while bit 16 of the high part is clear, and gives
    // wrong results from U+20000 upwards (e.g. D840 DC00 must be U+20000).
    let payload = ((hi & 0x3FF) << 10) | (lo & 0x3FF)
    Uchar(Int::unsafe_to_char(payload + 0x10000))
  }
}
656

657
///|
/// Read one UTF-16 code unit whose high-order byte is at `bytes[offset0]` and
/// whose low-order byte is at `bytes[offset1]`, and classify it.
///
/// - outside 0xD800..=0xDFFF: a character on its own (`UTF16Uchar`)
/// - 0xD800..=0xDBFF: a high unit that needs a partner (`Hi`)
/// - 0xDC00..=0xDFFF: `UTF16Malformed` with the two bytes of the unit
fn r_utf_16(
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int,
) -> UTF16Decode {
  let high_byte = bytes[offset0].to_int()
  let low_byte = bytes[offset1].to_int()
  let unit = (high_byte << 8) | low_byte
  if unit < 0xD800 || unit > 0xDFFF {
    UTF16Uchar(Int::unsafe_to_char(unit))
  } else if unit <= 0xDBFF {
    Hi(unit)
  } else {
    // the unit starts at the smaller of the two byte offsets
    UTF16Malformed(slice(bytes, @cmp.minimum(offset0, offset1), 2))
  }
}
674

675
// UTF16BE
676

677
///|
/// Decode the next UTF-16BE code unit (and its pair, if any) from the input
/// buffer.
///
/// Returns `End` when no input is left. With only one byte available the code
/// unit is collected through the scratch buffer.
fn decode_utf_16be(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  if rem < 2 {
    self.t_need(2)
    return t_fill(Decoder::t_decode_utf_16be, self)
  }
  let at = self.i_pos
  self.i_pos = at + 2
  // big endian: the high-order byte comes first
  self.decode_utf_16be_lo(r_utf_16(self.i, at, at + 1))
}
691

692
///|
/// Decode the UTF-16BE code unit gathered in the scratch buffer.
///
/// If fewer than `t_need` bytes were collected, the collected bytes are
/// reported as malformed and the next step is `decode_utf_16be`.
fn t_decode_utf_16be(self : Decoder) -> Decode {
  if self.t_len >= self.t_need {
    self.decode_utf_16be_lo(r_utf_16(self.t, 0, 1))
  } else {
    self.ret(Decoder::decode_utf_16be, malformed(self.t, 0, self.t_len))
  }
}
700

701
///|
/// Finish a UTF-16BE step given the outcome `decode` of reading the first
/// code unit.
///
/// Characters and malformed units are returned directly. For a high unit the
/// following code unit is read from the input buffer, or through the scratch
/// buffer when fewer than two bytes remain. The read position only moves past
/// the second unit when the pair decodes to a character.
fn decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
  let hi = match decode {
    UTF16Uchar(ch) => return self.ret(Decoder::decode_utf_16be, Uchar(ch))
    UTF16Malformed(bad) =>
      return self.ret(Decoder::decode_utf_16be, Malformed(bad))
    Hi(unit) => unit
  }
  if self.i_rem() < 2 {
    self.t_need(2)
    return t_fill(curry(t_decode_utf_16be_lo)(hi), self)
  }
  let at = self.i_pos
  let paired = r_utf_16_lo(hi, self.i, at, at + 1)
  match paired {
    Uchar(_) => self.i_pos += 2
    _ => ()
  }
  self.ret(Decoder::decode_utf_16be, paired)
}
723

724
///|
/// Turn a two-argument function into a chain of one-argument functions, so
/// that `curry(f)(x)(y)` evaluates `f(x, y)`.
fn[T, U, V] curry(f : (T, U) -> V) -> (T) -> (U) -> V {
  fn bind_first(x : T) -> (U) -> V {
    fn(y : U) -> V { f(x, y) }
  }

  bind_first
}
728

729
///|
/// Pair the high unit `hi` with the UTF-16BE code unit gathered in the scratch
/// buffer.
///
/// If fewer than `t_need` bytes were collected, the result of `malformed_pair`
/// is returned instead. Either way the next step is `decode_utf_16be`.
fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode {
  let outcome = if self.t_len >= self.t_need {
    r_utf_16_lo(hi, self.t, 0, 1)
  } else {
    malformed_pair(true, hi, self.t, 0, self.t_len)
  }
  self.ret(Decoder::decode_utf_16be, outcome)
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc