• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonbitlang / x / 673

27 Oct 2025 08:43AM UTC coverage: 90.323% (-0.004%) from 90.327%
673

Pull #199

github

web-flow
Merge 7f620090e into 1490c93c6
Pull Request #199: fix: lex_string flush bug ignores characters at start of input

1 of 1 new or added line in 1 file covered. (100.0%)

31 existing lines in 5 files now uncovered.

1988 of 2201 relevant lines covered (90.32%)

397.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.02
/encoding/decoding.mbt
1
// Copyright 2025 International Digital Economy Academy
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
///|
/// U+FFFD REPLACEMENT CHARACTER.
///
/// The lossy decoding functions in this file emit it in place of every
/// malformed sequence.
/// See https://unicode.org/charts/nameslist/n_FFF0.html
pub const U_REP : Char = '\u{FFFD}'
19

20
///|
/// Lookup table indexed by the first byte of a UTF-8 sequence.
///
/// Each entry is the total number of bytes the sequence occupies (1 to 4), or
/// `0` when the byte cannot start a sequence. The table has 256 entries, laid
/// out here as 16 rows of 16.
let utf_8_len = [
  // 0x00..0x7F: one-byte sequences
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // 0x80..0xBF: not valid as a first byte
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0xC0..0xC1: rejected, 0xC2..0xDF: two-byte sequences
  0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  // 0xE0..0xEF: three-byte sequences
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  // 0xF0..0xF4: four-byte sequences, 0xF5..0xFF: rejected
  4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
]
33

34
///|
/// Create and return a `Decoder` for the specified character encoding.
///
/// The `Decoder` consumes byte sequences and decodes them into the original string format.
///
/// # Parameters
///
/// - `encoding`: The character encoding format to be used for decoding the input byte sequences.
///   `UTF16` is decoded with the same routine as `UTF16LE`.
///
/// # Returns
///
/// A `Decoder` instance that can be used to decode byte sequences into strings.
/// It starts with an empty input buffer, a 4-byte scratch buffer and the
/// decoding routine matching `encoding` installed as its continuation.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = decoder(UTF8)
/// inspect(decoder.consume(inputs[0]), content="abc")
/// inspect(decoder.consume(inputs[1]), content="")
/// inspect(decoder.consume(inputs[2]), content="🐰")
/// assert_true(decoder.finish().is_empty())
/// ```
pub fn decoder(encoding : Encoding) -> Decoder {
  // input buffer and read position inside it
  let i = FixedArray::default()
  let i_pos = 0
  // scratch buffer for a sequence split across two inputs
  let t = FixedArray::make(4, Byte::default())
  let t_len = 0
  let t_need = 0
  // initial continuation: the entry point of the selected encoding
  let k = match encoding {
    UTF8 => Decoder::decode_utf_8
    UTF16 | UTF16LE => Decoder::decode_utf_16le
    UTF16BE => Decoder::decode_utf_16be
  }
  { i, i_pos, t, t_len, t_need, k }
}
70

71
///|
/// Decode the given byte sequence using the specified `Decoder` and return the resulting string.
///
/// This function can work in streaming mode where bytes are consumed incrementally.
/// When `stream` is `false`, it indicates the end of the input and triggers the final decoding step.
///
/// The decoding itself is performed by `Decoder::decode_to`; this function only
/// supplies a fresh `StringBuilder` and returns its content.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `stream?`: A boolean indicating whether more bytes will be supplied for decoding. It defaults to `false`.
///
/// # Returns
///
/// A `String` representing the decoded content from the input byte sequence.
/// The string is empty when there is nothing left to decode.
///
/// # Errors
///
/// `MalformedError`: when the byte sequence is not properly formatted according to the specified encoding.
/// `TruncatedError`: when the byte sequence ends prematurely, implying that more data is expected for complete decoding.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = @encoding.decoder(UTF8)
/// inspect(decoder.decode(inputs[0], stream=true), content="abc")
/// inspect(decoder.decode(inputs[1], stream=true), content="")
/// inspect(decoder.decode(inputs[2], stream=false), content="🐰")
/// ```
pub fn Decoder::decode(
  self : Decoder,
  input : BytesView,
  stream? : Bool = false,
) -> String raise Error {
  // TODO: Estimate size_hint based on input and encoding more accurately
  let builder = StringBuilder::new(size_hint=input.length())
  // Errors raised by `decode_to` propagate; the partial output is dropped.
  self.decode_to(input, builder, stream=stream)
  builder.to_string()
}
137

138
///|
/// Decode `input` with this decoder and append the characters to `output`
/// instead of building a new `String`.
///
/// Parameters:
///
/// * `self` : The decoder instance used to decode the byte sequence.
/// * `input` : The byte sequence to be decoded. It is appended to whatever
///   input the decoder still holds.
/// * `output` : The StringBuilder where the decoded content will be written to.
/// * `stream` : Whether more bytes will follow. Defaults to `false`.
///
/// Raises `MalformedError` when the byte sequence is not properly formatted
/// according to the specified encoding, and `TruncatedError` when the input
/// ends inside a sequence while `stream` is `false`. Characters decoded before
/// the error have already been written to `output`.
///
/// NOTE(review): when `stream` is `true` and the decoder's `t_need` field is
/// positive, a malformed sequence ends the call silently instead of raising —
/// confirm this is intended.
///
/// Example:
///
/// ```moonbit
///   let decoder = decoder(UTF8)
///   let buf = StringBuilder::new()
///   decoder.decode_to(b"Hello", buf)
///   inspect(buf.to_string(), content="Hello")
/// ```
pub fn Decoder::decode_to(
  self : Decoder,
  input : BytesView,
  output : StringBuilder,
  stream? : Bool = false,
) -> Unit raise {
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return
  }
  // Pull decode events until the decoder reports the end of the input,
  // asks for more bytes, or hits a malformed sequence.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      Malformed(bs) => {
        if stream && self.t_need > 0 {
          return
        }
        raise MalformedError(bs)
      }
      End => return
      Refill(pending) => {
        if stream {
          return
        }
        raise TruncatedError(pending)
      }
    }
  }
}
189

190
///|
/// One-shot convenience wrapper: decode all of `input` with a fresh decoder
/// for `encoding` and append the result to `output`.
///
/// The decoder is used in non-streaming mode, so any error raised by
/// `Decoder::decode_to` is propagated to the caller.
pub fn decode_to(
  input : BytesView,
  output : StringBuilder,
  encoding~ : Encoding,
) -> Unit raise {
  let one_shot = decoder(encoding)
  one_shot.decode_to(input, output)
}
198

199
///|
/// Feed another chunk of bytes to the `Decoder` and return what could be decoded so far.
///
/// This is `decode` with `stream` set to `true`, indicating that more bytes will follow.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to consume the byte sequence.
/// - `input`: The byte sequence to be consumed and decoded incrementally.
///
/// # Returns
///
/// A `String` with the characters completed by this chunk. A sequence that is
/// still incomplete at the end of `input` is kept by the decoder and is not
/// part of the result.
///
/// # Errors
///
/// `MalformedError`: when the byte sequence is not properly formatted according to the specified encoding.
pub fn Decoder::consume(
  self : Decoder,
  input : BytesView,
) -> String raise Error {
  let decoded = self.decode(input, stream=true)
  decoded
}
223

224
///|
/// Signal the end of the input and return whatever is left to decode.
///
/// This is `decode` called with an empty input and `stream` set to `false`,
/// so no more bytes are expected and the final decoding step is triggered.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to finalize the decoding process.
///
/// # Returns
///
/// A `String` representing the final part of the decoded content, after all byte sequences have been processed.
///
/// # Errors
///
/// `MalformedError`: This error is raised if the remaining byte sequence is not properly formatted according to the specified encoding.
/// `TruncatedError`: This error is raised if the remaining byte sequence ends prematurely, implying that more data was expected for complete decoding.
pub fn Decoder::finish(self : Decoder) -> String raise Error {
  let tail = self.decode(b"", stream=false)
  tail
}
245

246
///|
/// Decode the given byte sequence using the specified `Decoder` and return the resulting string, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
///
/// This function can work in streaming mode where bytes are consumed incrementally.
/// When `stream` is `false`, it indicates the end of the input and triggers the final decoding step.
///
/// The decoding itself is performed by `Decoder::decode_lossy_to`; this
/// function only supplies a fresh `StringBuilder` and returns its content.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `stream?`: A boolean indicating whether more bytes will be supplied for decoding. It defaults to `false`.
///
/// # Returns
///
/// A `String` representing the decoded content from the input byte sequence, with any invalid sequences replaced by the Unicode Replacement Character (`U+FFFD`).
pub fn Decoder::decode_lossy(
  self : Decoder,
  input : BytesView,
  stream? : Bool = false,
) -> String {
  let builder = StringBuilder::new(size_hint=input.length())
  self.decode_lossy_to(input, builder, stream=stream)
  builder.to_string()
}
296

297
///|
/// Decode `input` with this decoder and append the characters to `output`,
/// writing `U_REP` for every malformed sequence instead of failing.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded. It is appended to whatever
///   input the decoder still holds.
/// - `output`: The `StringBuilder` receiving the decoded characters.
/// - `stream?`: Whether more bytes will follow. Defaults to `false`. When it is
///   `true`, decoding stops as soon as the decoder asks for more bytes.
///
/// NOTE(review): when `stream` is `true` and the decoder's `t_need` field is
/// positive, a malformed sequence ends the call without writing `U_REP` —
/// confirm this is intended.
pub fn Decoder::decode_lossy_to(
  self : Decoder,
  input : BytesView,
  output : StringBuilder,
  stream? : Bool = false,
) -> Unit {
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return
  }
  // Pull decode events until the decoder reports the end of the input.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      Malformed(_) => {
        if stream && self.t_need > 0 {
          return
        }
        output.write_char(U_REP)
      }
      End => return
      // Not streaming: keep driving the decoder with what it already has.
      Refill(_) => if stream { return }
    }
  }
}
328

329
///|
/// Feed another chunk of bytes to the `Decoder` and return what could be decoded so far, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
///
/// This is `decode_lossy` with `stream` set to `true`, indicating that more bytes will follow.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to consume and decode the byte sequence.
/// - `input`: The byte sequence to be consumed and decoded incrementally.
///
/// # Returns
///
/// A `String` with the content decoded from this chunk; more bytes are expected afterwards.
pub fn Decoder::lossy_consume(self : Decoder, input : BytesView) -> String {
  let decoded = self.decode_lossy(input, stream=true)
  decoded
}
345

346
///|
/// Signal the end of the input and return whatever is left to decode, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
///
/// This is `decode_lossy` called with an empty input and `stream` set to
/// `false`, so no more bytes are expected and the final decoding step is triggered.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to finalize the lossy decoding process.
///
/// # Returns
///
/// A `String` representing the final part of the decoded content, after all byte sequences have been processed.
pub fn Decoder::lossy_finish(self : Decoder) -> String {
  let tail = self.decode_lossy(b"", stream=false)
  tail
}
362

363
///|
/// Append `input` to the decoder's input buffer.
///
/// A new buffer is allocated that holds the not-yet-consumed tail of the old
/// buffer followed by `input`; the consumed prefix is dropped and the read
/// position is reset to the start of the new buffer.
fn Decoder::i_cont(self : Decoder, input : BytesView) -> Unit {
  // `i_rem` can be negative (see the `eoi` case in `t_fill`); treat that as
  // "nothing left over".
  let leftover = @cmp.maximum(self.i_rem(), 0)
  let merged = FixedArray::make(leftover + input.length(), Byte::default())
  if leftover > 0 {
    // carry the unread tail of the old buffer over to the front
    self.i.blit_to(merged, len=leftover, src_offset=self.i_pos)
  }
  // the new bytes go right behind the carried-over tail
  merged.blit_from_bytesview(leftover, input)
  self.i = merged
  self.i_pos = 0
}
380

381
// Implementations
382

383
///|
/// Run one decoding step by invoking the continuation currently stored in `k`.
fn Decoder::decode_(self : Decoder) -> Decode {
  let step = self.k
  step(self)
}
387

388
///|
/// Install `k` as the continuation for the next decoding step and hand `v`
/// back unchanged.
fn Decoder::ret(self : Decoder, k : Cont, v : Decode) -> Decode {
  let result = v
  self.k = k
  result
}
393

394
///|
/// Number of bytes between the read position `i_pos` and the end of the input
/// buffer `i`.
///
/// Callers in this file treat a negative result as "end of input".
fn Decoder::i_rem(self : Decoder) -> Int {
  let buffered = self.i.length()
  buffered - self.i_pos
}
398

399
///|
/// Prepare the scratch buffer for a sequence of `need` bytes: record the
/// required length and mark the scratch buffer as empty.
fn Decoder::t_need(self : Decoder, need : Int) -> Unit {
  self.t_need = need
  self.t_len = 0
}
404

405
///|
/// Replace the input buffer with `FixedArray::default()`.
///
/// `i_pos` is left untouched.
fn Decoder::eoi(self : Decoder) -> Unit {
  let drained : FixedArray[Byte] = FixedArray::default()
  self.i = drained
}
409

410
///|
/// Report that the current input is exhausted in the middle of a sequence.
///
/// Drops the input buffer via `eoi`, installs `k` as the continuation to run
/// once more input is available, and returns `Refill` carrying a `Bytes` copy
/// made from the scratch buffer `t`.
///
/// NOTE(review): the copy is made from the whole scratch array, not only its
/// first `t_len` bytes — confirm that is what consumers of `Refill` expect.
fn Decoder::refill(self : Decoder, k : Cont) -> Decode {
  self.eoi()
  let pending = Bytes::from_fixedarray(self.t)
  self.ret(k, Decode::Refill(pending))
}
415

416
///|
/// Move bytes from the input buffer into the scratch buffer `t` until `t`
/// holds `t_need` bytes, then run `k`.
///
/// - If the input is at its end (`i_rem` is negative), `k` runs immediately
///   with whatever `t` contains.
/// - If the input holds fewer bytes than are still missing, all of them are
///   moved and the decoder asks for a refill; `t_fill` itself, bound to `k`,
///   becomes the continuation.
/// - Otherwise exactly the missing bytes are moved and `k` runs.
fn t_fill(k : Cont, decoder : Decoder) -> Decode {
  let available = decoder.i_rem()
  if available < 0 {
    // eoi
    return k(decoder)
  }
  let missing = decoder.t_need - decoder.t_len
  let starved = available < missing
  let take = if starved { available } else { missing }
  decoder.i.blit_to(
    decoder.t,
    len=take,
    dst_offset=decoder.t_len,
    src_offset=decoder.i_pos,
  )
  decoder.i_pos += take
  decoder.t_len += take
  if starved {
    decoder.refill(curry(t_fill)(k))
  } else {
    k(decoder)
  }
}
443

444
// UTF8
445

446
///|
/// Decode the next UTF-8 sequence from the input buffer.
///
/// Returns `End` when no input is left. When the sequence announced by the
/// first byte is longer than the remaining input, decoding continues through
/// the scratch buffer (`t_fill`). A byte that `utf_8_len` maps to `0` is
/// reported as a one-byte malformed sequence. In every other case the
/// announced number of bytes is consumed and handed to `r_utf_8`.
fn Decoder::decode_utf_8(self : Decoder) -> Decode {
  let available = self.i_rem()
  if available <= 0 {
    return Decode::End
  }
  let lead = self.i[self.i_pos].to_int()
  let width = utf_8_len[lead]
  if available < width {
    // the sequence is split across inputs: collect it in the scratch buffer
    self.t_need(width)
    return t_fill(Decoder::t_decode_utf_8, self)
  }
  let start = self.i_pos
  if width == 0 {
    self.i_pos += 1
    self.ret(Decoder::decode_utf_8, malformed(self.i, start, 1))
  } else {
    self.i_pos += width
    self.ret(Decoder::decode_utf_8, r_utf_8(self.i, start, width))
  }
}
469

470
///|
/// Continuation run after `t_fill` for a UTF-8 sequence collected in the
/// scratch buffer.
///
/// If the scratch buffer holds fewer bytes than required, its content is
/// reported as malformed; otherwise it is decoded with `r_utf_8`. Either way
/// the decoder resumes with `decode_utf_8`.
fn Decoder::t_decode_utf_8(self : Decoder) -> Decode {
  let outcome = if self.t_len < self.t_need {
    malformed(self.t, 0, self.t_len)
  } else {
    r_utf_8(self.t, 0, self.t_len)
  }
  self.ret(Decoder::decode_utf_8, outcome)
}
478

479
///|
/// Decode one UTF-8 sequence of `length` bytes starting at `bytes[offset]`.
///
/// Returns `Uchar` for an accepted sequence and `malformed(bytes, offset,
/// length)` otherwise. A sequence is rejected when a trailing byte is not of
/// the form `10xxxxxx`, or when the second byte falls outside the range
/// allowed for the first byte:
///
/// | first byte | allowed second byte |
/// |------------|---------------------|
/// | `0xE0`     | `0xA0..=0xBF`       |
/// | `0xED`     | `0x80..=0x9F`       |
/// | `0xF0`     | `0x90..=0xBF`       |
/// | `0xF4`     | `0x80..=0x8F`       |
///
/// Panics when `length` is not 1, 2, 3 or 4.
fn r_utf_8(bytes : FixedArray[Byte], offset : Int, length : Int) -> Decode {
  // `10xxxxxx`: the shape of every trailing byte
  fn is_trail(b : Int) -> Bool {
    b >> 6 == 0b10
  }

  match length {
    1 => Uchar(Int::unsafe_to_char(bytes[offset].to_int()))
    2 => {
      let lead = bytes[offset].to_int()
      let second = bytes[offset + 1].to_int()
      if is_trail(second) {
        Uchar(Int::unsafe_to_char(((lead & 0x1F) << 6) | (second & 0x3F)))
      } else {
        malformed(bytes, offset, length)
      }
    }
    3 => {
      let lead = bytes[offset].to_int()
      let second = bytes[offset + 1].to_int()
      let third = bytes[offset + 2].to_int()
      let second_ok = match lead {
        0xE0 => 0xA0 <= second && second <= 0xBF
        0xED => 0x80 <= second && second <= 0x9F
        _ => is_trail(second)
      }
      if is_trail(third) && second_ok {
        let code = ((lead & 0x0F) << 12) |
          ((second & 0x3F) << 6) |
          (third & 0x3F)
        Uchar(Int::unsafe_to_char(code))
      } else {
        malformed(bytes, offset, length)
      }
    }
    4 => {
      let lead = bytes[offset].to_int()
      let second = bytes[offset + 1].to_int()
      let third = bytes[offset + 2].to_int()
      let fourth = bytes[offset + 3].to_int()
      let second_ok = match lead {
        0xF0 => 0x90 <= second && second <= 0xBF
        0xF4 => 0x80 <= second && second <= 0x8F
        _ => is_trail(second)
      }
      if is_trail(fourth) && is_trail(third) && second_ok {
        let code = ((lead & 0x07) << 18) |
          ((second & 0x3F) << 12) |
          ((third & 0x3F) << 6) |
          (fourth & 0x3F)
        Uchar(Int::unsafe_to_char(code))
      } else {
        malformed(bytes, offset, length)
      }
    }
    _ => panic()
  }
}
563

564
// UTF16LE
565

566
///|
/// Result of reading a single 16-bit UTF-16 code unit (see `r_utf_16`).
priv enum UTF16Decode {
  /// A high surrogate (`0xD800..=0xDBFF`); a low surrogate must follow.
  Hi(Int)
  /// A code unit that cannot be decoded at this position, with its bytes.
  UTF16Malformed(Bytes)
  /// A code unit that is a complete character by itself.
  UTF16Uchar(Char)
}
572

573
///|
/// Decode the next UTF-16LE code unit from the input buffer.
///
/// Returns `End` when no input is left. With a single byte left, decoding
/// continues through the scratch buffer (`t_fill`). Otherwise two bytes are
/// consumed and interpreted with the second byte as the most significant
/// one; the result is handed to `decode_utf_16le_lo`.
fn Decoder::decode_utf_16le(self : Decoder) -> Decode {
  let available = self.i_rem()
  if available <= 0 {
    return Decode::End
  }
  if available < 2 {
    self.t_need(2)
    return t_fill(Decoder::t_decode_utf_16le, self)
  }
  let start = self.i_pos
  self.i_pos = start + 2
  let unit = r_utf_16(self.i, start + 1, start)
  self.decode_utf_16le_lo(unit)
}
587

588
///|
/// Continuation run after `t_fill` for a UTF-16LE code unit collected in the
/// scratch buffer.
///
/// A complete unit is read from the scratch buffer and handed to
/// `decode_utf_16le_lo`; an incomplete one is reported as malformed and the
/// decoder resumes with `decode_utf_16le`.
fn Decoder::t_decode_utf_16le(self : Decoder) -> Decode {
  if self.t_len >= self.t_need {
    return self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0))
  }
  self.ret(Decoder::decode_utf_16le, malformed(self.t, 0, self.t_len))
}
596

597
///|
/// Finish decoding a UTF-16LE code unit that has already been read as `v`.
///
/// A complete character or a malformed unit is returned as is. For a high
/// surrogate the following unit is read as its low surrogate: through the
/// scratch buffer when fewer than two bytes remain, otherwise directly from
/// the input buffer. The two bytes of the following unit are consumed only
/// when the pair decodes to a character; otherwise the read position stays
/// put, so the next step starts at that unit.
fn Decoder::decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
  match v {
    UTF16Uchar(ch) => self.ret(Decoder::decode_utf_16le, Uchar(ch))
    UTF16Malformed(bs) => self.ret(Decoder::decode_utf_16le, Malformed(bs))
    Hi(hi) => {
      if self.i_rem() < 2 {
        self.t_need(2)
        return t_fill(curry(t_decode_utf_16le_lo)(hi), self)
      }
      let start = self.i_pos
      let paired = r_utf_16_lo(hi, self.i, start + 1, start)
      match paired {
        Uchar(_) => self.i_pos = start + 2
        _ => ()
      }
      self.ret(Decoder::decode_utf_16le, paired)
    }
  }
}
619

620
///|
/// Continuation run after `t_fill` for the low surrogate of a UTF-16LE pair
/// whose high surrogate is `hi`.
///
/// With an incomplete scratch buffer the pair is reported through
/// `malformed_pair`; otherwise the low surrogate is read from the scratch
/// buffer with `r_utf_16_lo`. The decoder resumes with `decode_utf_16le`.
fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
  let outcome = if decoder.t_len < decoder.t_need {
    malformed_pair(false, hi, decoder.t, 0, decoder.t_len)
  } else {
    r_utf_16_lo(hi, decoder.t, 1, 0)
  }
  decoder.ret(Decoder::decode_utf_16le, outcome)
}
631

632
///|
/// Combine an already-read high surrogate with the 16-bit code unit found in
/// `bytes` into a supplementary-plane character.
///
/// # Parameters
///
/// - `hi`: the high surrogate code unit (callers pass the payload of `Hi`).
/// - `bytes`: buffer holding the candidate low surrogate.
/// - `offset0`: index of the most significant byte of the code unit.
/// - `offset1`: index of the least significant byte of the code unit.
///
/// # Returns
///
/// `Uchar` with the combined code point when the unit lies in
/// `0xDC00..=0xDFFF`; otherwise `Malformed` carrying the two bytes of the
/// rejected unit, most significant byte first.
fn r_utf_16_lo(
  hi : Int,
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int,
) -> Decode {
  let b0 = bytes[offset0].to_int()
  let b1 = bytes[offset1].to_int()
  let lo = (b0 << 8) | b1
  if lo < 0xDC00 || lo > 0xDFFF {
    // NOTE(jinser): only hi malformed, skip lo if lo is illegal
    //
    // For example, b"\xD8\x00\x00\x48" (BE)
    // Since \xD8\x00 is *legal* hi, here will try to parse lo next,
    // however the whole \xD8\x00\x00\x48 is *illegal* so the result will be a `Malformed[b"\xD8\x00\x00\x48"]`
    //
    // But \x00\x48 itself is a *legal* UTF16 code point with a value of `H`,
    // the ideal result should be: `[Malformed(b"\xD8\x00"), Uchar('H')]`
    //
    // > printf '\xD8\x00\x00\x48' | uconv --from-code UTF16BE --to-code UTF8 --from-callback substitute
    // �H
    //
    // NOTE(review): the payload below holds the bytes of the rejected unit,
    // not those of the dangling high surrogate — confirm which one consumers
    // of `Malformed` expect.
    Malformed([bytes[offset0], bytes[offset1]])
  } else {
    // The 0x10000 bias has to be ADDED to the 20-bit value built from both
    // surrogates. OR-ing it in loses the bias whenever bit 6 of the high
    // surrogate payload is set (for example D840 DC00 must give U+20000).
    let payload = ((hi & 0x3FF) << 10) | (lo & 0x3FF)
    Uchar(Int::unsafe_to_char(payload + 0x10000))
  }
}
659

660
///|
/// Read one 16-bit code unit and classify it.
///
/// `offset0` is the index of the most significant byte and `offset1` the
/// index of the least significant byte, which lets the same routine serve
/// both byte orders.
///
/// - `0xD800..=0xDBFF` yields `Hi` (a high surrogate).
/// - `0xDC00..=0xDFFF` yields `UTF16Malformed` with the unit's two bytes
///   (a low surrogate without a preceding high surrogate).
/// - Everything else yields `UTF16Uchar`.
fn r_utf_16(
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int,
) -> UTF16Decode {
  let high_byte = bytes[offset0].to_int()
  let low_byte = bytes[offset1].to_int()
  let unit = (high_byte << 8) | low_byte
  if 0xD800 <= unit && unit <= 0xDBFF {
    Hi(unit)
  } else if 0xDC00 <= unit && unit <= 0xDFFF {
    UTF16Malformed(slice(bytes, @cmp.minimum(offset0, offset1), 2))
  } else {
    UTF16Uchar(Int::unsafe_to_char(unit))
  }
}
677

678
// UTF16BE
679

680
///|
/// Decode the next UTF-16BE code unit from the input buffer.
///
/// Returns `End` when no input is left. With a single byte left, decoding
/// continues through the scratch buffer (`t_fill`). Otherwise two bytes are
/// consumed and interpreted with the first byte as the most significant
/// one; the result is handed to `decode_utf_16be_lo`.
fn Decoder::decode_utf_16be(self : Decoder) -> Decode {
  let available = self.i_rem()
  if available <= 0 {
    return Decode::End
  }
  if available < 2 {
    self.t_need(2)
    return t_fill(Decoder::t_decode_utf_16be, self)
  }
  let start = self.i_pos
  self.i_pos = start + 2
  let unit = r_utf_16(self.i, start, start + 1)
  self.decode_utf_16be_lo(unit)
}
694

695
///|
/// Continuation run after `t_fill` for a UTF-16BE code unit collected in the
/// scratch buffer.
///
/// A complete unit is read from the scratch buffer and handed to
/// `decode_utf_16be_lo`; an incomplete one is reported as malformed and the
/// decoder resumes with `decode_utf_16be`.
fn Decoder::t_decode_utf_16be(self : Decoder) -> Decode {
  if self.t_len >= self.t_need {
    return self.decode_utf_16be_lo(r_utf_16(self.t, 0, 1))
  }
  self.ret(Decoder::decode_utf_16be, malformed(self.t, 0, self.t_len))
}
703

704
///|
/// Finish decoding a UTF-16BE code unit that has already been read as
/// `decode`.
///
/// A complete character or a malformed unit is returned as is. For a high
/// surrogate the following unit is read as its low surrogate: through the
/// scratch buffer when fewer than two bytes remain, otherwise directly from
/// the input buffer. The two bytes of the following unit are consumed only
/// when the pair decodes to a character; otherwise the read position stays
/// put, so the next step starts at that unit.
fn Decoder::decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
  match decode {
    UTF16Uchar(ch) => self.ret(Decoder::decode_utf_16be, Uchar(ch))
    UTF16Malformed(bs) => self.ret(Decoder::decode_utf_16be, Malformed(bs))
    Hi(hi) => {
      if self.i_rem() < 2 {
        self.t_need(2)
        return t_fill(curry(t_decode_utf_16be_lo)(hi), self)
      }
      let start = self.i_pos
      let paired = r_utf_16_lo(hi, self.i, start, start + 1)
      match paired {
        Uchar(_) => self.i_pos = start + 2
        _ => ()
      }
      self.ret(Decoder::decode_utf_16be, paired)
    }
  }
}
726

727
///|
/// Turn a two-argument function into a chain of one-argument functions:
/// `curry(f)(x)(y)` evaluates `f(x, y)`.
///
/// Used in this file to bind the first argument of a continuation before
/// handing it to `t_fill`.
fn[T, U, V] curry(f : (T, U) -> V) -> (T) -> (U) -> V {
  fn(first : T) -> (U) -> V {
    fn(second : U) -> V { f(first, second) }
  }
}
731

732
///|
/// Continuation run after `t_fill` for the low surrogate of a UTF-16BE pair
/// whose high surrogate is `hi`.
///
/// With an incomplete scratch buffer the pair is reported through
/// `malformed_pair`; otherwise the low surrogate is read from the scratch
/// buffer with `r_utf_16_lo`. The decoder resumes with `decode_utf_16be`.
fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode {
  let outcome = if self.t_len < self.t_need {
    malformed_pair(true, hi, self.t, 0, self.t_len)
  } else {
    r_utf_16_lo(hi, self.t, 0, 1)
  }
  self.ret(Decoder::decode_utf_16be, outcome)
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc