• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonbitlang / x / 766

08 Apr 2026 08:50AM UTC coverage: 87.633% (+0.3%) from 87.368%
766

Pull #232

github

web-flow
Merge 49d276d17 into 890b53b6d
Pull Request #232: fix: normalize result in Path::resolve

149 of 160 new or added lines in 20 files covered. (93.13%)

1 existing line in 1 file now uncovered.

2395 of 2733 relevant lines covered (87.63%)

335.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.58
/encoding/decoding.mbt
1
// Copyright 2025 International Digital Economy Academy
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
///|
16
/// The Unicode Replacement Character, which is used to replace invalid or unrecognized sequences during lossy decoding.
17
/// https://unicode.org/charts/nameslist/n_FFF0.html
18
pub const U_REP = '\u{FFFD}'
19

20
///|
21
let utf_8_len : ReadOnlyArray[Int] = [
22
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
23
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
24
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
25
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
26
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
27
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29
  0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
30
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
31
  4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
]
33

34
///|
35
/// Create and return a `Decoder` for the specified character encoding.
36
///
37
/// The `Decoder` consumes byte sequences and decodes them into the original string format.
38
///
39
/// # Parameters
40
///
41
/// - `encoding`: The character encoding format to be used for decoding the input byte sequences.
42
///
43
/// # Returns
44
///
45
/// A `Decoder` instance that can be used to decode byte sequences into strings.
46
///
47
/// # Examples
48
///
49
/// ```moonbit
50
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
51
/// let decoder = decoder(UTF8)
52
/// inspect(decoder.consume(inputs[0]), content="abc")
53
/// inspect(decoder.consume(inputs[1]), content="")
54
/// inspect(decoder.consume(inputs[2]), content="🐰")
55
/// assert_true(decoder.finish().is_empty())
/// ```
56
pub fn decoder(encoding : Encoding) -> Decoder {
  // Choose the entry continuation for the requested encoding; plain `UTF16`
  // is served by the little-endian decoder.
  let entry = match encoding {
    UTF8 => Decoder::decode_utf_8
    UTF16 | UTF16LE => Decoder::decode_utf_16le
    UTF16BE => Decoder::decode_utf_16be
  }
  // Start with no pending input and an empty 4-byte scratch buffer `t`, which
  // holds sequences that straddle two input chunks.
  {
    i: FixedArray::default(),
    i_pos: 0,
    t: FixedArray::make(4, Byte::default()),
    t_len: 0,
    t_need: 0,
    k: entry,
  }
}
70

71
///|
72
/// Decode the given byte sequence using the specified `Decoder` and return the resulting string.
73
///
74
/// This function can work in streaming mode where bytes are consumed incrementally.
75
/// When `stream` is `false`, it indicates the end of the input and triggers the final decoding step.
76
///
77
/// # Parameters
78
///
79
/// - `self`: The `Decoder` instance used to decode the byte sequence.
80
/// - `input`: The byte sequence to be decoded.
81
/// - `stream~`: A boolean indicating whether more bytes will be supplied for decoding. It defaults to `false`.
82
///
83
/// # Returns
84
///
85
/// A `String` representing the decoded content from the input byte sequence.
86
///
87
/// # Errors
88
///
89
/// `MalformedError`: when the byte sequence is not properly formatted according to the specified encoding.
90
/// `TruncatedError`: when the byte sequence ends prematurely, implying that more data is expected for complete decoding.
91
///
92
/// # Examples
93
///
94
/// ```moonbit check
95
/// test {
96
///   let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
97
///   let decoder = @encoding.decoder(UTF8)
98
///   inspect(decoder.decode(inputs[0], stream=true), content="abc")
99
///   inspect(decoder.decode(inputs[1], stream=true), content="")
100
///   inspect(decoder.decode(inputs[2], stream=false), content="🐰")
101
/// }
102
/// ```
103
pub fn Decoder::decode(
  self : Decoder,
  input : BytesView,
  stream? : Bool = false,
) -> String raise Error {
  // Queue the new chunk behind whatever is still pending from earlier calls.
  if input.length() > 0 {
    self.i_cont(input)
  }
  // Nothing buffered at all: there is nothing to decode.
  if self.i_rem() == 0 {
    return String::default()
  }

  // TODO: Estimate size_hint based on input and encoding more accurately
  let out = StringBuilder::new(size_hint=input.length())

  // Pump the state machine one step at a time until it reports the end of
  // the input, asks for more bytes, or yields a sequence we must reject.
  while true {
    match self.decode_() {
      Uchar(ch) => out.write_char(ch)
      End => break
      Malformed(bad) =>
        // In streaming mode, once a partial-sequence buffer has been armed
        // (`t_need > 0`), a malformed result ends this call without raising.
        if stream && self.t_need > 0 {
          break
        } else {
          raise MalformedError(bad)
        }
      Refill(partial) =>
        // More bytes are required: acceptable while streaming, an error when
        // the caller declared this to be the final chunk.
        if stream {
          break
        } else {
          raise TruncatedError(partial)
        }
    }
  }
  out.to_string()
}
141

142
///|
143
/// Decodes the given byte sequence using the specified decoder and writes the
144
/// result directly to a StringBuilder.
145
/// Similar to `decode!`, but writes the result to an existing StringBuilder
146
/// instead of creating a new String.
147
///
148
/// Parameters:
149
///
150
/// * `self` : The decoder instance used to decode the byte sequence.
/// * `input` : The byte sequence to be decoded.
/// * `output` : The StringBuilder where the decoded content will be written to.
/// * `stream~` : Whether more bytes will be supplied later. Defaults to `false`.
///
/// Throws a `MalformedError` when the byte sequence is not properly formatted
/// according to the specified encoding, and a `TruncatedError` when `stream`
/// is `false` and the input ends in the middle of a sequence.
156
///
157
/// Example:
158
///
159
/// ```moonbit check
160
/// test {
161
///   let decoder = decoder(UTF8)
162
///   let buf = StringBuilder::new()
163
///   decoder.decode_to(b"Hello", buf)
164
///   inspect(buf.to_string(), content="Hello")
165
/// }
166
/// ```
167
pub fn Decoder::decode_to(
  self : Decoder,
  input : BytesView,
  output : StringBuilder,
  stream? : Bool = false,
) -> Unit raise {
  // Append the new chunk to any bytes left over from earlier calls.
  if input.length() > 0 {
    self.i_cont(input)
  }
  // Nothing buffered at all: no output to produce.
  if self.i_rem() == 0 {
    return
  }
  // Pump the state machine, writing each decoded character straight into
  // `output`, until it ends, needs more bytes, or rejects a sequence.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      End => break
      Malformed(bad) =>
        // While streaming with a partial-sequence buffer armed, stop quietly;
        // otherwise surface the offending bytes.
        if stream && self.t_need > 0 {
          break
        } else {
          raise MalformedError(bad)
        }
      Refill(partial) =>
        if stream {
          break
        } else {
          raise TruncatedError(partial)
        }
    }
  }
}
197

198
///|
/// One-shot convenience wrapper: builds a fresh `Decoder` for `encoding` and
/// decodes `input` into `output` in non-streaming mode.
///
/// Any error raised by `Decoder::decode_to` is propagated to the caller.
pub fn decode_to(
  input : BytesView,
  output : StringBuilder,
  encoding~ : Encoding,
) -> Unit raise {
  let fresh = decoder(encoding)
  fresh.decode_to(input, output)
}
206

207
///|
208
/// Consume the given byte sequence using the specified `Decoder` and return the resulting string incrementally.
209
///
210
/// This function calls `decode!` with the `stream` parameter set to `true`, indicating that more bytes will follow for decoding.
211
///
212
/// # Parameters
213
///
214
/// - `self`: The `Decoder` instance used to consume the byte sequence.
215
/// - `input`: The byte sequence to be consumed and decoded incrementally.
216
///
217
/// # Returns
218
///
219
/// A `String` representing the partially decoded content from the input byte sequence, as more bytes are expected.
220
///
221
/// # Errors
222
///
223
/// `MalformedError`: when the byte sequence is not properly formatted according to the specified encoding.
224
/// `TruncatedError`: not raised by this function in practice — with `stream=true` an incomplete trailing sequence is kept in the decoder and completed by the next call.
225
pub fn Decoder::consume(
  self : Decoder,
  input : BytesView,
) -> String raise Error {
  // Streaming call: `stream=true` tells `decode` that more input may follow.
  let decoded = self.decode(input, stream=true)
  decoded
}
231

232
///|
233
/// Finalize the decoding process and return the remaining decoded string.
234
///
235
/// This function calls `decode!` with the `stream` parameter set to `false`, indicating that no more bytes will be supplied
236
/// and triggering the final decoding step to produce the remaining output.
237
///
238
/// # Parameters
239
///
240
/// - `self`: The `Decoder` instance used to finalize the decoding process.
241
///
242
/// # Returns
243
///
244
/// A `String` representing the final part of the decoded content, after all byte sequences have been processed.
245
///
246
/// # Errors
247
///
248
/// `MalformedError`: This error is raised if the remaining byte sequence is not properly formatted according to the specified encoding.
249
/// `TruncatedError`: This error is raised if the remaining byte sequence ends prematurely, implying that more data was expected for complete decoding.
250
pub fn Decoder::finish(self : Decoder) -> String raise Error {
  // Final call: no new bytes, and `stream=false` marks the end of the input.
  let tail = self.decode(b"", stream=false)
  tail
}
253

254
///|
255
/// Decode the given byte sequence using the specified `Decoder` and return the resulting string, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
256
///
257
/// This function can work in streaming mode where bytes are consumed incrementally.
258
/// When `stream` is `false`, it indicates the end of the input and triggers the final decoding step.
259
///
260
/// # Parameters
261
///
262
/// - `self`: The `Decoder` instance used to decode the byte sequence.
263
/// - `input`: The byte sequence to be decoded.
264
/// - `stream~`: A boolean indicating whether more bytes will be supplied for decoding. It defaults to `false`.
265
///
266
/// # Returns
267
///
268
/// A `String` representing the decoded content from the input byte sequence, with any invalid sequences replaced by the Unicode Replacement Character (`U+FFFD`).
269
pub fn Decoder::decode_lossy(
  self : Decoder,
  input : BytesView,
  stream? : Bool = false,
) -> String {
  // Queue the new chunk behind whatever is still pending from earlier calls.
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return String::default()
  }

  // Collect decoded characters, substituting `U_REP` for rejected sequences.
  let chars = []
  while true {
    match self.decode_() {
      Uchar(ch) => chars.push(ch)
      End => break
      Malformed(_) => {
        // While streaming with a partial-sequence buffer armed, stop here
        // instead of emitting a replacement character.
        if stream && self.t_need > 0 {
          break
        }
        chars.push(U_REP)
      }
      Refill(_) =>
        // Streaming: wait for the next chunk. Otherwise keep stepping so the
        // decoder can resolve the incomplete tail itself.
        if stream {
          break
        }
    }
  }
  String::from_array(chars)
}
306

307
///|
/// Lossy counterpart of `decode_to`: decodes `input` and appends the text to
/// `output`, writing `U_REP` in place of every rejected sequence instead of
/// raising.
///
/// When `stream` is `true` the call returns as soon as the decoder asks for
/// more bytes; when it is `false` (the default) decoding continues until the
/// decoder reports the end of the input.
pub fn Decoder::decode_lossy_to(
  self : Decoder,
  input : BytesView,
  output : StringBuilder,
  stream? : Bool = false,
) -> Unit {
  // Append the new chunk to any bytes left over from earlier calls.
  if input.length() > 0 {
    self.i_cont(input)
  }
  if self.i_rem() == 0 {
    return
  }

  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      End => break
      Malformed(_) => {
        // While streaming with a partial-sequence buffer armed, stop here
        // instead of emitting a replacement character.
        if stream && self.t_need > 0 {
          break
        }
        output.write_char(U_REP)
      }
      Refill(_) =>
        if stream {
          break
        }
    }
  }
}
340

341
///|
342
/// Consume the given byte sequence using the specified `Decoder` and return the resulting string incrementally, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
343
///
344
/// This function calls `decode_lossy` with the `stream` parameter set to `true`, indicating that more bytes will follow for decoding.
345
///
346
/// # Parameters
347
///
348
/// - `self`: The `Decoder` instance used to consume and decode the byte sequence.
349
/// - `input`: The byte sequence to be consumed and decoded incrementally.
350
///
351
/// # Returns
352
///
353
/// A `String` representing the partially decoded content from the input byte sequence, with any invalid sequences replaced by the Unicode Replacement Character (`U+FFFD`), as more bytes are expected.
354
pub fn Decoder::lossy_consume(self : Decoder, input : BytesView) -> String {
  // Streaming call: `stream=true` tells `decode_lossy` more input may follow.
  let decoded = self.decode_lossy(input, stream=true)
  decoded
}
357

358
///|
359
/// Finalize the lossy decoding process and return the remaining decoded string, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
360
///
361
/// This function calls `decode_lossy` with the `stream` parameter set to `false`, indicating that no more bytes will be supplied
362
/// and triggering the final decoding step to produce the remaining output.
363
///
364
/// # Parameters
365
///
366
/// - `self`: The `Decoder` instance used to finalize the lossy decoding process.
367
///
368
/// # Returns
369
///
370
/// A `String` representing the final part of the decoded content, with any invalid sequences replaced by the Unicode Replacement Character (`U+FFFD`), after all byte sequences have been processed.
371
pub fn Decoder::lossy_finish(self : Decoder) -> String {
  // Final call: no new bytes, and `stream=false` marks the end of the input.
  let tail = self.decode_lossy(b"", stream=false)
  tail
}
374

375
///|
/// Replaces the input buffer `i` with a new array holding the not-yet-consumed
/// tail of the old buffer followed by all of `input`, and rewinds `i_pos` to 0.
fn Decoder::i_cont(self : Decoder, input : BytesView) -> Unit {
  // `i_rem()` may be negative (see `eoi`); treat that as "nothing pending".
  let pending = @cmp.maximum(self.i_rem(), 0)
  let merged = FixedArray::make(pending + input.length(), Byte::default())
  // Carry over the unread remainder of the old buffer, if any.
  if pending > 0 {
    self.i.blit_to(merged, len=pending, src_offset=self.i_pos)
  }
  // The new chunk goes right after the carried-over bytes.
  merged.blit_from_bytesview(pending, input)
  self.i = merged
  self.i_pos = 0
}
392

393
// Implementations
394

395
///|
/// Runs one step of the state machine by invoking the stored continuation `k`
/// on this decoder.
fn Decoder::decode_(self : Decoder) -> Decode {
  let step = self.k
  step(self)
}
399

400
///|
/// Installs `k` as the continuation for the next step and hands `v` back as
/// the result of the current one.
fn Decoder::ret(self : Decoder, k : Cont, v : Decode) -> Decode {
  let result = v
  self.k = k
  result
}
405

406
///|
/// Number of bytes of `i` not yet consumed (`i.length() - i_pos`).
///
/// The value goes negative once `eoi` has replaced `i` while `i_pos` is
/// non-zero; `t_fill` treats a negative value as end of input.
fn Decoder::i_rem(self : Decoder) -> Int {
  let total = self.i.length()
  total - self.i_pos
}
410

411
///|
/// Arms the scratch buffer `t` for a sequence of `need` bytes: records the
/// target length and marks the buffer as holding nothing yet.
fn Decoder::t_need(self : Decoder, need : Int) -> Unit {
  self.t_need = need
  self.t_len = 0
}
416

417
///|
/// Drops the current input buffer by replacing `i` with a default
/// `FixedArray`. `i_pos` is left untouched.
fn Decoder::eoi(self : Decoder) -> Unit {
  let drained = FixedArray::default()
  self.i = drained
}
421

422
///|
/// Signals that more input is needed: drops the exhausted input buffer,
/// installs `k` as the continuation to resume with, and returns a `Refill`
/// carrying bytes built from the scratch buffer `t`.
fn Decoder::refill(self : Decoder, k : Cont) -> Decode {
  self.eoi()
  let pending = Decode::Refill(Bytes::from_array(self.t))
  self.ret(k, pending)
}
427

428
///|
/// Moves bytes from the input buffer `i` into the scratch buffer `t` until `t`
/// holds `t_need` bytes, then resumes with `k`.
///
/// - If the input is exhausted before `t` is full, everything available is
///   copied and a refill is requested, with this function re-installed as the
///   continuation.
/// - If the input was already dropped (`i_rem() < 0`), `k` is called directly
///   so it can deal with the short buffer.
fn t_fill(k : Cont, decoder : Decoder) -> Decode {
  let rem = decoder.i_rem()
  if rem < 0 { // eoi
    return k(decoder)
  }
  let missing = decoder.t_need - decoder.t_len
  let enough = rem >= missing
  // Copy as much as is both available and still needed.
  let take = if enough { missing } else { rem }
  decoder.i.blit_to(
    decoder.t,
    len=take,
    dst_offset=decoder.t_len,
    src_offset=decoder.i_pos,
  )
  decoder.i_pos += take
  decoder.t_len += take
  if enough {
    k(decoder)
  } else {
    decoder.refill(curry(t_fill)(k))
  }
}
455

456
// UTF8
457

458
///|
/// Entry continuation for UTF-8: decodes the next sequence from `i`.
///
/// Returns `End` when no input is left. When the sequence announced by the
/// lead byte runs past the end of `i`, it switches to the scratch-buffer path
/// (`t_fill`). Otherwise it consumes the sequence and returns its decoding.
fn Decoder::decode_utf_8(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  let start = self.i_pos
  // Expected sequence length from the lead byte; 0 marks an invalid lead.
  let need = utf_8_len[self.i[start].to_int()]
  if rem < need {
    self.t_need(need)
    return t_fill(Decoder::t_decode_utf_8, self)
  }
  if need == 0 {
    // Skip just the offending byte and report it.
    self.i_pos += 1
    self.ret(Decoder::decode_utf_8, malformed(self.i, start, 1))
  } else {
    self.i_pos += need
    self.ret(Decoder::decode_utf_8, r_utf_8(self.i, start, need))
  }
}
481

482
///|
/// Continuation run after `t_fill`: decodes the UTF-8 sequence gathered in the
/// scratch buffer `t`, or reports it as malformed when fewer than `t_need`
/// bytes were collected. Either way, decoding resumes with `decode_utf_8`.
fn Decoder::t_decode_utf_8(self : Decoder) -> Decode {
  let outcome = if self.t_len < self.t_need {
    malformed(self.t, 0, self.t_len)
  } else {
    r_utf_8(self.t, 0, self.t_len)
  }
  self.ret(Decoder::decode_utf_8, outcome)
}
490

491
///|
/// Decodes a single UTF-8 sequence of `length` bytes starting at
/// `bytes[offset]`.
///
/// Returns `Uchar` for a well-formed sequence and
/// `malformed(bytes, offset, length)` otherwise. Calls `panic()` when `length`
/// is not in `1..=4`.
fn r_utf_8(bytes : FixedArray[Byte], offset : Int, length : Int) -> Decode {
  // `true` when `b` has the `10xxxxxx` continuation-byte shape.
  fn is_cont(b : Int) -> Bool {
    b >> 6 == 0b10
  }

  fn scalar(c : Int) -> Decode {
    Uchar(Int::unsafe_to_char(c))
  }

  match length {
    1 => scalar(bytes[offset].to_int())
    2 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      if is_cont(c1) {
        scalar(((lead & 0x1F) << 6) | (c1 & 0x3F))
      } else {
        malformed(bytes, offset, length)
      }
    }
    3 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      let c2 = bytes[offset + 2].to_int()
      // The legal range of the second byte depends on the lead byte:
      // E0 rules out overlong forms, ED rules out surrogate code points.
      let c1_ok = match lead {
        0xE0 => 0xA0 <= c1 && c1 <= 0xBF
        0xED => 0x80 <= c1 && c1 <= 0x9F
        _ => is_cont(c1)
      }
      if is_cont(c2) && c1_ok {
        scalar(((lead & 0x0F) << 12) | (((c1 & 0x3F) << 6) | (c2 & 0x3F)))
      } else {
        malformed(bytes, offset, length)
      }
    }
    4 => {
      let lead = bytes[offset].to_int()
      let c1 = bytes[offset + 1].to_int()
      let c2 = bytes[offset + 2].to_int()
      let c3 = bytes[offset + 3].to_int()
      // F0 rules out overlong forms, F4 caps the result at U+10FFFF.
      let c1_ok = match lead {
        0xF0 => 0x90 <= c1 && c1 <= 0xBF
        0xF4 => 0x80 <= c1 && c1 <= 0x8F
        _ => is_cont(c1)
      }
      if is_cont(c3) && is_cont(c2) && c1_ok {
        scalar(
          ((lead & 0x07) << 18) |
          ((c1 & 0x3F) << 12) |
          ((c2 & 0x3F) << 6) |
          (c3 & 0x3F),
        )
      } else {
        malformed(bytes, offset, length)
      }
    }
    _ => panic()
  }
}
575

576
// UTF16LE
577

578
///|
579
priv enum UTF16Decode {
580
  Hi(Int)
581
  UTF16Malformed(Bytes)
582
  UTF16Uchar(Char)
583
}
584

585
///|
/// Entry continuation for UTF-16LE: reads the next 16-bit unit from `i`.
///
/// Returns `End` when no input is left, switches to the scratch-buffer path
/// when only one byte remains, and otherwise consumes two bytes and hands the
/// unit to `decode_utf_16le_lo`.
fn Decoder::decode_utf_16le(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  if rem < 2 {
    self.t_need(2)
    return t_fill(Decoder::t_decode_utf_16le, self)
  }
  let at = self.i_pos
  self.i_pos += 2
  // Little-endian: the high byte is the second one.
  self.decode_utf_16le_lo(r_utf_16(self.i, at + 1, at))
}
599

600
///|
/// Continuation run after `t_fill` for a UTF-16LE unit split across chunks:
/// decodes the unit held in `t`, or reports the collected bytes as malformed
/// when fewer than `t_need` bytes arrived.
fn Decoder::t_decode_utf_16le(self : Decoder) -> Decode {
  if self.t_len >= self.t_need {
    return self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0))
  }
  self.ret(Decoder::decode_utf_16le, malformed(self.t, 0, self.t_len))
}
608

609
///|
/// Finishes decoding a UTF-16LE unit classified by `r_utf_16`.
///
/// Complete characters and malformed units are returned as-is. For a high
/// surrogate, the following unit is read as the low surrogate, via the
/// scratch buffer when fewer than two bytes remain in `i`.
fn Decoder::decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
  match v {
    UTF16Uchar(u) => self.ret(Decoder::decode_utf_16le, Uchar(u))
    UTF16Malformed(s) => self.ret(Decoder::decode_utf_16le, Malformed(s))
    Hi(hi) => {
      if self.i_rem() < 2 {
        self.t_need(2)
        return t_fill(curry(t_decode_utf_16le_lo)(hi), self)
      }
      let at = self.i_pos
      let paired = r_utf_16_lo(hi, self.i, at + 1, at)
      // Consume the second unit only when it completed the pair; a rejected
      // unit stays in place and is decoded again on the next step.
      if paired is Uchar(_) {
        self.i_pos += 2
      }
      self.ret(Decoder::decode_utf_16le, paired)
    }
  }
}
631

632
///|
/// Continuation run after `t_fill` when the low surrogate of a UTF-16LE pair
/// was split across chunks. Combines `hi` with the unit held in `t`, or
/// reports a malformed pair when fewer than `t_need` bytes arrived.
fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
  let outcome = if decoder.t_len < decoder.t_need {
    malformed_pair(false, hi, decoder.t, 0, decoder.t_len)
  } else {
    r_utf_16_lo(hi, decoder.t, 1, 0)
  }
  decoder.ret(Decoder::decode_utf_16le, outcome)
}
643

644
///|
/// Combines the high surrogate `hi` with the 16-bit unit stored at
/// `bytes[offset0]` (high byte) and `bytes[offset1]` (low byte).
///
/// Returns `Uchar` with the supplementary-plane code point when that unit is a
/// low surrogate (`0xDC00..=0xDFFF`), and `Malformed` carrying the unit's two
/// bytes otherwise.
fn r_utf_16_lo(
  hi : Int,
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int,
) -> Decode {
  let b0 = bytes[offset0].to_int()
  let b1 = bytes[offset1].to_int()
  let lo = (b0 << 8) | b1
  if lo < 0xDC00 || lo > 0xDFFF {
    // NOTE(jinser): only hi malformed, skip lo if lo is illegal
    //
    // For example, b"\xD8\x00\x00\x48" (BE)
    // Since \xD8\x00 is *legal* hi, here will try to parse lo next,
    // however the whole \xD8\x00\x00\x48 is *illegal* so the result will be a `Malformed[b"\xD8\x00\x00\x48"]`
    //
    // But \x00\x48 itself is a *legal* UTF16 code point with a value of `H`,
    // the ideal result should be: `[Malformed(b"\xD8\x00"), Uchar('H')]`
    //
    // > printf '\xD8\x00\x00\x48' | uconv --from-code UTF16BE --to-code UTF8 --from-callback substitute
    // �H
    Malformed([bytes[offset0], bytes[offset1]])
  } else {
    // Join the two 10-bit halves first and only then add 0x10000. Folding the
    // offset into the low half before OR-ing loses it whenever bit 6 of `hi`
    // is set, because `(hi & 0x3FF) << 10` already occupies bit 16
    // (e.g. D840 DC00 would come out as U+10000 instead of U+20000).
    let joined = ((hi & 0x3FF) << 10) | (lo & 0x3FF)
    Uchar(Int::unsafe_to_char(joined + 0x10000))
  }
}
671

672
///|
/// Classifies the 16-bit unit stored at `bytes[offset0]` (high byte) and
/// `bytes[offset1]` (low byte).
///
/// - `0xD800..=0xDBFF`: a high surrogate, returned as `Hi`.
/// - `0xDC00..=0xDFFF`: a lone low surrogate, returned as `UTF16Malformed`
///   with the unit's two bytes.
/// - anything else: a complete character, returned as `UTF16Uchar`.
fn r_utf_16(
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int,
) -> UTF16Decode {
  let high = bytes[offset0].to_int()
  let low = bytes[offset1].to_int()
  let unit = (high << 8) | low
  if 0xD800 <= unit && unit <= 0xDBFF {
    Hi(unit)
  } else if 0xDC00 <= unit && unit <= 0xDFFF {
    UTF16Malformed(slice(bytes, @cmp.minimum(offset0, offset1), 2))
  } else {
    UTF16Uchar(Int::unsafe_to_char(unit))
  }
}
689

690
// UTF16BE
691

692
///|
/// Entry continuation for UTF-16BE: reads the next 16-bit unit from `i`.
///
/// Returns `End` when no input is left, switches to the scratch-buffer path
/// when only one byte remains, and otherwise consumes two bytes and hands the
/// unit to `decode_utf_16be_lo`.
fn Decoder::decode_utf_16be(self : Decoder) -> Decode {
  let rem = self.i_rem()
  if rem <= 0 {
    return Decode::End
  }
  if rem < 2 {
    self.t_need(2)
    return t_fill(Decoder::t_decode_utf_16be, self)
  }
  let at = self.i_pos
  self.i_pos += 2
  // Big-endian: the high byte comes first.
  self.decode_utf_16be_lo(r_utf_16(self.i, at, at + 1))
}
706

707
///|
/// Continuation run after `t_fill` for a UTF-16BE unit split across chunks:
/// decodes the unit held in `t`, or reports the collected bytes as malformed
/// when fewer than `t_need` bytes arrived.
fn Decoder::t_decode_utf_16be(self : Decoder) -> Decode {
  if self.t_len >= self.t_need {
    return self.decode_utf_16be_lo(r_utf_16(self.t, 0, 1))
  }
  self.ret(Decoder::decode_utf_16be, malformed(self.t, 0, self.t_len))
}
715

716
///|
/// Finishes decoding a UTF-16BE unit classified by `r_utf_16`.
///
/// Complete characters and malformed units are returned as-is. For a high
/// surrogate, the following unit is read as the low surrogate, via the
/// scratch buffer when fewer than two bytes remain in `i`.
fn Decoder::decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
  match decode {
    UTF16Uchar(x) => self.ret(Decoder::decode_utf_16be, Uchar(x))
    UTF16Malformed(x) => self.ret(Decoder::decode_utf_16be, Malformed(x))
    Hi(hi) => {
      if self.i_rem() < 2 {
        self.t_need(2)
        return t_fill(curry(t_decode_utf_16be_lo)(hi), self)
      }
      let at = self.i_pos
      let paired = r_utf_16_lo(hi, self.i, at, at + 1)
      // Consume the second unit only when it completed the pair; a rejected
      // unit stays in place and is decoded again on the next step.
      if paired is Uchar(_) {
        self.i_pos += 2
      }
      self.ret(Decoder::decode_utf_16be, paired)
    }
  }
}
738

739
///|
/// Turns a two-argument function into a chain of one-argument functions, so
/// that `curry(f)(a)(b)` evaluates `f(a, b)`.
fn[T, U, V] curry(f : (T, U) -> V) -> (T) -> (U) -> V {
  fn(first : T) {
    fn(second : U) -> V { f(first, second) }
  }
}
743

744
///|
/// Continuation run after `t_fill` when the low surrogate of a UTF-16BE pair
/// was split across chunks. Combines `hi` with the unit held in `t`, or
/// reports a malformed pair when fewer than `t_need` bytes arrived.
fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode {
  let outcome = if self.t_len < self.t_need {
    malformed_pair(true, hi, self.t, 0, self.t_len)
  } else {
    r_utf_16_lo(hi, self.t, 0, 1)
  }
  self.ret(Decoder::decode_utf_16be, outcome)
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc