• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonbitlang / x / 763

08 Apr 2026 06:55AM UTC coverage: 87.368%. Remained the same
763

Pull #232

github

web-flow
Merge 7ce24b553 into 890b53b6d
Pull Request #232: fix: normalize result in Path::resolve

0 of 2 new or added lines in 2 files covered. (0.0%)

101 existing lines in 9 files now uncovered.

2317 of 2652 relevant lines covered (87.37%)

345.26 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.31
/encoding/decoding.mbt
1
// Copyright 2025 International Digital Economy Academy
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
///|
/// U+FFFD REPLACEMENT CHARACTER.
///
/// The lossy decoding functions in this file emit this character in place of
/// every malformed byte sequence they encounter.
/// See https://unicode.org/charts/nameslist/n_FFF0.html
pub const U_REP : Char = '\u{FFFD}'
19

20
///|
/// Lookup table indexed by the first byte of a UTF-8 sequence, giving the
/// total length in bytes of the sequence that byte starts.
///
/// A value of `0` marks a byte that `decode_utf_8` reports as malformed when
/// it appears in leading position. One row per 16 byte values.
let utf_8_len : ReadOnlyArray[Int] = [
  // 0x00 - 0x7F: single-byte sequences
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // 0x80 - 0xBF: never a valid first byte
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0xC0 - 0xC1: rejected; 0xC2 - 0xDF: two-byte sequences
  0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  // 0xE0 - 0xEF: three-byte sequences
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  // 0xF0 - 0xF4: four-byte sequences; 0xF5 - 0xFF: rejected
  4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
]
33

34
///|
/// Create and return a `Decoder` for the specified character encoding.
///
/// The `Decoder` consumes byte sequences and decodes them into strings. It
/// starts with an empty input buffer and a 4-byte scratch buffer used to hold
/// a code unit sequence that is split across two inputs.
///
/// # Parameters
///
/// - `encoding`: The character encoding format to be used for decoding the input byte sequences.
///   `UTF16` is decoded exactly like `UTF16LE`.
///
/// # Returns
///
/// A `Decoder` instance that can be used to decode byte sequences into strings.
///
/// # Examples
///
/// ```moonbit
/// let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
/// let decoder = decoder(UTF8)
/// inspect(decoder.consume(inputs[0]), content="abc")
/// inspect(decoder.consume(inputs[1]), content="")
/// inspect(decoder.consume(inputs[2]), content="🐰")
/// assert_true(decoder.finish().is_empty())
/// ```
pub fn decoder(encoding : Encoding) -> Decoder {
  // Pick the initial continuation: the function that decodes the next
  // character for the requested encoding.
  let k = match encoding {
    UTF8 => Decoder::decode_utf_8
    UTF16 | UTF16LE => Decoder::decode_utf_16le
    UTF16BE => Decoder::decode_utf_16be
  }
  // Input buffer and read position.
  let i = FixedArray::default()
  let i_pos = 0
  // Scratch buffer for a sequence split across inputs, and its fill state.
  let t = FixedArray::make(4, Byte::default())
  let t_len = 0
  let t_need = 0
  { i, i_pos, t, t_len, t_need, k }
}
70

71
///|
/// Decode the given byte sequence using the specified `Decoder` and return the resulting string.
///
/// This function can work in streaming mode where bytes are consumed incrementally.
/// When `stream` is `false`, it indicates the end of the input and triggers the final decoding step.
///
/// The decoding itself is performed by `Decoder::decode_to`; this function only
/// supplies a fresh `StringBuilder` and returns its content.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `stream~`: A boolean indicating whether more bytes will be supplied for decoding. It defaults to `false`.
///
/// # Returns
///
/// A `String` representing the decoded content from the input byte sequence.
/// The string is empty when there is nothing to decode.
///
/// # Errors
///
/// `MalformedError`: when the byte sequence is not properly formatted according to the specified encoding.
/// `TruncatedError`: when `stream` is `false` and the byte sequence ends prematurely, implying that more data is expected for complete decoding.
///
/// # Examples
///
/// ```moonbit check
/// test {
///   let inputs = [b"abc", b"\xf0", b"\x9f\x90\xb0"] // UTF8(🐰) == <F09F 90B0>
///   let decoder = @encoding.decoder(UTF8)
///   inspect(decoder.decode(inputs[0], stream=true), content="abc")
///   inspect(decoder.decode(inputs[1], stream=true), content="")
///   inspect(decoder.decode(inputs[2], stream=false), content="🐰")
/// }
/// ```
pub fn Decoder::decode(
  self : Decoder,
  input : BytesView,
  stream? : Bool = false,
) -> String raise Error {
  // TODO: Estimate size_hint based on input and encoding more accurately
  let builder = StringBuilder::new(size_hint=input.length())
  // Errors raised by `decode_to` propagate unchanged.
  self.decode_to(input, builder, stream=stream)
  builder.to_string()
}
139

140
///|
/// Decodes the given byte sequence using the specified decoder and writes the
/// result directly to a StringBuilder.
/// Similar to `decode`, but writes the result to an existing StringBuilder
/// instead of creating a new String.
///
/// Parameters:
///
/// * `self` : The decoder instance used to decode the byte sequence.
/// * `input` : The byte sequence to be decoded.
/// * `output` : The StringBuilder where the decoded content will be written to.
/// * `stream~` : Whether more bytes will be supplied later. Defaults to `false`.
///
/// Throws a `MalformedError` when the byte sequence is not properly formatted
/// according to the specified encoding, and a `TruncatedError` when `stream`
/// is `false` and the decoder asks for more input.
/// Characters decoded before the error have already been written to `output`.
///
/// Example:
///
/// ```moonbit check
/// test {
///   let decoder = decoder(UTF8)
///   let buf = StringBuilder::new()
///   decoder.decode_to(b"Hello", buf)
///   inspect(buf.to_string(), content="Hello")
/// }
/// ```
pub fn Decoder::decode_to(
  self : Decoder,
  input : BytesView,
  output : StringBuilder,
  stream? : Bool = false,
) -> Unit raise {
  // Append the new bytes to whatever is still pending.
  if input.length() > 0 {
    self.i_cont(input)
  }
  guard self.i_rem() != 0 else { return }
  // Drive the decoder one step at a time until it stops.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      End => return
      Malformed(bytes) => {
        // NOTE(review): in streaming mode a malformed result is swallowed
        // once `t_need` is positive; confirm this is the intended contract.
        if stream && self.t_need > 0 {
          return
        }
        raise MalformedError(bytes)
      }
      Refill(pending) => {
        // The decoder needs more input: fine while streaming, an error at
        // the end of the input.
        if stream {
          return
        }
        raise TruncatedError(pending)
      }
    }
  }
}
193

194
///|
/// One-shot decode: creates a fresh `Decoder` for `encoding`, decodes all of
/// `input` as a complete (non-streaming) input and writes the characters to
/// `output`.
///
/// Raises whatever `Decoder::decode_to` raises for the same input.
pub fn decode_to(
  input : BytesView,
  output : StringBuilder,
  encoding~ : Encoding,
) -> Unit raise {
  let one_shot = decoder(encoding)
  one_shot.decode_to(input, output)
}
202

203
///|
/// Consume the given byte sequence using the specified `Decoder` and return the resulting string incrementally.
///
/// This function calls `decode` with the `stream` parameter set to `true`, indicating that more bytes will follow for decoding.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to consume the byte sequence.
/// - `input`: The byte sequence to be consumed and decoded incrementally.
///
/// # Returns
///
/// A `String` holding the characters that could be decoded so far. When the
/// decoder asks for more input, the output produced so far is returned
/// instead of an error.
///
/// # Errors
///
/// `MalformedError`: when the byte sequence is not properly formatted according to the specified encoding.
pub fn Decoder::consume(
  self : Decoder,
  input : BytesView,
) -> String raise Error {
  let decoded = self.decode(input, stream=true)
  decoded
}
227

228
///|
/// Finalize the decoding process and return the remaining decoded string.
///
/// This function calls `decode` with an empty input and the `stream` parameter
/// set to `false`, indicating that no more bytes will be supplied and
/// triggering the final decoding step to produce the remaining output.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to finalize the decoding process.
///
/// # Returns
///
/// A `String` representing the final part of the decoded content, after all byte sequences have been processed.
///
/// # Errors
///
/// `MalformedError`: This error is raised if the remaining byte sequence is not properly formatted according to the specified encoding.
/// `TruncatedError`: This error is raised if the remaining byte sequence ends prematurely, implying that more data was expected for complete decoding.
pub fn Decoder::finish(self : Decoder) -> String raise Error {
  let tail = self.decode(b"", stream=false)
  tail
}
249

250
///|
/// Decode the given byte sequence using the specified `Decoder` and return the resulting string, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
///
/// This function can work in streaming mode where bytes are consumed incrementally.
/// When `stream` is `false`, it indicates the end of the input and triggers the final decoding step.
///
/// The decoding itself is performed by `Decoder::decode_lossy_to`; this
/// function only supplies a fresh `StringBuilder` and returns its content.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `stream~`: A boolean indicating whether more bytes will be supplied for decoding. It defaults to `false`.
///
/// # Returns
///
/// A `String` representing the decoded content from the input byte sequence, with any invalid sequences replaced by the Unicode Replacement Character (`U+FFFD`).
/// The string is empty when there is nothing to decode.
pub fn Decoder::decode_lossy(
  self : Decoder,
  input : BytesView,
  stream? : Bool = false,
) -> String {
  // Write straight into a builder instead of collecting an intermediate
  // array of characters.
  let builder = StringBuilder::new(size_hint=input.length())
  self.decode_lossy_to(input, builder, stream=stream)
  builder.to_string()
}
300

301
///|
/// Lossy counterpart of `Decoder::decode_to`: decodes `input` and writes the
/// characters to `output`, writing `U_REP` (`U+FFFD`) for every malformed
/// sequence instead of raising an error.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to decode the byte sequence.
/// - `input`: The byte sequence to be decoded.
/// - `output`: The `StringBuilder` that receives the decoded characters.
/// - `stream~`: Whether more bytes will be supplied later. It defaults to `false`.
pub fn Decoder::decode_lossy_to(
  self : Decoder,
  input : BytesView,
  output : StringBuilder,
  stream? : Bool = false,
) -> Unit {
  // Append the new bytes to whatever is still pending.
  if input.length() > 0 {
    self.i_cont(input)
  }
  guard self.i_rem() != 0 else { return }
  // Drive the decoder one step at a time until it stops.
  while true {
    match self.decode_() {
      Uchar(ch) => output.write_char(ch)
      End => return
      Malformed(_) => {
        // NOTE(review): in streaming mode a malformed result is swallowed
        // once `t_need` is positive; confirm this is the intended contract.
        if stream && self.t_need > 0 {
          return
        }
        output.write_char(U_REP)
      }
      // While streaming, wait for the next input. At the end of the input,
      // keep stepping so the decoder can report the incomplete sequence.
      Refill(_) => if stream { return }
    }
  }
}
332

333
///|
/// Consume the given byte sequence using the specified `Decoder` and return the resulting string incrementally, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
///
/// This function calls `decode_lossy` with the `stream` parameter set to `true`, indicating that more bytes will follow for decoding.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to consume and decode the byte sequence.
/// - `input`: The byte sequence to be consumed and decoded incrementally.
///
/// # Returns
///
/// A `String` holding the characters that could be decoded so far, with
/// invalid sequences replaced by `U+FFFD`.
pub fn Decoder::lossy_consume(self : Decoder, input : BytesView) -> String {
  let decoded = self.decode_lossy(input, stream=true)
  decoded
}
349

350
///|
/// Finalize the lossy decoding process and return the remaining decoded string, replacing any invalid sequences with the Unicode Replacement Character (`U+FFFD`).
///
/// This function calls `decode_lossy` with an empty input and the `stream`
/// parameter set to `false`, indicating that no more bytes will be supplied
/// and triggering the final decoding step to produce the remaining output.
///
/// # Parameters
///
/// - `self`: The `Decoder` instance used to finalize the lossy decoding process.
///
/// # Returns
///
/// A `String` representing the final part of the decoded content, with any invalid sequences replaced by the Unicode Replacement Character (`U+FFFD`).
pub fn Decoder::lossy_finish(self : Decoder) -> String {
  let tail = self.decode_lossy(b"", stream=false)
  tail
}
366

367
///|
/// Replace the input buffer `i` with a new buffer holding the still-unread
/// tail of the old one followed by all of `input`, and reset the read
/// position to the start.
fn Decoder::i_cont(self : Decoder, input : BytesView) -> Unit {
  // `i_rem()` can be negative (see `eoi`); clamp so nothing is carried over
  // in that case.
  let carried = @cmp.maximum(self.i_rem(), 0)
  let merged = FixedArray::make(carried + input.length(), Byte::default())
  // Keep the bytes of the old buffer that have not been decoded yet.
  if carried > 0 {
    self.i.blit_to(merged, len=carried, src_offset=self.i_pos)
  }
  // The new input goes right after the carried-over bytes.
  merged.blit_from_bytesview(carried, input)
  self.i_pos = 0
  self.i = merged
}
384

385
// Implementations
386

387
///|
/// Run one decoding step by invoking the current continuation `k` on the
/// decoder itself.
fn Decoder::decode_(self : Decoder) -> Decode {
  let step = self.k
  step(self)
}
391

392
///|
/// Install `k` as the continuation for the next decoding step and hand back
/// the result `v` unchanged.
fn Decoder::ret(self : Decoder, k : Cont, v : Decode) -> Decode {
  let result = v
  self.k = k
  result
}
397

398
///|
/// Number of bytes of the input buffer `i` that have not been read yet:
/// the buffer length minus the read position.
///
/// The result is negative when the read position lies past the end of the
/// buffer, which `t_fill` treats as end of input.
fn Decoder::i_rem(self : Decoder) -> Int {
  let total = self.i.length()
  total - self.i_pos
}
402

403
///|
/// Prepare the scratch buffer `t` to collect a sequence of `need` bytes:
/// records the required length and marks the buffer as empty.
fn Decoder::t_need(self : Decoder, need : Int) -> Unit {
  self.t_need = need
  self.t_len = 0
}
408

409
///|
/// Drop the current input buffer by replacing `i` with the default
/// `FixedArray`. The read position `i_pos` is left untouched.
fn Decoder::eoi(self : Decoder) -> Unit {
  let empty : FixedArray[Byte] = FixedArray::default()
  self.i = empty
}
413

414
///|
/// Signal that more input is required: drops the current input buffer,
/// installs `k` as the continuation to resume with, and returns a `Refill`
/// carrying a copy of the scratch buffer `t`.
fn Decoder::refill(self : Decoder, k : Cont) -> Decode {
  self.eoi()
  // NOTE(review): the whole scratch buffer is copied, not just the first
  // `t_len` bytes; confirm that callers expect this.
  let pending = Bytes::from_array(self.t)
  self.ret(k, Decode::Refill(pending))
}
419

420
///|
/// Move bytes from the input buffer into the scratch buffer `t` until it holds
/// `t_need` bytes, then continue with `k`.
///
/// - If the input is at end (negative `i_rem()`), `k` runs immediately on
///   whatever `t` holds.
/// - If the input runs out first, everything available is moved and a
///   `Refill` is returned; the next step resumes this function with the same `k`.
/// - Otherwise exactly the missing bytes are moved and `k` runs.
fn t_fill(k : Cont, decoder : Decoder) -> Decode {
  let available = decoder.i_rem()
  // eoi
  guard available >= 0 else { return k(decoder) }
  let missing = decoder.t_need - decoder.t_len
  let starved = available < missing
  // Take what is needed, or everything that is left if that is less.
  let take = if starved { available } else { missing }
  decoder.i.blit_to(
    decoder.t,
    len=take,
    dst_offset=decoder.t_len,
    src_offset=decoder.i_pos,
  )
  decoder.i_pos += take
  decoder.t_len += take
  if starved {
    decoder.refill(curry(t_fill)(k))
  } else {
    k(decoder)
  }
}
447

448
// UTF8
449

450
///|
/// Decode the next UTF-8 sequence from the input buffer.
///
/// Returns `End` when no input is left. When the sequence announced by the
/// first byte is not fully available, the bytes are collected through the
/// scratch buffer (`t_fill`). A first byte whose table entry is `0` is
/// consumed alone and reported as malformed.
fn Decoder::decode_utf_8(self : Decoder) -> Decode {
  let available = self.i_rem()
  guard available > 0 else { return Decode::End }
  let start = self.i_pos
  // Sequence length announced by the first byte (0 = invalid first byte).
  let width = utf_8_len[self.i[start].to_int()]
  if available < width {
    // The sequence is split across inputs.
    self.t_need(width)
    t_fill(Decoder::t_decode_utf_8, self)
  } else if width == 0 {
    self.i_pos += 1
    self.ret(Decoder::decode_utf_8, malformed(self.i, start, 1))
  } else {
    self.i_pos += width
    self.ret(Decoder::decode_utf_8, r_utf_8(self.i, start, width))
  }
}
473

474
///|
/// Continuation run after `t_fill` for UTF-8: decodes the sequence held in the
/// scratch buffer `t`, or reports its bytes as malformed when fewer than
/// `t_need` bytes could be collected. Decoding then resumes with
/// `decode_utf_8`.
fn Decoder::t_decode_utf_8(self : Decoder) -> Decode {
  let result = if self.t_len < self.t_need {
    malformed(self.t, 0, self.t_len)
  } else {
    r_utf_8(self.t, 0, self.t_len)
  }
  self.ret(Decoder::decode_utf_8, result)
}
482

483
///|
/// Decode one UTF-8 sequence of `length` bytes starting at `bytes[offset]`.
///
/// Returns `Uchar` with the decoded character when every trailing byte is
/// acceptable, otherwise the result of `malformed(bytes, offset, length)`.
/// The second byte is range-checked more strictly after the first bytes
/// `0xE0`, `0xED`, `0xF0` and `0xF4`; every other trailing byte must have the
/// form `10xxxxxx`.
///
/// Panics when `length` is not 1, 2, 3 or 4.
fn r_utf_8(bytes : FixedArray[Byte], offset : Int, length : Int) -> Decode {
  fn uchar(c : Int) -> Decode {
    Uchar(Int::unsafe_to_char(c))
  }

  // True for bytes of the form 10xxxxxx.
  fn is_cont(b : Int) -> Bool {
    (b >> 6) == 0b10
  }

  match length {
    1 => uchar(bytes[offset].to_int())
    2 => {
      let b0 = bytes[offset].to_int()
      let b1 = bytes[offset + 1].to_int()
      if is_cont(b1) {
        uchar(((b0 & 0x1F) << 6) | (b1 & 0x3F))
      } else {
        malformed(bytes, offset, length)
      }
    }
    3 => {
      let b0 = bytes[offset].to_int()
      let b1 = bytes[offset + 1].to_int()
      let b2 = bytes[offset + 2].to_int()
      // Allowed range of the second byte depends on the first byte.
      let second_ok = match b0 {
        0xE0 => 0xA0 <= b1 && b1 <= 0xBF
        0xED => 0x80 <= b1 && b1 <= 0x9F
        _ => is_cont(b1)
      }
      if is_cont(b2) && second_ok {
        uchar(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F))
      } else {
        malformed(bytes, offset, length)
      }
    }
    4 => {
      let b0 = bytes[offset].to_int()
      let b1 = bytes[offset + 1].to_int()
      let b2 = bytes[offset + 2].to_int()
      let b3 = bytes[offset + 3].to_int()
      // Allowed range of the second byte depends on the first byte.
      let second_ok = match b0 {
        0xF0 => 0x90 <= b1 && b1 <= 0xBF
        0xF4 => 0x80 <= b1 && b1 <= 0x8F
        _ => is_cont(b1)
      }
      if is_cont(b3) && is_cont(b2) && second_ok {
        uchar(
          ((b0 & 0x07) << 18) |
          ((b1 & 0x3F) << 12) |
          ((b2 & 0x3F) << 6) |
          (b3 & 0x3F),
        )
      } else {
        malformed(bytes, offset, length)
      }
    }
    _ => panic()
  }
}
567

568
// UTF16LE
569

570
///|
/// Result of reading a single 16-bit code unit (see `r_utf_16`).
priv enum UTF16Decode {
  /// The unit is outside 0xD800..0xDFFF and is a complete character.
  UTF16Uchar(Char)
  /// The unit is in 0xD800..0xDBFF; a second unit is required.
  Hi(Int)
  /// The unit is in 0xDC00..0xDFFF where a first unit was expected; carries
  /// the offending bytes.
  UTF16Malformed(Bytes)
}
576

577
///|
/// Decode the next UTF-16LE code unit from the input buffer.
///
/// Returns `End` when no input is left. A single remaining byte is collected
/// through the scratch buffer (`t_fill`); otherwise two bytes are consumed
/// and the unit is passed on to `decode_utf_16le_lo`.
fn Decoder::decode_utf_16le(self : Decoder) -> Decode {
  let available = self.i_rem()
  guard available > 0 else { return Decode::End }
  if available >= 2 {
    let start = self.i_pos
    self.i_pos = start + 2
    // Little-endian: the high-order byte is the second one.
    self.decode_utf_16le_lo(r_utf_16(self.i, start + 1, start))
  } else {
    self.t_need(2)
    t_fill(Decoder::t_decode_utf_16le, self)
  }
}
591

592
///|
/// Continuation run after `t_fill` for a UTF-16LE code unit: reads the unit
/// from the scratch buffer `t`, or reports the collected bytes as malformed
/// when fewer than `t_need` bytes could be gathered.
fn Decoder::t_decode_utf_16le(self : Decoder) -> Decode {
  guard self.t_len >= self.t_need else {
    return self.ret(Decoder::decode_utf_16le, malformed(self.t, 0, self.t_len))
  }
  self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0))
}
600

601
///|
/// Finish a UTF-16LE decoding step given the result `v` of reading the first
/// code unit.
///
/// Complete characters and malformed units are returned directly. For a `Hi`
/// unit the following unit is read: from the input buffer when two bytes are
/// available, otherwise through the scratch buffer. The two bytes of the
/// second unit are consumed only when the pair decodes to a character.
fn Decoder::decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
  match v {
    UTF16Uchar(ch) => self.ret(Decoder::decode_utf_16le, Uchar(ch))
    UTF16Malformed(raw) => self.ret(Decoder::decode_utf_16le, Malformed(raw))
    Hi(hi) =>
      if self.i_rem() >= 2 {
        let start = self.i_pos
        let decoded = r_utf_16_lo(hi, self.i, start + 1, start)
        // On failure the position stays put, so the next step re-reads
        // these two bytes as a fresh unit.
        if decoded is Uchar(_) {
          self.i_pos += 2
        }
        self.ret(Decoder::decode_utf_16le, decoded)
      } else {
        self.t_need(2)
        t_fill(curry(t_decode_utf_16le_lo)(hi), self)
      }
  }
}
623

624
///|
/// Continuation run after `t_fill` for the second UTF-16LE code unit of a
/// pair whose first unit is `hi`: combines `hi` with the unit in the scratch
/// buffer, or reports a malformed pair when fewer than `t_need` bytes could
/// be gathered.
fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
  let result = if decoder.t_len < decoder.t_need {
    malformed_pair(false, hi, decoder.t, 0, decoder.t_len)
  } else {
    r_utf_16_lo(hi, decoder.t, 1, 0)
  }
  decoder.ret(Decoder::decode_utf_16le, result)
}
635

636
///|
/// Read the second code unit of a surrogate pair and combine it with the
/// first unit `hi`.
///
/// # Parameters
///
/// - `hi`: The first (high) unit of the pair, already read by `r_utf_16`.
/// - `bytes`: The buffer holding the second unit.
/// - `offset0`: Index of the high-order byte of the second unit.
/// - `offset1`: Index of the low-order byte of the second unit.
///
/// # Returns
///
/// `Uchar` with the decoded character when the second unit is in
/// 0xDC00..0xDFFF, otherwise `Malformed` carrying the two bytes of the second
/// unit (high-order byte first).
fn r_utf_16_lo(
  hi : Int,
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int,
) -> Decode {
  let b0 = bytes[offset0].to_int()
  let b1 = bytes[offset1].to_int()
  let lo = (b0 << 8) | b1
  if lo < 0xDC00 || lo > 0xDFFF {
    // NOTE(jinser): only hi malformed, skip lo if lo is illegal
    //
    // For example, b"\xD8\x00\x00\x48" (BE)
    // Since \xD8\x00 is *legal* hi, here will try to parse lo next,
    // however the whole \xD8\x00\x00\x48 is *illegal* so the result will be a `Malformed[b"\xD8\x00\x00\x48"]`
    //
    // But \x00\x48 itself is a *legal* UTF16 code point with a value of `H`,
    // the ideal result should be: `[Malformed(b"\xD8\x00"), Uchar('H')]`
    //
    // > printf '\xD8\x00\x00\x48' | uconv --from-code UTF16BE --to-code UTF8 --from-callback substitute
    // �H
    Malformed([bytes[offset0], bytes[offset1]])
  } else {
    // Join the two 10-bit payloads first, then add the 0x10000 offset.
    // The offset must be added, not OR-ed in: the high payload shifted
    // left by 10 can itself have bit 16 set (e.g. hi = 0xD840 for U+20000),
    // and OR-ing would then lose the offset.
    let payload = ((hi & 0x3FF) << 10) | (lo & 0x3FF)
    Uchar(Int::unsafe_to_char(payload + 0x10000))
  }
}
663

664
///|
/// Read one 16-bit code unit from `bytes` and classify it.
///
/// `offset0` is the index of the high-order byte and `offset1` the index of
/// the low-order byte, so the same function serves both byte orders.
///
/// - 0xD800..0xDBFF: `Hi`, a second unit is required.
/// - 0xDC00..0xDFFF: `UTF16Malformed` with the two bytes in buffer order.
/// - anything else: `UTF16Uchar` with the unit as a character.
fn r_utf_16(
  bytes : FixedArray[Byte],
  offset0 : Int,
  offset1 : Int,
) -> UTF16Decode {
  let high_byte = bytes[offset0].to_int()
  let low_byte = bytes[offset1].to_int()
  let unit = (high_byte << 8) | low_byte
  if 0xD800 <= unit && unit <= 0xDBFF {
    Hi(unit)
  } else if 0xDC00 <= unit && unit <= 0xDFFF {
    UTF16Malformed(slice(bytes, @cmp.minimum(offset0, offset1), 2))
  } else {
    UTF16Uchar(Int::unsafe_to_char(unit))
  }
}
681

682
// UTF16BE
683

684
///|
/// Decode the next UTF-16BE code unit from the input buffer.
///
/// Returns `End` when no input is left. A single remaining byte is collected
/// through the scratch buffer (`t_fill`); otherwise two bytes are consumed
/// and the unit is passed on to `decode_utf_16be_lo`.
fn Decoder::decode_utf_16be(self : Decoder) -> Decode {
  let available = self.i_rem()
  guard available > 0 else { return Decode::End }
  if available >= 2 {
    let start = self.i_pos
    self.i_pos = start + 2
    // Big-endian: the high-order byte is the first one.
    self.decode_utf_16be_lo(r_utf_16(self.i, start, start + 1))
  } else {
    self.t_need(2)
    t_fill(Decoder::t_decode_utf_16be, self)
  }
}
698

699
///|
/// Continuation run after `t_fill` for a UTF-16BE code unit: reads the unit
/// from the scratch buffer `t`, or reports the collected bytes as malformed
/// when fewer than `t_need` bytes could be gathered.
fn Decoder::t_decode_utf_16be(self : Decoder) -> Decode {
  guard self.t_len >= self.t_need else {
    return self.ret(Decoder::decode_utf_16be, malformed(self.t, 0, self.t_len))
  }
  self.decode_utf_16be_lo(r_utf_16(self.t, 0, 1))
}
707

708
///|
/// Finish a UTF-16BE decoding step given the result `decode` of reading the
/// first code unit.
///
/// Complete characters and malformed units are returned directly. For a `Hi`
/// unit the following unit is read: from the input buffer when two bytes are
/// available, otherwise through the scratch buffer. The two bytes of the
/// second unit are consumed only when the pair decodes to a character.
fn Decoder::decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
  match decode {
    UTF16Uchar(ch) => self.ret(Decoder::decode_utf_16be, Uchar(ch))
    UTF16Malformed(raw) => self.ret(Decoder::decode_utf_16be, Malformed(raw))
    Hi(hi) =>
      if self.i_rem() >= 2 {
        let start = self.i_pos
        let decoded = r_utf_16_lo(hi, self.i, start, start + 1)
        // On failure the position stays put, so the next step re-reads
        // these two bytes as a fresh unit.
        if decoded is Uchar(_) {
          self.i_pos += 2
        }
        self.ret(Decoder::decode_utf_16be, decoded)
      } else {
        self.t_need(2)
        t_fill(curry(t_decode_utf_16be_lo)(hi), self)
      }
  }
}
730

731
///|
/// Turn a two-argument function into a chain of one-argument functions:
/// `curry(f)(a)(b)` evaluates `f(a, b)`.
fn[T, U, V] curry(f : (T, U) -> V) -> (T) -> (U) -> V {
  fn(first : T) {
    fn(second : U) -> V { f(first, second) }
  }
}
735

736
///|
/// Continuation run after `t_fill` for the second UTF-16BE code unit of a
/// pair whose first unit is `hi`: combines `hi` with the unit in the scratch
/// buffer, or reports a malformed pair when fewer than `t_need` bytes could
/// be gathered.
fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode {
  let result = if self.t_len < self.t_need {
    malformed_pair(true, hi, self.t, 0, self.t_len)
  } else {
    r_utf_16_lo(hi, self.t, 0, 1)
  }
  self.ret(Decoder::decode_utf_16be, result)
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc