• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonbitlang / x / 289

04 Dec 2024 02:48PM UTC coverage: 84.795% (-3.0%) from 87.841%
289

Pull #78

github

web-flow
Merge 03aaaf488 into d72df8024
Pull Request #78: feat: new package encoding

98 of 159 new or added lines in 3 files covered. (61.64%)

1160 of 1368 relevant lines covered (84.8%)

424.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.08
/encoding/encoding.mbt
1
// Copyright 2024 International Digital Economy Academy
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
///|
/// Encodes `src` into the byte representation of the requested `encoding`.
///
/// # Parameters
///
/// - `encoding` : The target encoding format.
/// - `src` : The string to encode.
///
/// # Returns
///
/// The `Bytes` holding `src` encoded in the selected format.
///
/// # Examples
///
/// ```moonbit
/// let src = "Hello, World!"
/// let encoded_bytes = encode(UTF8, src)
/// ```
pub fn encode(encoding : Encoding, src : String) -> Bytes {
  // Fast path: a MoonBit String is already stored as valid UTF16(LE) bytes,
  // so these targets need no transcoding.
  match encoding {
    UTF16 | UTF16LE => return src.to_bytes()
    _ => ()
  }
  // Select the per-char writer for the remaining encodings.
  let emit = match encoding {
    UTF8 => write_utf8_char
    UTF16BE => write_utf16be_char
    _ => abort("unreachable")
  }
  let utf16le = src.to_bytes()
  let out = @buffer.T::new(size_hint=utf16le.length())
  // Walk the string char by char via the UTF16LE decoder and re-encode each one.
  for decoded in decode_strict(UTF16LE, utf16le) {
    // SAFETY: Assume String are always valid UTF16LE
    emit(out, decoded.unwrap())
  }
  out.to_bytes()
}
53

54
///|
/// Appends the UTF8 encoding of `value` to `buf`.
pub fn write_utf8_char(buf : @buffer.T, value : Char) -> Unit {
  buf.write_bytes(to_utf8_bytes(value))
}
60

61
///|
/// Appends the UTF16LE encoding of `value` to `buf`.
///
/// This is an alias that forwards to `write_utf16le_char`.
pub fn write_utf16_char(buf : @buffer.T, value : Char) -> Unit {
  // `x |> f(y)` is `f(x, y)`: forward both arguments unchanged.
  buf |> write_utf16le_char(value)
}
67

68
///|
/// Appends the UTF16LE encoding of `value` to `buf`.
pub fn write_utf16le_char(buf : @buffer.T, value : Char) -> Unit {
  buf.write_bytes(to_utf16le_bytes(value))
}
74

75
///|
/// Appends the UTF16BE encoding of `value` to `buf`.
pub fn write_utf16be_char(buf : @buffer.T, value : Char) -> Unit {
  buf.write_bytes(to_utf16be_bytes(value))
}
81

82
///|
/// Encodes a single `Char` as its UTF8 byte sequence (1 to 4 bytes).
///
/// Aborts with "Char out of range" when the code point is 0x110000 or above.
fn to_utf8_bytes(value : Char) -> Bytes {
  let code = value.to_uint()
  // Continuation byte (10xxxxxx) carrying the 6 bits of `code` that start at bit `shift`.
  fn tail(shift : Int) -> Byte {
    (((code >> shift) & 0x3F) | 0x80).to_byte()
  }

  if code < 0x80 {
    // 1 byte: 0xxxxxxx
    @bytes.of([(code & 0x7F).to_byte()])
  } else if code < 0x0800 {
    // 2 bytes: 110xxxxx 10xxxxxx
    let lead = (((code >> 6) & 0x1F) | 0xC0).to_byte()
    @bytes.of([lead, tail(0)])
  } else if code < 0x010000 {
    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    let lead = (((code >> 12) & 0x0F) | 0xE0).to_byte()
    @bytes.of([lead, tail(6), tail(0)])
  } else if code < 0x110000 {
    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    let lead = (((code >> 18) & 0x07) | 0xF0).to_byte()
    @bytes.of([lead, tail(12), tail(6), tail(0)])
  } else {
    abort("Char out of range")
  }
}
111

112
///|
/// Encodes a single `Char` as UTF16 with little-endian byte order.
///
/// Code points below 0x10000 become one 16-bit unit (2 bytes); code points
/// below 0x110000 become a surrogate pair (4 bytes, high surrogate first).
/// Aborts with "Char out of range" for anything larger.
fn to_utf16le_bytes(value : Char) -> Bytes {
  let code = value.to_uint()
  match code {
    _..<0x10000 =>
      // Single unit: low byte first.
      @bytes.of([(code & 0xFF).to_byte(), (code >> 8).to_byte()])
    _..<0x110000 => {
      // Split the 20-bit offset into a high (0xD800) and a low (0xDC00) surrogate.
      let offset = code - 0x10000
      let high_surrogate = (offset >> 10) | 0xD800
      let low_surrogate = (offset & 0x3FF) | 0xDC00
      @bytes.of([
        (high_surrogate & 0xFF).to_byte(),
        (high_surrogate >> 8).to_byte(),
        (low_surrogate & 0xFF).to_byte(),
        (low_surrogate >> 8).to_byte(),
      ])
    }
    _ => abort("Char out of range")
  }
}
132

133
///|
/// Encodes a single `Char` as UTF16 with big-endian byte order.
///
/// Code points below 0x10000 become one 16-bit unit (2 bytes, high byte
/// first); code points below 0x110000 become a surrogate pair (4 bytes, high
/// surrogate first). Aborts with "Char out of range" for anything larger.
fn to_utf16be_bytes(value : Char) -> Bytes {
  let code = value.to_uint()
  if code < 0x10000 {
    // High byte is bits 8..15; shifting by 8 (not 0xFF) extracts it.
    let b0 = (code >> 8).to_byte()
    let b1 = (code & 0xFF).to_byte()
    @bytes.of([b0, b1])
  } else if code < 0x110000 {
    // Split the 20-bit offset into a high (0xD800) and a low (0xDC00) surrogate.
    let offset = code - 0x10000
    let high_surrogate = (offset >> 10) | 0xD800
    let low_surrogate = (offset & 0x3FF) | 0xDC00
    let b0 = (high_surrogate >> 8).to_byte()
    let b1 = (high_surrogate & 0xFF).to_byte()
    let b2 = (low_surrogate >> 8).to_byte()
    let b3 = (low_surrogate & 0xFF).to_byte()
    @bytes.of([b0, b1, b2, b3])
  } else {
    abort("Char out of range")
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc