• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonbitlang / x / 799

25 May 2026 03:36AM UTC coverage: 87.36% (-0.009%) from 87.369%
799

Pull #242

github

web-flow
Merge 435d448c3 into ac75a1b04
Pull Request #242: perf(encoding): UTF-8 encoder walks code units inline instead of for char in src

6 of 7 new or added lines in 1 file covered. (85.71%)

17 existing lines in 3 files now uncovered.

2336 of 2674 relevant lines covered (87.36%)

342.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.74
/encoding/encoding.mbt
1
// Copyright 2025 International Digital Economy Academy
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
///|
/// Encodes `src` into the requested character encoding and returns the
/// resulting bytes.
///
/// # Parameters
///
/// - `encoding` : The target encoding format.
/// - `src` : The input string to be encoded.
///
/// # Returns
///
/// A `Bytes` holding `src` encoded in the selected format.
///
/// # Notes
///
/// - `UTF16` and `UTF16LE` copy the string's code units straight into the
///   output without going through a per-char writer.
/// - For `UTF8` and `UTF16BE`, a surrogate code unit that is not part of a
///   high/low pair is not rejected: it is passed to the per-char writer as a
///   code point of its own.
///
/// # Examples
///
/// ```moonbit check
/// test {
///   let src = "Hello, World!"
///   let encoded_bytes = encode(UTF8, src)
///   assert_eq(
///     encoded_bytes,
///     Bytes::from_array([
///       72, 101, 108, 108, 111, 44, 32, 87, 111, 114, 108, 100, 33,
///     ]),
///   )
/// }
/// ```
pub fn encode(encoding : Encoding, src : String) -> Bytes {
  let len = src.length()
  // Select the per-char writer together with the worst-case number of output
  // bytes per UTF-16 code unit. Every variant is listed explicitly (no
  // wildcard arm), so the match needs no "unreachable" abort and a newly added
  // `Encoding` variant is caught by the compiler's exhaustiveness check.
  let (write, max_bytes_per_unit) = match encoding {
    UTF16 | UTF16LE => {
      // NOTE: special case: MoonBit String are already valid UTF16(LE) bytes
      let arr = FixedArray::make(len * 2, b'\x00')
      arr.blit_from_string(0, src, 0, len)
      return arr.unsafe_reinterpret_as_bytes()
    }
    // One code unit below 0x10000 needs at most 3 UTF-8 bytes; a surrogate
    // pair is two code units producing 4 bytes, i.e. 2 bytes per unit.
    UTF8 => (write_utf8_char, 3)
    // Every code unit is re-emitted as exactly two bytes.
    UTF16BE => (write_utf16be_char, 2)
  }
  let new_buf = Buffer(size_hint=len * max_bytes_per_unit)
  // Walk by UTF-16 code unit and assemble surrogate pairs inline. This
  // avoids `for char in src`, which goes through String::iter +
  // Iter::next per char (~30% of self time on an ASCII-heavy 64 KiB
  // bench even though the inner body is small).
  let mut i = 0
  while i < len {
    let cu = src.unsafe_get(i).to_int()
    i += 1
    // Only a high surrogate that is immediately followed by a low surrogate
    // is combined; anything else is forwarded as a single code unit.
    let cp = if cu >= 0xD800 && cu <= 0xDBFF && i < len {
      let cu2 = src.unsafe_get(i).to_int()
      if cu2 >= 0xDC00 && cu2 <= 0xDFFF {
        i += 1
        (cu - 0xD800) * 0x400 + (cu2 - 0xDC00) + 0x10000
      } else {
        cu
      }
    } else {
      cu
    }
    // SAFETY: Assume String contains valid UTF-16
    write(new_buf, cp.unsafe_to_char())
  }
  new_buf.to_bytes()
}
82

83
///|
/// Encodes a string into the specified character encoding and writes the
/// result directly into a buffer.
///
/// Parameters:
///
/// * `src` : The input string to be encoded.
/// * `buffer` : The buffer where the encoded bytes will be written to.
/// * `encoding` : The target encoding format. This is a required labelled
///   argument; the signature declares no default value.
///
/// Example:
///
/// ```moonbit check
/// test {
///   let buf = Buffer()
///   let text = "Hello, world"
///   @encoding.encode_to(text, buf, encoding=UTF16)
///   inspect(buf.to_string(), content="Hello, world")
/// }
/// ```
pub fn encode_to(
  src : String,
  buffer : @buffer.Buffer,
  encoding~ : Encoding,
) -> Unit {
  match encoding {
    // UTF8 and UTF16BE are produced one char at a time.
    UTF8 =>
      for char in src {
        write_utf8_char(buffer, char)
      }
    UTF16BE =>
      for char in src {
        write_utf16be_char(buffer, char)
      }
    // UTF16 is treated the same as UTF16LE and written in one call.
    UTF16 | UTF16LE => buffer.write_string_utf16le(src)
  }
}
120

121
///|
/// Encodes a single `Char` as UTF-8 and returns the bytes.
///
/// Each call creates a temporary buffer, so this carries allocation
/// overhead compared with writing into an existing buffer via
/// `write_utf8_char`.
pub fn to_utf8_bytes(value : Char) -> Bytes {
  // A size hint of 4 matches the longest sequence `write_utf8_char` emits.
  let scratch = Buffer(size_hint=4)
  write_utf8_char(scratch, value)
  scratch.to_bytes()
}
131

132
///|
/// Encodes a single `Char` as UTF-16LE and returns the bytes.
///
/// This is an alias for `to_utf16le_bytes` and simply forwards to it.
///
/// Each call creates a temporary buffer, so this carries allocation
/// overhead.
pub fn to_utf16_bytes(value : Char) -> Bytes {
  value |> to_utf16le_bytes
}
142

143
///|
/// Encodes a single `Char` as UTF-16LE and returns the bytes.
///
/// Each call creates a temporary buffer, so this carries allocation
/// overhead compared with writing into an existing buffer via
/// `write_utf16le_char`.
pub fn to_utf16le_bytes(value : Char) -> Bytes {
  // A size hint of 4 matches the longest output `write_utf16le_char` emits.
  let scratch = Buffer(size_hint=4)
  write_utf16le_char(scratch, value)
  scratch.to_bytes()
}
153

154
///|
/// Encodes a single `Char` as UTF-16BE and returns the bytes.
///
/// Each call creates a temporary buffer, so this carries allocation
/// overhead compared with writing into an existing buffer via
/// `write_utf16be_char`.
pub fn to_utf16be_bytes(value : Char) -> Bytes {
  // A size hint of 4 matches the longest output `write_utf16be_char` emits.
  let scratch = Buffer(size_hint=4)
  write_utf16be_char(scratch, value)
  scratch.to_bytes()
}
164

165
///|
/// Appends `value` to `buf` encoded as UTF-8.
///
/// The sequence length is chosen from the code point:
/// below 0x80 one byte, below 0x0800 two bytes, below 0x010000 three bytes,
/// below 0x110000 four bytes. No surrogate check is performed, so any code
/// point in the three-byte range is written as three bytes.
///
/// Aborts with "Char out of range" when the code point is 0x110000 or above.
pub fn write_utf8_char(buf : @buffer.Buffer, value : Char) -> Unit {
  let cp = value.to_uint()
  if cp < 0x80 {
    // 0xxxxxxx
    buf.write_byte((cp & 0x7F).to_byte())
  } else if cp < 0x0800 {
    // 110xxxxx 10xxxxxx
    buf.write_byte((0xC0 | ((cp >> 6) & 0x1F)).to_byte())
    buf.write_byte((0x80 | (cp & 0x3F)).to_byte())
  } else if cp < 0x010000 {
    // 1110xxxx 10xxxxxx 10xxxxxx
    buf.write_byte((0xE0 | ((cp >> 12) & 0x0F)).to_byte())
    buf.write_byte((0x80 | ((cp >> 6) & 0x3F)).to_byte())
    buf.write_byte((0x80 | (cp & 0x3F)).to_byte())
  } else if cp < 0x110000 {
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    buf.write_byte((0xF0 | ((cp >> 18) & 0x07)).to_byte())
    buf.write_byte((0x80 | ((cp >> 12) & 0x3F)).to_byte())
    buf.write_byte((0x80 | ((cp >> 6) & 0x3F)).to_byte())
    buf.write_byte((0x80 | (cp & 0x3F)).to_byte())
  } else {
    abort("Char out of range")
  }
}
201

202
///|
/// Appends `value` to `buf` encoded as UTF-16LE.
///
/// This is an alias for `write_utf16le_char` and simply forwards to it.
pub fn write_utf16_char(buf : @buffer.Buffer, value : Char) -> Unit {
  buf |> write_utf16le_char(value)
}
208

209
///|
/// Appends `value` to `buf` encoded as UTF-16LE.
///
/// Code points below 0x10000 are written as one 16-bit unit (two bytes).
/// Code points below 0x110000 are written as a surrogate pair (four bytes),
/// high surrogate first. Within each unit the low byte comes first.
///
/// Aborts with "Char out of range" when the code point is 0x110000 or above.
pub fn write_utf16le_char(buf : @buffer.Buffer, value : Char) -> Unit {
  let cp = value.to_uint()
  if cp < 0x10000 {
    buf.write_byte((cp & 0xFF).to_byte())
    buf.write_byte((cp >> 8).to_byte())
  } else if cp < 0x110000 {
    // Split the 20-bit offset into the two 10-bit surrogate payloads.
    let offset = cp - 0x10000
    let high = 0xD800 | (offset >> 10)
    let low = 0xDC00 | (offset & 0x3FF)
    buf.write_byte((high & 0xFF).to_byte())
    buf.write_byte((high >> 8).to_byte())
    buf.write_byte((low & 0xFF).to_byte())
    buf.write_byte((low >> 8).to_byte())
  } else {
    abort("Char out of range")
  }
}
234

235
///|
/// Appends `value` to `buf` encoded as UTF-16BE.
///
/// Code points below 0x10000 are written as one 16-bit unit (two bytes).
/// Code points below 0x110000 are written as a surrogate pair (four bytes),
/// high surrogate first. Within each unit the high byte comes first.
///
/// Aborts with "Char out of range" when the code point is 0x110000 or above.
pub fn write_utf16be_char(buf : @buffer.Buffer, value : Char) -> Unit {
  let cp = value.to_uint()
  if cp < 0x10000 {
    buf.write_byte((cp >> 8).to_byte())
    buf.write_byte((cp & 0xFF).to_byte())
  } else if cp < 0x110000 {
    // Split the 20-bit offset into the two 10-bit surrogate payloads.
    let offset = cp - 0x10000
    let high = 0xD800 | (offset >> 10)
    let low = 0xDC00 | (offset & 0x3FF)
    buf.write_byte((high >> 8).to_byte())
    buf.write_byte((high & 0xFF).to_byte())
    buf.write_byte((low >> 8).to_byte())
    buf.write_byte((low & 0xFF).to_byte())
  } else {
    abort("Char out of range")
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc