• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

libbitcoin / libbitcoin-system / 22832990031

09 Mar 2026 12:06AM UTC coverage: 81.289% (+0.07%) from 81.216%
22832990031

Pull #1791

github

web-flow
Merge 3697e6efc into 032c34e64
Pull Request #1791: [RFC] Replace ICU dependency with embedded Unicode tables

139 of 150 new or added lines in 1 file covered. (92.67%)

30 existing lines in 3 files now uncovered.

10987 of 13516 relevant lines covered (81.29%)

3503968.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.7
/src/wallet/mnemonics/mnemonic.cpp
1
/**
2
 * Copyright (c) 2011-2026 libbitcoin developers (see AUTHORS)
3
 *
4
 * This file is part of libbitcoin.
5
 *
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU Affero General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU Affero General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Affero General Public License
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include <bitcoin/system/wallet/mnemonics/mnemonic.hpp>
20

21
#include <bitcoin/system/data/data.hpp>
22
#include <bitcoin/system/hash/hash.hpp>
23
#include <bitcoin/system/math/math.hpp>
24
#include <bitcoin/system/radix/radix.hpp>
25
#include <bitcoin/system/unicode/unicode.hpp>
26
#include <bitcoin/system/wallet/context.hpp>
27
#include <bitcoin/system/wallet/keys/hd_private.hpp>
28
#include <bitcoin/system/words/words.hpp>
29

30
namespace libbitcoin {
31
namespace system {
32
namespace wallet {
33

34
// local constants
35
// ----------------------------------------------------------------------------
36

37
// 2^11 = 2048 implies 11 bits exactly indexes every possible dictionary word.
// The narrow_cast is safe: floored_log2 of the dictionary size (2048 per the
// line above) is 11, which fits in uint8_t.
static const auto index_bits = narrow_cast<uint8_t>(
    system::floored_log2(mnemonic::dictionary::size()));
40

41
// private static
42
// ----------------------------------------------------------------------------
43

44
// All supported wordlists, one dictionary per language identifier.
// NOTE(review): ordering appears significant — en precedes fr, and
// is_ambiguous() below relies on en being searched first when resolving
// words shared between the en and fr wordlists.
const mnemonic::dictionaries mnemonic::dictionaries_
{
    {
        mnemonic::dictionary{ language::en, words::mnemonic::en },
        mnemonic::dictionary{ language::es, words::mnemonic::es },
        mnemonic::dictionary{ language::it, words::mnemonic::it },
        mnemonic::dictionary{ language::fr, words::mnemonic::fr },
        mnemonic::dictionary{ language::cs, words::mnemonic::cs },
        mnemonic::dictionary{ language::pt, words::mnemonic::pt },
        mnemonic::dictionary{ language::ja, words::mnemonic::ja },
        mnemonic::dictionary{ language::ko, words::mnemonic::ko },
        mnemonic::dictionary{ language::zh_Hans, words::mnemonic::zh_Hans },
        mnemonic::dictionary{ language::zh_Hant, words::mnemonic::zh_Hant }
    }
};
59

60
// protected static (coders)
61
// ----------------------------------------------------------------------------
62

63
// Entropy requires wordlist mapping because of the checksum.
64
string_list mnemonic::encoder(const data_chunk& entropy,
48✔
65
    language identifier) NOEXCEPT
66
{
67
    // Bytes are the base2048 encoding, so this is byte decoding.
68
    const auto buffer = splice(entropy, { checksum_byte(entropy) });
48✔
69
    return decode_base2048_list(buffer, identifier);
48✔
70
}
71

72
// Entropy requires wordlist mapping because of the checksum.
73
data_chunk mnemonic::decoder(const string_list& words,
141✔
74
    language identifier) NOEXCEPT
75
{
76
    // Words are the base2048 decoding, so this is word encoding.
77
    data_chunk buffer;
141✔
78
    if (!encode_base2048_list(buffer, words, identifier))
141✔
79
        return {};
×
80

81
    // Entropy is always byte aligned.
82
    // Checksum is in high order bits of last buffer byte, zero-padded.
83
    const data_chunk entropy{ buffer.begin(), std::prev(buffer.end()) };
141✔
84
    return buffer.back() == checksum_byte(entropy) ? entropy : data_chunk{};
141✔
85
}
86

87
// Derive the seed from the (already normalized) words and the passphrase,
// via pbkd<sha512> keyed on the joined sentence, salted with the
// "mnemonic"-prefixed passphrase, over 2048 iterations.
long_hash mnemonic::seeder(const string_list& words,
    const std::string& passphrase) NOEXCEPT
{
    constexpr size_t hmac_iterations = 2048;
    constexpr auto passphrase_prefix = "mnemonic";

    // Passphrase is limited to ascii (normal) if HAVE_ICU undefined.
    std::string phrase{ passphrase };

    LCOV_EXCL_START("Always succeeds unless HAVE_ICU undefined.")

    // Unlike Electrum, BIP39 does not perform any further normalization.
    // A failed normalization yields a null (default) seed.
    if (!to_compatibility_decomposition(phrase))
        return {};

    LCOV_EXCL_STOP()

    // Words are in normal (lower, nfkd) form, even without ICU.
    return pbkd<sha512>::key<long_hash_size>(system::join(words),
        passphrase_prefix + phrase, hmac_iterations);
}
108

109
// Compute the checksum byte for the given entropy.
uint8_t mnemonic::checksum_byte(const data_chunk& entropy) NOEXCEPT
{
    // The high order bits of the first sha256_hash byte are the checksum.
    // Only 4, 5, 6, 7, or 8 bits of the hash are used (based on size);
    // the remaining low order bits are masked off (zero).
    const auto unused_bits = byte_bits - checksum_bits(entropy);
    return sha256_hash(entropy).front() & (max_uint8 << unused_bits);
}
117

118
// protected static (sizers)
119
// ----------------------------------------------------------------------------
120

121
// One checksum bit for every entropy_multiple bytes of entropy.
size_t mnemonic::checksum_bits(const data_slice& entropy) NOEXCEPT
{
    const auto bytes = entropy.size();
    return bytes / entropy_multiple;
}
125

UNCOV
126
// One checksum bit for every word_multiple words.
size_t mnemonic::checksum_bits(const string_list& words) NOEXCEPT
{
    const auto count = words.size();
    return count / word_multiple;
}
130

UNCOV
131
// Entropy bit count is simply the entropy byte count in bits.
size_t mnemonic::entropy_bits(const data_slice& entropy) NOEXCEPT
{
    const auto bytes = entropy.size();
    return to_bits(bytes);
}
135

UNCOV
136
// Total encoded bits (index_bits per word) less the checksum bits.
size_t mnemonic::entropy_bits(const string_list& words) NOEXCEPT
{
    const auto total_bits = words.size() * index_bits;
    return total_bits - checksum_bits(words);
}
140

UNCOV
141
// Entropy byte count implied by the given word count.
size_t mnemonic::entropy_size(const string_list& words) NOEXCEPT
{
    const auto bits = entropy_bits(words);
    return to_floored_bytes(bits);
}
145

UNCOV
146
// Word count implied by the given entropy: (entropy + checksum) bits,
// at index_bits per word.
size_t mnemonic::word_count(const data_slice& entropy) NOEXCEPT
{
    const auto total_bits = entropy_bits(entropy) + checksum_bits(entropy);
    return total_bits / index_bits;
}
150

151
// protected static (checkers)
152
// ----------------------------------------------------------------------------
153

154
// Detect the en/fr dictionary ambiguity.
bool mnemonic::is_ambiguous(const string_list& words, language requested,
    language derived) NOEXCEPT
{
    // HACK: There are 100 same words in en/fr, all with distinct indexes.
    // If matches en and unspecified then check fr, since en is searched first.
    if (derived != language::en || requested != language::none)
        return false;

    return contained_by(words, language::fr) == language::fr;
}
164

165
// public static
166
// ----------------------------------------------------------------------------
167

168
// Return the language whose dictionary contains all of the given words,
// constrained by the identifier (language::none returned on no match, as
// relied upon by from_words below).
language mnemonic::contained_by(const string_list& words,
    language identifier) NOEXCEPT
{
    return dictionaries_.contains(words, identifier);
}
173

174
// True if a dictionary exists for the given language identifier.
bool mnemonic::is_valid_dictionary(language identifier) NOEXCEPT
{
    return dictionaries_.exists(identifier);
}
178

179
// Valid entropy sizes are whole multiples of entropy_multiple within
// [entropy_minimum, entropy_maximum].
bool mnemonic::is_valid_entropy_size(size_t size) NOEXCEPT
{
    if (!is_zero(size % entropy_multiple))
        return false;

    return size >= entropy_minimum && size <= entropy_maximum;
}
184

185
// Valid word counts are whole multiples of word_multiple within
// [word_minimum, word_maximum].
bool mnemonic::is_valid_word_count(size_t count) NOEXCEPT
{
    if (!is_zero(count % word_multiple))
        return false;

    return count >= word_minimum && count <= word_maximum;
}
190

191
// construction
192
// ----------------------------------------------------------------------------
193

194
// Default construct an invalid (empty) mnemonic.
mnemonic::mnemonic() NOEXCEPT
  : languages()
{
}
198

199
// Construct from a delimited sentence: split into words (language-aware
// delimiting) and delegate to the word-list constructor.
mnemonic::mnemonic(const std::string& sentence, language identifier) NOEXCEPT
  : mnemonic(split(sentence, identifier), identifier)
{
}
203

204
// Construct from a word list via the validating from_words factory;
// yields an invalid mnemonic on any validation failure.
mnemonic::mnemonic(const string_list& words, language identifier) NOEXCEPT
  : mnemonic(from_words(words, identifier))
{
}
208

209
// Construct from entropy via the validating from_entropy factory;
// yields an invalid mnemonic on any validation failure.
mnemonic::mnemonic(const data_chunk& entropy, language identifier) NOEXCEPT
  : mnemonic(from_entropy(entropy, identifier))
{
}
213

214
// protected
215
mnemonic::mnemonic(const data_chunk& entropy, const string_list& words,
115✔
216
    language identifier) NOEXCEPT
115✔
217
  : languages(entropy, words, identifier)
115✔
218
{
219
}
115✔
220

221
// protected (factories)
222
// ----------------------------------------------------------------------------
223

224
// Construct a mnemonic from entropy, deriving the words.
// Returns an invalid (default) mnemonic on any failure.
mnemonic mnemonic::from_entropy(const data_chunk& entropy,
    language identifier) NOEXCEPT
{
    // Reject invalid entropy sizes and unknown dictionaries.
    const auto usable = is_valid_entropy_size(entropy.size()) &&
        dictionaries_.exists(identifier);

    if (!usable)
        return {};

    // Save entropy and derived words.
    return { entropy, encoder(entropy, identifier), identifier };
}
236

237
// Construct a mnemonic from words, deriving the entropy.
// Returns an invalid (default) mnemonic on any failure. Guard ordering is
// significant: count, dictionary match, requested-language match, en/fr
// disambiguation, then checksum.
mnemonic mnemonic::from_words(const string_list& words,
    language identifier) NOEXCEPT
{
    if (!is_valid_word_count(words.size()))
        return {};

    // Normalize to improve chance of dictionary matching.
    const auto tokens = try_normalize(words);
    const auto lexicon = contained_by(tokens, identifier);

    // The words do not all belong to any (allowed) dictionary.
    if (lexicon == language::none)
        return {};

    // A specific language was requested but a different one matched.
    if (identifier != language::none && lexicon != identifier)
        return {};

    // HACK: en-fr dictionary ambiguity.
    if (is_ambiguous(tokens, identifier, lexicon))
        return {};

    const auto entropy = decoder(tokens, lexicon);

    // Checksum verification failed.
    if (entropy.empty())
        return {};

    // Save derived entropy and dictionary words, originals are discarded.
    return { entropy, tokens, lexicon };
}
266

267
// public methods
268
// ----------------------------------------------------------------------------
269

270
// Derive the seed from this mnemonic and the given passphrase.
// An invalid mnemonic yields a null (default) seed.
long_hash mnemonic::to_seed(const std::string& passphrase) const NOEXCEPT
{
    return (*this) ? seeder(words(), passphrase) : long_hash{};
}
277

278
// Derive an hd private key from this mnemonic and the given passphrase,
// using the hd prefixes of the supplied context.
hd_private mnemonic::to_key(const std::string& passphrase,
    const context& context) const NOEXCEPT
{
    // The key will be invalid if the secret does not ec verify.
    if (*this)
        return { to_chunk(to_seed(passphrase)), context.hd_prefixes() };

    // An invalid mnemonic yields an invalid (default) key.
    return {};
}
287

288
} // namespace wallet
289
} // namespace system
290
} // namespace libbitcoin
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc