22832990031

Committed 09 Mar 2026 12:06AM UTC coverage: 81.289% (+0.07%) from 81.216%

Build # 22832990031

Build Type

Pull #1791

github

Committed by

web-flow

Commit Message

Merge 3697e6efc into 032c34e64

Pull Request Pull Request #1791: [RFC] Replace ICU dependency with embedded Unicode tables

Coverage Stats

139 of 150 new or added lines in 1 file covered. (92.67%)

30 existing lines in 3 files now uncovered.

10987 of 13516 relevant lines covered (81.29%)

3503968.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.7

/src/wallet/mnemonics/mnemonic.cpp

/**
 * Copyright (c) 2011-2026 libbitcoin developers (see AUTHORS)
 *
 * This file is part of libbitcoin.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <bitcoin/system/wallet/mnemonics/mnemonic.hpp>

#include <bitcoin/system/data/data.hpp>
#include <bitcoin/system/hash/hash.hpp>
#include <bitcoin/system/math/math.hpp>
#include <bitcoin/system/radix/radix.hpp>
#include <bitcoin/system/unicode/unicode.hpp>
#include <bitcoin/system/wallet/context.hpp>
#include <bitcoin/system/wallet/keys/hd_private.hpp>
#include <bitcoin/system/words/words.hpp>

namespace libbitcoin {
namespace system {
namespace wallet {

// local constants
// ----------------------------------------------------------------------------

// 2^11 = 2048 implies 11 bits exactly indexes every possible dictionary word.
static const auto index_bits = narrow_cast<uint8_t>(
    system::floored_log2(mnemonic::dictionary::size()));

// private static
// ----------------------------------------------------------------------------

const mnemonic::dictionaries mnemonic::dictionaries_
{
    {
        mnemonic::dictionary{ language::en, words::mnemonic::en },
        mnemonic::dictionary{ language::es, words::mnemonic::es },
        mnemonic::dictionary{ language::it, words::mnemonic::it },
        mnemonic::dictionary{ language::fr, words::mnemonic::fr },
        mnemonic::dictionary{ language::cs, words::mnemonic::cs },
        mnemonic::dictionary{ language::pt, words::mnemonic::pt },
        mnemonic::dictionary{ language::ja, words::mnemonic::ja },
        mnemonic::dictionary{ language::ko, words::mnemonic::ko },
        mnemonic::dictionary{ language::zh_Hans, words::mnemonic::zh_Hans },
        mnemonic::dictionary{ language::zh_Hant, words::mnemonic::zh_Hant }
    }
};

// protected static (coders)
// ----------------------------------------------------------------------------

// Entropy requires wordlist mapping because of the checksum.
string_list mnemonic::encoder(const data_chunk& entropy,
    language identifier) NOEXCEPT
{
    // Bytes are the base2048 encoding, so this is byte decoding.
    const auto buffer = splice(entropy, { checksum_byte(entropy) });
    return decode_base2048_list(buffer, identifier);
}

// Entropy requires wordlist mapping because of the checksum.
data_chunk mnemonic::decoder(const string_list& words,
    language identifier) NOEXCEPT
{
    // Words are the base2048 decoding, so this is word encoding.
    data_chunk buffer;
    if (!encode_base2048_list(buffer, words, identifier))
        return {};

    // Entropy is always byte aligned.
    // Checksum is in high order bits of last buffer byte, zero-padded.
    const data_chunk entropy{ buffer.begin(), std::prev(buffer.end()) };
    return buffer.back() == checksum_byte(entropy) ? entropy : data_chunk{};
}

long_hash mnemonic::seeder(const string_list& words,
    const std::string& passphrase) NOEXCEPT
{
    constexpr size_t hmac_iterations = 2048;
    constexpr auto passphrase_prefix = "mnemonic";

    // Passphrase is limited to ascii (normal) if HAVE_ICU undefind.
    std::string phrase{ passphrase };

    LCOV_EXCL_START("Always succeeds unless HAVE_ICU undefined.")

    // Unlike Electrum, BIP39 does not perform any further normalization.
    if (!to_compatibility_decomposition(phrase))
        return {};

    LCOV_EXCL_STOP()

    // Words are in normal (lower, nfkd) form, even without ICU.
    return pbkd<sha512>::key<long_hash_size>(system::join(words),
        passphrase_prefix + phrase, hmac_iterations);
}

uint8_t mnemonic::checksum_byte(const data_chunk& entropy) NOEXCEPT
{
    // The high order bits of the first sha256_hash byte are the checksum.
    // Only 4, 5, 6, 7, or 8 bits of the hash are used (based on size).
    const auto mask_bits = byte_bits - checksum_bits(entropy);
    const auto checksum_mask = max_uint8 << mask_bits;
    return sha256_hash(entropy).front() & checksum_mask;
}

// protected static (sizers)
// ----------------------------------------------------------------------------

size_t mnemonic::checksum_bits(const data_slice& entropy) NOEXCEPT
{
    return entropy.size() / entropy_multiple;
}

size_t mnemonic::checksum_bits(const string_list& words) NOEXCEPT
{
    return words.size() / word_multiple;
}

size_t mnemonic::entropy_bits(const data_slice& entropy) NOEXCEPT
{
    return to_bits(entropy.size());
}

size_t mnemonic::entropy_bits(const string_list& words) NOEXCEPT
{
    return words.size() * index_bits - checksum_bits(words);
}

size_t mnemonic::entropy_size(const string_list& words) NOEXCEPT
{
    return to_floored_bytes(entropy_bits(words));
}

size_t mnemonic::word_count(const data_slice& entropy) NOEXCEPT
{
    return (entropy_bits(entropy) + checksum_bits(entropy)) / index_bits;
}

// protected static (checkers)
// ----------------------------------------------------------------------------

bool mnemonic::is_ambiguous(const string_list& words, language requested,
    language derived) NOEXCEPT
{
    // HACK: There are 100 same words in en/fr, all with distinct indexes.
    // If matches en and unspecified then check fr, since en is searched first.
    return
        derived == language::en &&
        requested == language::none &&
        contained_by(words, language::fr) == language::fr;
}

// public static
// ----------------------------------------------------------------------------

language mnemonic::contained_by(const string_list& words,
    language identifier) NOEXCEPT
{
    return dictionaries_.contains(words, identifier);
}

bool mnemonic::is_valid_dictionary(language identifier) NOEXCEPT
{
    return dictionaries_.exists(identifier);
}

bool mnemonic::is_valid_entropy_size(size_t size) NOEXCEPT
{
    return (is_zero(size % entropy_multiple) &&
        size >= entropy_minimum && size <= entropy_maximum);
}

bool mnemonic::is_valid_word_count(size_t count) NOEXCEPT
{
    return (is_zero(count % word_multiple) &&
        count >= word_minimum && count <= word_maximum);
}

// construction
// ----------------------------------------------------------------------------

mnemonic::mnemonic() NOEXCEPT
  : languages()
{
}

mnemonic::mnemonic(const std::string& sentence, language identifier) NOEXCEPT
  : mnemonic(split(sentence, identifier), identifier)
{
}

mnemonic::mnemonic(const string_list& words, language identifier) NOEXCEPT
  : mnemonic(from_words(words, identifier))
{
}

mnemonic::mnemonic(const data_chunk& entropy, language identifier) NOEXCEPT
  : mnemonic(from_entropy(entropy, identifier))
{
}

// protected
mnemonic::mnemonic(const data_chunk& entropy, const string_list& words,
    language identifier) NOEXCEPT
  : languages(entropy, words, identifier)
{
}

// protected (factories)
// ----------------------------------------------------------------------------

mnemonic mnemonic::from_entropy(const data_chunk& entropy,
    language identifier) NOEXCEPT
{
    if (!is_valid_entropy_size(entropy.size()))
        return {};

    if (!dictionaries_.exists(identifier))
        return {};

    // Save entropy and derived words.
    return { entropy, encoder(entropy, identifier), identifier };
}

mnemonic mnemonic::from_words(const string_list& words,
    language identifier) NOEXCEPT
{
    if (!is_valid_word_count(words.size()))
        return {};

    // Normalize to improve chance of dictionary matching.
    const auto tokens = try_normalize(words);
    const auto lexicon = contained_by(tokens, identifier);

    if (lexicon == language::none)
        return {};

    if (identifier != language::none && lexicon != identifier)
        return {};

    // HACK: en-fr dictionary ambiguity.
    if (is_ambiguous(tokens, identifier, lexicon))
        return {};

    const auto entropy = decoder(tokens, lexicon);

    // Checksum verification failed.
    if (entropy.empty())
        return {};

    // Save derived entropy and dictionary words, originals are discarded.
    return { entropy, tokens, lexicon };
}

// public methods
// ----------------------------------------------------------------------------

long_hash mnemonic::to_seed(const std::string& passphrase) const NOEXCEPT
{
    if (!(*this))
        return {};

    return seeder(words(), passphrase);
}

hd_private mnemonic::to_key(const std::string& passphrase,
    const context& context) const NOEXCEPT
{
    if (!(*this))
        return {};

    // The key will be invalid if the secret does not ec verify.
    return { to_chunk(to_seed(passphrase)), context.hd_prefixes() };
}

} // namespace wallet
} // namespace system
} // namespace libbitcoin

1	/**
2	* Copyright (c) 2011-2026 libbitcoin developers (see AUTHORS)
3	*
4	* This file is part of libbitcoin.
5	*
6	* This program is free software: you can redistribute it and/or modify
7	* it under the terms of the GNU Affero General Public License as published by
8	* the Free Software Foundation, either version 3 of the License, or
9	* (at your option) any later version.
10	*
11	* This program is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	* GNU Affero General Public License for more details.
15	*
16	* You should have received a copy of the GNU Affero General Public License
17	* along with this program. If not, see <http://www.gnu.org/licenses/>.
18	*/
19	#include <bitcoin/system/wallet/mnemonics/mnemonic.hpp>
20
21	#include <bitcoin/system/data/data.hpp>
22	#include <bitcoin/system/hash/hash.hpp>
23	#include <bitcoin/system/math/math.hpp>
24	#include <bitcoin/system/radix/radix.hpp>
25	#include <bitcoin/system/unicode/unicode.hpp>
26	#include <bitcoin/system/wallet/context.hpp>
27	#include <bitcoin/system/wallet/keys/hd_private.hpp>
28	#include <bitcoin/system/words/words.hpp>
29
30	namespace libbitcoin {
31	namespace system {
32	namespace wallet {
33
34	// local constants
35	// ----------------------------------------------------------------------------
36
37	// 2^11 = 2048 implies 11 bits exactly indexes every possible dictionary word.
38	static const auto index_bits = narrow_cast<uint8_t>(
39	system::floored_log2(mnemonic::dictionary::size()));
40
41	// private static
42	// ----------------------------------------------------------------------------
43
44	const mnemonic::dictionaries mnemonic::dictionaries_
45	{
46	{
47	mnemonic::dictionary{ language::en, words::mnemonic::en },
48	mnemonic::dictionary{ language::es, words::mnemonic::es },
49	mnemonic::dictionary{ language::it, words::mnemonic::it },
50	mnemonic::dictionary{ language::fr, words::mnemonic::fr },
51	mnemonic::dictionary{ language::cs, words::mnemonic::cs },
52	mnemonic::dictionary{ language::pt, words::mnemonic::pt },
53	mnemonic::dictionary{ language::ja, words::mnemonic::ja },
54	mnemonic::dictionary{ language::ko, words::mnemonic::ko },
55	mnemonic::dictionary{ language::zh_Hans, words::mnemonic::zh_Hans },
56	mnemonic::dictionary{ language::zh_Hant, words::mnemonic::zh_Hant }
57	}
58	};
59
60	// protected static (coders)
61	// ----------------------------------------------------------------------------
62
63	// Entropy requires wordlist mapping because of the checksum.
64	string_list mnemonic::encoder(const data_chunk& entropy,	48✔
65	language identifier) NOEXCEPT
66	{
67	// Bytes are the base2048 encoding, so this is byte decoding.
68	const auto buffer = splice(entropy, { checksum_byte(entropy) });	48✔
69	return decode_base2048_list(buffer, identifier);	48✔
70	}
71
72	// Entropy requires wordlist mapping because of the checksum.
73	data_chunk mnemonic::decoder(const string_list& words,	141✔
74	language identifier) NOEXCEPT
75	{
76	// Words are the base2048 decoding, so this is word encoding.
77	data_chunk buffer;	141✔
78	if (!encode_base2048_list(buffer, words, identifier))	141✔
79	return {};	×
80
81	// Entropy is always byte aligned.
82	// Checksum is in high order bits of last buffer byte, zero-padded.
83	const data_chunk entropy{ buffer.begin(), std::prev(buffer.end()) };	141✔
84	return buffer.back() == checksum_byte(entropy) ? entropy : data_chunk{};	141✔
85	}
86
87	long_hash mnemonic::seeder(const string_list& words,	99✔
88	const std::string& passphrase) NOEXCEPT
89	{
90	constexpr size_t hmac_iterations = 2048;	99✔
91	constexpr auto passphrase_prefix = "mnemonic";	99✔
92
93	// Passphrase is limited to ascii (normal) if HAVE_ICU undefind.
94	std::string phrase{ passphrase };	99✔
95
96	LCOV_EXCL_START("Always succeeds unless HAVE_ICU undefined.")
97
98	// Unlike Electrum, BIP39 does not perform any further normalization.
99	if (!to_compatibility_decomposition(phrase))
100	return {};
101
102	LCOV_EXCL_STOP()
103
104	// Words are in normal (lower, nfkd) form, even without ICU.
105	return pbkd<sha512>::key<long_hash_size>(system::join(words),	198✔
106	passphrase_prefix + phrase, hmac_iterations);	198✔
107	}
108
109	uint8_t mnemonic::checksum_byte(const data_chunk& entropy) NOEXCEPT	189✔
110	{
111	// The high order bits of the first sha256_hash byte are the checksum.
112	// Only 4, 5, 6, 7, or 8 bits of the hash are used (based on size).
113	const auto mask_bits = byte_bits - checksum_bits(entropy);	189✔
114	const auto checksum_mask = max_uint8 << mask_bits;	189✔
115	return sha256_hash(entropy).front() & checksum_mask;	189✔
116	}
117
118	// protected static (sizers)
119	// ----------------------------------------------------------------------------
120
121	size_t mnemonic::checksum_bits(const data_slice& entropy) NOEXCEPT	189✔
122	{
123	return entropy.size() / entropy_multiple;	189✔
124	}
125
UNCOV 126	size_t mnemonic::checksum_bits(const string_list& words) NOEXCEPT	×
127	{
UNCOV 128	return words.size() / word_multiple;	×
129	}
130
UNCOV 131	size_t mnemonic::entropy_bits(const data_slice& entropy) NOEXCEPT	×
132	{
UNCOV 133	return to_bits(entropy.size());	×
134	}
135
UNCOV 136	size_t mnemonic::entropy_bits(const string_list& words) NOEXCEPT	×
137	{
UNCOV 138	return words.size() * index_bits - checksum_bits(words);	×
139	}
140
UNCOV 141	size_t mnemonic::entropy_size(const string_list& words) NOEXCEPT	×
142	{
UNCOV 143	return to_floored_bytes(entropy_bits(words));	×
144	}
145
UNCOV 146	size_t mnemonic::word_count(const data_slice& entropy) NOEXCEPT	×
147	{
UNCOV 148	return (entropy_bits(entropy) + checksum_bits(entropy)) / index_bits;	×
149	}
150
151	// protected static (checkers)
152	// ----------------------------------------------------------------------------
153
154	bool mnemonic::is_ambiguous(const string_list& words, language requested,	143✔
155	language derived) NOEXCEPT
156	{
157	// HACK: There are 100 same words in en/fr, all with distinct indexes.
158	// If matches en and unspecified then check fr, since en is searched first.
159	return	143✔
160	derived == language::en &&	143✔
161	requested == language::none &&	143✔
162	contained_by(words, language::fr) == language::fr;	57✔
163	}
164
165	// public static
166	// ----------------------------------------------------------------------------
167
168	language mnemonic::contained_by(const string_list& words,	213✔
169	language identifier) NOEXCEPT
170	{
171	return dictionaries_.contains(words, identifier);	213✔
172	}
173
174	bool mnemonic::is_valid_dictionary(language identifier) NOEXCEPT	11✔
175	{
176	return dictionaries_.exists(identifier);	11✔
177	}
178
179	bool mnemonic::is_valid_entropy_size(size_t size) NOEXCEPT	55✔
180	{
181	return (is_zero(size % entropy_multiple) &&	55✔
182	size >= entropy_minimum && size <= entropy_maximum);	55✔
183	}
184
185	bool mnemonic::is_valid_word_count(size_t count) NOEXCEPT	172✔
186	{
187	return (is_zero(count % word_multiple) &&	172✔
188	count >= word_minimum && count <= word_maximum);	172✔
189	}
190
191	// construction
192	// ----------------------------------------------------------------------------
193
194	mnemonic::mnemonic() NOEXCEPT	107✔
195	: languages()	107✔
196	{
197	}	107✔
198
199	mnemonic::mnemonic(const std::string& sentence, language identifier) NOEXCEPT	57✔
200	: mnemonic(split(sentence, identifier), identifier)	57✔
201	{
202	}	57✔
203
204	mnemonic::mnemonic(const string_list& words, language identifier) NOEXCEPT	165✔
205	: mnemonic(from_words(words, identifier))	165✔
206	{
207	}	165✔
208
209	mnemonic::mnemonic(const data_chunk& entropy, language identifier) NOEXCEPT	48✔
210	: mnemonic(from_entropy(entropy, identifier))	48✔
211	{
212	}	48✔
213
214	// protected
215	mnemonic::mnemonic(const data_chunk& entropy, const string_list& words,	115✔
216	language identifier) NOEXCEPT	115✔
217	: languages(entropy, words, identifier)	115✔
218	{
219	}	115✔
220
221	// protected (factories)
222	// ----------------------------------------------------------------------------
223
224	mnemonic mnemonic::from_entropy(const data_chunk& entropy,	48✔
225	language identifier) NOEXCEPT
226	{
227	if (!is_valid_entropy_size(entropy.size()))	48✔
UNCOV 228	return {};	×
229
230	if (!dictionaries_.exists(identifier))	48✔
UNCOV 231	return {};	×
232
233	// Save entropy and derived words.
234	return { entropy, encoder(entropy, identifier), identifier };	48✔
235	}
236
237	mnemonic mnemonic::from_words(const string_list& words,	165✔
238	language identifier) NOEXCEPT
239	{
240	if (!is_valid_word_count(words.size()))	165✔
241	return {};	18✔
242
243	// Normalize to improve chance of dictionary matching.
244	const auto tokens = try_normalize(words);	147✔
245	const auto lexicon = contained_by(tokens, identifier);	147✔
246
247	if (lexicon == language::none)	147✔
248	return {};	4✔
249
250	if (identifier != language::none && lexicon != identifier)	143✔
UNCOV 251	return {};	×
252
253	// HACK: en-fr dictionary ambiguity.
254	if (is_ambiguous(tokens, identifier, lexicon))	143✔
255	return {};	2✔
256
257	const auto entropy = decoder(tokens, lexicon);	141✔
258
259	// Checksum verification failed.
260	if (entropy.empty())	141✔
261	return {};	76✔
262
263	// Save derived entropy and dictionary words, originals are discarded.
264	return { entropy, tokens, lexicon };	65✔
265	}	147✔
266
267	// public methods
268	// ----------------------------------------------------------------------------
269
270	long_hash mnemonic::to_seed(const std::string& passphrase) const NOEXCEPT	99✔
271	{
272	if (!(*this))	99✔
UNCOV 273	return {};	×
274
275	return seeder(words(), passphrase);	99✔
276	}
277
278	hd_private mnemonic::to_key(const std::string& passphrase,	100✔
279	const context& context) const NOEXCEPT
280	{
281	if (!(*this))	100✔
282	return {};	1✔
283
284	// The key will be invalid if the secret does not ec verify.
285	return { to_chunk(to_seed(passphrase)), context.hd_prefixes() };	198✔
286	}
287
288	} // namespace wallet
289	} // namespace system
290	} // namespace libbitcoin

libbitcoin / libbitcoin-system / 22832990031

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous