• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 16511306696

25 Jul 2025 01:07AM UTC coverage: 90.698% (+0.005%) from 90.693%
16511306696

push

github

web-flow
Merge pull request #5017 from randombit/jack/mp-cleanup

Clean up low level mp interfaces

99956 of 110208 relevant lines covered (90.7%)

12233642.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.61
/src/lib/math/mp/mp_karat.cpp
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8

9
#include <botan/internal/mp_core.h>
10

11
#include <botan/exceptn.h>
12
#include <botan/mem_ops.h>
13
#include <botan/internal/ct_utils.h>
14

15
namespace Botan {
16

17
/*
18
* Simple O(N^2) Multiplication
19
*/
20
void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) {
34,671,544✔
21
   if(z_size < x_size + y_size) {
34,671,544✔
22
      throw Invalid_Argument("basecase_mul z_size too small");
×
23
   }
24

25
   const size_t x_size_8 = x_size - (x_size % 8);
34,671,544✔
26

27
   clear_mem(z, z_size);
34,671,544✔
28

29
   for(size_t i = 0; i != y_size; ++i) {
275,479,298✔
30
      const word y_i = y[i];
240,807,754✔
31

32
      word carry = 0;
240,807,754✔
33

34
      for(size_t j = 0; j != x_size_8; j += 8) {
602,276,701✔
35
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
361,468,947✔
36
      }
37

38
      for(size_t j = x_size_8; j != x_size; ++j) {
728,801,612✔
39
         z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry);
487,993,858✔
40
      }
41

42
      z[x_size + i] = carry;
240,807,754✔
43
   }
44
}
34,671,544✔
45

46
void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) {
23,759,920✔
47
   if(z_size < 2 * x_size) {
23,759,920✔
48
      throw Invalid_Argument("basecase_sqr z_size too small");
×
49
   }
50

51
   const size_t x_size_8 = x_size - (x_size % 8);
23,759,920✔
52

53
   clear_mem(z, z_size);
23,759,920✔
54

55
   for(size_t i = 0; i != x_size; ++i) {
142,859,164✔
56
      const word x_i = x[i];
119,099,244✔
57

58
      word carry = 0;
119,099,244✔
59

60
      for(size_t j = 0; j != x_size_8; j += 8) {
186,262,121✔
61
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
67,162,877✔
62
      }
63

64
      for(size_t j = x_size_8; j != x_size; ++j) {
529,081,418✔
65
         z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry);
409,982,174✔
66
      }
67

68
      z[x_size + i] = carry;
119,099,244✔
69
   }
70
}
23,759,920✔
71

72
namespace {
73

74
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
75
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
76

77
/*
78
* Karatsuba Multiplication Operation
79
*/
80
void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) {
15,390,909✔
81
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2 != 0) {
15,390,909✔
82
      switch(N) {
11,248,137✔
83
         case 6:
×
84
            return bigint_comba_mul6(z, x, y);
11,248,137✔
85
         case 8:
×
86
            return bigint_comba_mul8(z, x, y);
×
87
         case 9:
×
88
            return bigint_comba_mul9(z, x, y);
×
89
         case 16:
7,202,835✔
90
            return bigint_comba_mul16(z, x, y);
7,202,835✔
91
         case 24:
1,654,431✔
92
            return bigint_comba_mul24(z, x, y);
1,654,431✔
93
         default:
2,390,871✔
94
            return basecase_mul(z, 2 * N, x, N, y, N);
2,390,871✔
95
      }
96
   }
97

98
   const size_t N2 = N / 2;
4,142,772✔
99

100
   const word* x0 = x;
4,142,772✔
101
   const word* x1 = x + N2;
4,142,772✔
102
   const word* y0 = y;
4,142,772✔
103
   const word* y1 = y + N2;
4,142,772✔
104
   word* z0 = z;
4,142,772✔
105
   word* z1 = z + N;
4,142,772✔
106

107
   word* ws0 = workspace;
4,142,772✔
108
   word* ws1 = workspace + N;
4,142,772✔
109

110
   clear_mem(workspace, 2 * N);
4,142,772✔
111

112
   /*
113
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
114
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
115
   * anything, clear_mem above already set the correct result.
116
   *
117
   * However we ignore the result of the comparisons and always perform the
118
   * subtractions and recursively multiply to avoid the timing channel.
119
   */
120

121
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
122
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
4,142,772✔
123
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
4,142,772✔
124
   const auto neg_mask = ~(cmp0 ^ cmp1);
4,142,772✔
125

126
   karatsuba_mul(ws0, z0, z1, N2, ws1);
4,142,772✔
127

128
   // Compute X_lo * Y_lo
129
   karatsuba_mul(z0, x0, y0, N2, ws1);
4,142,772✔
130

131
   // Compute X_hi * Y_hi
132
   karatsuba_mul(z1, x1, y1, N2, ws1);
4,142,772✔
133

134
   const word ws_carry = bigint_add3(ws1, z0, N, z1, N);
4,142,772✔
135
   word z_carry = bigint_add2(z + N2, N, ws1, N);
4,142,772✔
136

137
   z_carry += bigint_add2(z + N + N2, N2, &ws_carry, 1);
4,142,772✔
138
   bigint_add2(z + N + N2, N2, &z_carry, 1);
4,142,772✔
139

140
   clear_mem(workspace + N, N2);
4,142,772✔
141

142
   bigint_cnd_add(neg_mask.value(), z + N2, workspace, 2 * N - N2);
4,142,772✔
143
   bigint_cnd_sub((~neg_mask).value(), z + N2, workspace, 2 * N - N2);
4,142,772✔
144
}
145

146
/*
147
* Karatsuba Squaring Operation
148
*/
149
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) {
41,060,794✔
150
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2 != 0) {
41,060,794✔
151
      switch(N) {
29,621,906✔
152
         case 6:
×
153
            return bigint_comba_sqr6(z, x);
29,621,906✔
154
         case 8:
×
155
            return bigint_comba_sqr8(z, x);
×
156
         case 9:
×
157
            return bigint_comba_sqr9(z, x);
×
158
         case 16:
24,582,258✔
159
            return bigint_comba_sqr16(z, x);
24,582,258✔
160
         case 24:
5,037,495✔
161
            return bigint_comba_sqr24(z, x);
5,037,495✔
162
         default:
2,153✔
163
            return basecase_sqr(z, 2 * N, x, N);
2,153✔
164
      }
165
   }
166

167
   const size_t N2 = N / 2;
11,438,888✔
168

169
   const word* x0 = x;
11,438,888✔
170
   const word* x1 = x + N2;
11,438,888✔
171
   word* z0 = z;
11,438,888✔
172
   word* z1 = z + N;
11,438,888✔
173

174
   word* ws0 = workspace;
11,438,888✔
175
   word* ws1 = workspace + N;
11,438,888✔
176

177
   clear_mem(workspace, 2 * N);
11,438,888✔
178

179
   // See comment in karatsuba_mul
180
   bigint_sub_abs(z0, x0, x1, N2, workspace);
11,438,888✔
181
   karatsuba_sqr(ws0, z0, N2, ws1);
11,438,888✔
182

183
   karatsuba_sqr(z0, x0, N2, ws1);
11,438,888✔
184
   karatsuba_sqr(z1, x1, N2, ws1);
11,438,888✔
185

186
   const word ws_carry = bigint_add3(ws1, z0, N, z1, N);
11,438,888✔
187
   word z_carry = bigint_add2(z + N2, N, ws1, N);
11,438,888✔
188

189
   z_carry += bigint_add2(z + N + N2, N2, &ws_carry, 1);
11,438,888✔
190
   bigint_add2(z + N + N2, N2, &z_carry, 1);
11,438,888✔
191

192
   /*
193
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
194
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
195
   * timing channel.
196
   */
197
   bigint_sub2(z + N2, 2 * N - N2, ws0, N);
11,438,888✔
198
}
199

200
/*
201
* Pick a good size for the Karatsuba multiply
202
*/
203
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) {
2,963,290✔
204
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) {
2,963,290✔
205
      return 0;
206
   }
207

208
   if(((x_size == x_sw) && (x_size % 2 != 0)) || ((y_size == y_sw) && (y_size % 2 != 0))) {
2,963,290✔
209
      return 0;
210
   }
211

212
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
2,962,671✔
213
   const size_t end = (x_size < y_size) ? x_size : y_size;
2,962,671✔
214

215
   if(start == end) {
2,962,671✔
216
      if(start % 2 != 0) {
2,069,972✔
217
         return 0;
218
      }
219
      return start;
2,069,972✔
220
   }
221

222
   for(size_t j = start; j <= end; ++j) {
1,667,795✔
223
      if(j % 2 != 0) {
1,667,795✔
224
         continue;
775,096✔
225
      }
226

227
      if(2 * j > z_size) {
892,699✔
228
         return 0;
229
      }
230

231
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) {
892,691✔
232
         if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) {
892,691✔
233
            return j + 2;
234
         }
235
         return j;
892,296✔
236
      }
237
   }
238

239
   return 0;
240
}
241

242
/*
243
* Pick a good size for the Karatsuba squaring
244
*/
245
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) {
6,752,723✔
246
   if(x_sw == x_size) {
6,752,723✔
247
      if(x_sw % 2 != 0) {
6,653,021✔
248
         return 0;
249
      }
250
      return x_sw;
6,650,581✔
251
   }
252

253
   for(size_t j = x_sw; j <= x_size; ++j) {
105,855✔
254
      if(j % 2 != 0) {
105,855✔
255
         continue;
6,153✔
256
      }
257

258
      if(2 * j > z_size) {
99,702✔
259
         return 0;
260
      }
261

262
      if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) {
93,549✔
263
         return j + 2;
×
264
      }
265
      return j;
266
   }
267

268
   return 0;
269
}
270

271
template <size_t SZ>
272
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
293,347,061✔
273
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
55,147,840✔
274
}
275

276
template <size_t SZ>
277
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
355,610,518✔
278
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
53,089,633✔
279
}
280

281
}  // namespace
282

283
void bigint_mul(word z[],
75,183,548✔
284
                size_t z_size,
285
                const word x[],
286
                size_t x_size,
287
                size_t x_sw,
288
                const word y[],
289
                size_t y_size,
290
                size_t y_sw,
291
                word workspace[],
292
                size_t ws_size) {
293
   clear_mem(z, z_size);
75,183,548✔
294

295
   if(x_sw == 1) {
75,183,548✔
296
      bigint_linmul3(z, y, y_sw, x[0]);
4,048,261✔
297
   } else if(y_sw == 1) {
71,135,287✔
298
      bigint_linmul3(z, x, x_sw, y[0]);
5,019,914✔
299
   } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) {
66,115,373✔
300
      bigint_comba_mul4(z, x, y);
12,491,033✔
301
   } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) {
53,624,340✔
302
      bigint_comba_mul6(z, x, y);
3,959,782✔
303
   } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) {
49,664,558✔
304
      bigint_comba_mul8(z, x, y);
5,348,987✔
305
   } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) {
44,315,571✔
306
      bigint_comba_mul9(z, x, y);
2,555,440✔
307
   } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) {
41,760,131✔
308
      bigint_comba_mul16(z, x, y);
3,893,043✔
309
   } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) {
37,867,088✔
310
      bigint_comba_mul24(z, x, y);
2,624,078✔
311
   } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || workspace == nullptr) {
35,243,010✔
312
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
32,279,720✔
313
   } else {
314
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
2,963,290✔
315

316
      if(N > 0 && z_size >= 2 * N && ws_size >= 2 * N) {
2,963,290✔
317
         karatsuba_mul(z, x, y, N, workspace);
2,962,593✔
318
      } else {
319
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
697✔
320
      }
321
   }
322
}
75,183,548✔
323

324
/*
325
* Squaring Algorithm Dispatcher
326
*/
327
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) {
79,548,050✔
328
   clear_mem(z, z_size);
79,548,050✔
329

330
   BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient");
79,548,050✔
331

332
   if(x_sw == 1) {
79,548,050✔
333
      bigint_linmul3(z, x, x_sw, x[0]);
3,273,195✔
334
   } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) {
76,274,855✔
335
      bigint_comba_sqr4(z, x);
9,620,809✔
336
   } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) {
66,654,046✔
337
      bigint_comba_sqr6(z, x);
2,898,456✔
338
   } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) {
63,755,590✔
339
      bigint_comba_sqr8(z, x);
8,105,008✔
340
   } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) {
55,650,582✔
341
      bigint_comba_sqr9(z, x);
2,195,214✔
342
   } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) {
53,455,368✔
343
      bigint_comba_sqr16(z, x);
13,635,291✔
344
   } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) {
39,820,077✔
345
      bigint_comba_sqr24(z, x);
9,318,372✔
346
   } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || workspace == nullptr) {
30,501,705✔
347
      basecase_sqr(z, z_size, x, x_sw);
23,748,982✔
348
   } else {
349
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
6,752,723✔
350

351
      if(N > 0 && z_size >= 2 * N && ws_size >= 2 * N) {
6,752,723✔
352
         karatsuba_sqr(z, x, N, workspace);
6,744,130✔
353
      } else {
354
         basecase_sqr(z, z_size, x, x_sw);
8,593✔
355
      }
356
   }
357
}
79,548,050✔
358

359
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc