• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 5079590438

25 May 2023 12:28PM UTC coverage: 92.228% (+0.5%) from 91.723%
5079590438

Pull #3502

github

Pull Request #3502: Apply clang-format to the codebase

75589 of 81959 relevant lines covered (92.23%)

12139530.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.57
/src/lib/math/mp/mp_karat.cpp
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8

9
#include <botan/internal/mp_core.h>
10

11
#include <botan/exceptn.h>
12
#include <botan/mem_ops.h>
13
#include <botan/internal/ct_utils.h>
14

15
namespace Botan {
16

17
/*
18
* Simple O(N^2) Multiplication
19
*/
20
void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) {
6,714,126✔
21
   if(z_size < x_size + y_size)
6,714,126✔
22
      throw Invalid_Argument("basecase_mul z_size too small");
×
23

24
   const size_t x_size_8 = x_size - (x_size % 8);
6,714,126✔
25

26
   clear_mem(z, z_size);
6,714,126✔
27

28
   for(size_t i = 0; i != y_size; ++i) {
73,263,290✔
29
      const word y_i = y[i];
66,549,164✔
30

31
      word carry = 0;
66,549,164✔
32

33
      for(size_t j = 0; j != x_size_8; j += 8)
152,981,110✔
34
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
86,431,946✔
35

36
      for(size_t j = x_size_8; j != x_size; ++j)
209,494,278✔
37
         z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry);
142,945,114✔
38

39
      z[x_size + i] = carry;
66,549,164✔
40
   }
41
}
6,714,126✔
42

43
void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) {
6,032,284✔
44
   if(z_size < 2 * x_size)
6,032,284✔
45
      throw Invalid_Argument("basecase_sqr z_size too small");
×
46

47
   const size_t x_size_8 = x_size - (x_size % 8);
6,032,284✔
48

49
   clear_mem(z, z_size);
6,032,284✔
50

51
   for(size_t i = 0; i != x_size; ++i) {
56,461,525✔
52
      const word x_i = x[i];
50,429,241✔
53

54
      word carry = 0;
50,429,241✔
55

56
      for(size_t j = 0; j != x_size_8; j += 8)
112,808,267✔
57
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
62,379,026✔
58

59
      for(size_t j = x_size_8; j != x_size; ++j)
180,780,308✔
60
         z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry);
130,351,067✔
61

62
      z[x_size + i] = carry;
50,429,241✔
63
   }
64
}
6,032,284✔
65

66
namespace {
67

68
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
69
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
70

71
/*
72
* Karatsuba Multiplication Operation
73
*/
74
void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) {
12,399,592✔
75
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) {
12,399,592✔
76
      switch(N) {
9,011,841✔
77
         case 6:
×
78
            return bigint_comba_mul6(z, x, y);
9,011,841✔
79
         case 8:
×
80
            return bigint_comba_mul8(z, x, y);
×
81
         case 9:
×
82
            return bigint_comba_mul9(z, x, y);
×
83
         case 16:
7,332,768✔
84
            return bigint_comba_mul16(z, x, y);
7,332,768✔
85
         case 24:
1,673,769✔
86
            return bigint_comba_mul24(z, x, y);
1,673,769✔
87
         default:
5,304✔
88
            return basecase_mul(z, 2 * N, x, N, y, N);
5,304✔
89
      }
90
   }
91

92
   const size_t N2 = N / 2;
3,387,751✔
93

94
   const word* x0 = x;
3,387,751✔
95
   const word* x1 = x + N2;
3,387,751✔
96
   const word* y0 = y;
3,387,751✔
97
   const word* y1 = y + N2;
3,387,751✔
98
   word* z0 = z;
3,387,751✔
99
   word* z1 = z + N;
3,387,751✔
100

101
   word* ws0 = workspace;
3,387,751✔
102
   word* ws1 = workspace + N;
3,387,751✔
103

104
   clear_mem(workspace, 2 * N);
3,387,751✔
105

106
   /*
107
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
108
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
109
   * anything, clear_mem above already set the correct result.
110
   *
111
   * However we ignore the result of the comparisons and always perform the
112
   * subtractions and recursively multiply to avoid the timing channel.
113
   */
114

115
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
116
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
3,387,751✔
117
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
3,387,751✔
118
   const auto neg_mask = ~(cmp0 ^ cmp1);
3,387,751✔
119

120
   karatsuba_mul(ws0, z0, z1, N2, ws1);
3,387,751✔
121

122
   // Compute X_lo * Y_lo
123
   karatsuba_mul(z0, x0, y0, N2, ws1);
3,387,751✔
124

125
   // Compute X_hi * Y_hi
126
   karatsuba_mul(z1, x1, y1, N2, ws1);
3,387,751✔
127

128
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
3,387,751✔
129
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
3,387,751✔
130

131
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
3,387,751✔
132
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
3,387,751✔
133

134
   clear_mem(workspace + N, N2);
3,387,751✔
135

136
   bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2 * N - N2);
3,387,751✔
137
}
138

139
/*
140
* Karatsuba Squaring Operation
141
*/
142
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) {
40,396,348✔
143
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) {
40,396,348✔
144
      switch(N) {
29,389,669✔
145
         case 6:
×
146
            return bigint_comba_sqr6(z, x);
29,389,669✔
147
         case 8:
×
148
            return bigint_comba_sqr8(z, x);
×
149
         case 9:
×
150
            return bigint_comba_sqr9(z, x);
×
151
         case 16:
23,541,132✔
152
            return bigint_comba_sqr16(z, x);
23,541,132✔
153
         case 24:
4,961,097✔
154
            return bigint_comba_sqr24(z, x);
4,961,097✔
155
         default:
887,440✔
156
            return basecase_sqr(z, 2 * N, x, N);
887,440✔
157
      }
158
   }
159

160
   const size_t N2 = N / 2;
11,006,679✔
161

162
   const word* x0 = x;
11,006,679✔
163
   const word* x1 = x + N2;
11,006,679✔
164
   word* z0 = z;
11,006,679✔
165
   word* z1 = z + N;
11,006,679✔
166

167
   word* ws0 = workspace;
11,006,679✔
168
   word* ws1 = workspace + N;
11,006,679✔
169

170
   clear_mem(workspace, 2 * N);
11,006,679✔
171

172
   // See comment in karatsuba_mul
173
   bigint_sub_abs(z0, x0, x1, N2, workspace);
11,006,679✔
174
   karatsuba_sqr(ws0, z0, N2, ws1);
11,006,679✔
175

176
   karatsuba_sqr(z0, x0, N2, ws1);
11,006,679✔
177
   karatsuba_sqr(z1, x1, N2, ws1);
11,006,679✔
178

179
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
11,006,679✔
180
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
11,006,679✔
181

182
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
11,006,679✔
183
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
11,006,679✔
184

185
   /*
186
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
187
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
188
   * timing channel.
189
   */
190
   bigint_sub2(z + N2, 2 * N - N2, ws0, N);
11,006,679✔
191
}
192

193
/*
194
* Pick a good size for the Karatsuba multiply
195
*/
196
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) {
2,375,127✔
197
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
2,375,127✔
198
      return 0;
199

200
   if(((x_size == x_sw) && (x_size % 2)) || ((y_size == y_sw) && (y_size % 2)))
2,375,067✔
201
      return 0;
202

203
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
2,374,469✔
204
   const size_t end = (x_size < y_size) ? x_size : y_size;
2,374,469✔
205

206
   if(start == end) {
2,374,469✔
207
      if(start % 2)
1,331,219✔
208
         return 0;
209
      return start;
1,331,219✔
210
   }
211

212
   for(size_t j = start; j <= end; ++j) {
1,181,619✔
213
      if(j % 2)
1,181,619✔
214
         continue;
138,369✔
215

216
      if(2 * j > z_size)
1,043,250✔
217
         return 0;
218

219
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) {
905,271✔
220
         if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size)
905,271✔
221
            return j + 2;
222
         return j;
904,871✔
223
      }
224
   }
225

226
   return 0;
227
}
228

229
/*
230
* Pick a good size for the Karatsuba squaring
231
*/
232
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) {
8,788,902✔
233
   if(x_sw == x_size) {
8,788,902✔
234
      if(x_sw % 2)
5,266✔
235
         return 0;
236
      return x_sw;
5,265✔
237
   }
238

239
   for(size_t j = x_sw; j <= x_size; ++j) {
10,196,226✔
240
      if(j % 2)
10,196,226✔
241
         continue;
1,412,590✔
242

243
      if(2 * j > z_size)
8,783,636✔
244
         return 0;
245

246
      if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size)
7,371,046✔
247
         return j + 2;
×
248
      return j;
249
   }
250

251
   return 0;
252
}
253

254
template <size_t SZ>
255
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
223,432,212✔
256
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
91,734,401✔
257
}
258

259
template <size_t SZ>
260
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
323,074,773✔
261
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
94,753,992✔
262
}
263

264
}
265

266
void bigint_mul(word z[],
87,329,119✔
267
                size_t z_size,
268
                const word x[],
269
                size_t x_size,
270
                size_t x_sw,
271
                const word y[],
272
                size_t y_size,
273
                size_t y_sw,
274
                word workspace[],
275
                size_t ws_size) {
276
   clear_mem(z, z_size);
87,329,119✔
277

278
   if(x_sw == 1) {
87,329,119✔
279
      bigint_linmul3(z, y, y_sw, x[0]);
2,380,609✔
280
   } else if(y_sw == 1) {
84,948,510✔
281
      bigint_linmul3(z, x, x_sw, y[0]);
5✔
282
   } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) {
84,948,505✔
283
      bigint_comba_mul4(z, x, y);
33,743,742✔
284
   } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) {
51,204,763✔
285
      bigint_comba_mul6(z, x, y);
19,295,219✔
286
   } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) {
31,909,544✔
287
      bigint_comba_mul8(z, x, y);
5,258,093✔
288
   } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) {
26,651,451✔
289
      bigint_comba_mul9(z, x, y);
9,840,685✔
290
   } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) {
16,810,766✔
291
      bigint_comba_mul16(z, x, y);
4,903,583✔
292
   } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) {
11,907,183✔
293
      bigint_comba_mul24(z, x, y);
2,962,278✔
294
   } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || !workspace) {
8,944,905✔
295
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
6,569,778✔
296
   } else {
297
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
2,375,127✔
298

299
      if(N && z_size >= 2 * N && ws_size >= 2 * N)
2,375,127✔
300
         karatsuba_mul(z, x, y, N, workspace);
2,236,339✔
301
      else
302
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
138,788✔
303
   }
304
}
87,329,119✔
305

306
/*
307
* Squaring Algorithm Dispatcher
308
*/
309
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) {
96,700,401✔
310
   clear_mem(z, z_size);
96,700,401✔
311

312
   BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient");
96,700,401✔
313

314
   if(x_sw == 1) {
96,700,401✔
315
      bigint_linmul3(z, x, x_sw, x[0]);
3,227,364✔
316
   } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) {
93,473,037✔
317
      bigint_comba_sqr4(z, x);
23,259,157✔
318
   } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) {
70,213,880✔
319
      bigint_comba_sqr6(z, x);
16,710,878✔
320
   } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) {
53,503,002✔
321
      bigint_comba_sqr8(z, x);
7,614,672✔
322
   } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) {
45,888,330✔
323
      bigint_comba_sqr9(z, x);
8,134,524✔
324
   } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) {
37,753,806✔
325
      bigint_comba_sqr16(z, x);
15,511,088✔
326
   } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) {
22,242,718✔
327
      bigint_comba_sqr24(z, x);
9,721,755✔
328
   } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) {
12,520,963✔
329
      basecase_sqr(z, z_size, x, x_sw);
3,732,061✔
330
   } else {
331
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
8,788,902✔
332

333
      if(N && z_size >= 2 * N && ws_size >= 2 * N)
8,788,902✔
334
         karatsuba_sqr(z, x, N, workspace);
7,376,311✔
335
      else
336
         basecase_sqr(z, z_size, x, x_sw);
1,412,591✔
337
   }
338
}
96,700,401✔
339

340
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc