• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

openmc-dev / openmc / 23011177584

12 Mar 2026 03:57PM UTC coverage: 80.895% (-0.7%) from 81.566%
23011177584

Pull #3863

github

web-flow
Merge 00a6e11ae into ba94c5823
Pull Request #3863: Shared Secondary Particle Bank

16504 of 23690 branches covered (69.67%)

Branch coverage included in aggregate %.

261 of 367 new or added lines in 17 files covered. (71.12%)

609 existing lines in 46 files now uncovered.

56448 of 66491 relevant lines covered (84.9%)

25290933.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.77
/src/bank.cpp
1
#include "openmc/bank.h"
2
#include "openmc/capi.h"
3
#include "openmc/error.h"
4
#include "openmc/ifp.h"
5
#include "openmc/message_passing.h"
6
#include "openmc/simulation.h"
7
#include "openmc/vector.h"
8

9
#include <cstdint>
10
#include <limits>
11
#include <numeric>
12

13
namespace openmc {
14

15
//==============================================================================
16
// Global variables
17
//==============================================================================
18

19
namespace simulation {

// Primary source sites used to start particle histories on this rank.
vector<SourceSite> source_bank;

// Sites banked when particles cross a designated surface. Stored in a
// SharedArray so multiple threads can append concurrently (see the note on
// fission_bank below for how SharedArray is used).
SharedArray<SourceSite> surf_source_bank;

// Sites banked at collision events for collision-track output; SharedArray
// for the same concurrent-append reason as surf_source_bank.
SharedArray<CollisionTrackSite> collision_track_bank;

// The fission bank is allocated as a SharedArray, rather than a vector, as it
// will be shared by all threads in the simulation. It will be allocated to a
// fixed maximum capacity in the init_fission_bank() function. Then, Elements
// will be added to it by using SharedArray's special thread_safe_append()
// function.
SharedArray<SourceSite> fission_bank;

// Iterated fission probability (IFP) data banks. Each outer entry appears to
// correspond to one banked site (sort_bank() permutes them with the same
// per-site index used for the fission bank) -- one delayed-group/lifetime
// record per site for the source and fission banks respectively.
vector<vector<int>> ifp_source_delayed_group_bank;

vector<vector<double>> ifp_source_lifetime_bank;

vector<vector<int>> ifp_fission_delayed_group_bank;

vector<vector<double>> ifp_fission_lifetime_bank;

// Each entry in this vector corresponds to the number of progeny produced
// this generation for the particle located at that index. This vector is
// used to efficiently sort the fission bank after each iteration.
// NOTE: sort_bank() overwrites these counts in place with their exclusive
// prefix sums, so the raw counts are consumed each time a bank is sorted.
vector<int64_t> progeny_per_particle;

// When shared secondary bank mode is enabled, secondaries produced during
// transport are collected in the write bank. When a secondary generation is
// complete, write is moved to read for transport, and a new empty write bank
// is created. This repeats until no secondaries remain.
SharedArray<SourceSite> shared_secondary_bank_read;
SharedArray<SourceSite> shared_secondary_bank_write;

} // namespace simulation
55

56
//==============================================================================
57
// Non-member functions
58
//==============================================================================
59

60
void free_memory_bank()
2,704✔
61
{
62
  simulation::source_bank.clear();
2,704✔
63
  simulation::surf_source_bank.clear();
2,704✔
64
  simulation::collision_track_bank.clear();
2,704✔
65
  simulation::fission_bank.clear();
2,704✔
66
  simulation::progeny_per_particle.clear();
2,704✔
67
  simulation::ifp_source_delayed_group_bank.clear();
2,704✔
68
  simulation::ifp_source_lifetime_bank.clear();
2,704✔
69
  simulation::ifp_fission_delayed_group_bank.clear();
2,704✔
70
  simulation::ifp_fission_lifetime_bank.clear();
2,704✔
71
  simulation::shared_secondary_bank_read.clear();
2,704✔
72
  simulation::shared_secondary_bank_write.clear();
2,704✔
73
}
2,704✔
74

75
void init_fission_bank(int64_t max)
1,216✔
76
{
77
  simulation::fission_bank.reserve(max);
1,216✔
78
  simulation::progeny_per_particle.resize(simulation::work_per_rank);
1,216✔
79
}
1,216✔
80

81
// Performs an O(n) sort on a fission or secondary bank, by leveraging
// the parent_id and progeny_id fields of banked particles. See the following
// paper for more details:
// "Reproducibility and Monte Carlo Eigenvalue Calculations," F.B. Brown and
// T.M. Sutton, 1992 ANS Annual Meeting, Transactions of the American Nuclear
// Society, Volume 65, Page 235.
//
// bank: the bank to sort in place.
// is_fission_bank: when true (and IFP is on), the IFP data banks are permuted
//   alongside the sites so they stay parallel to the fission bank.
void sort_bank(SharedArray<SourceSite>& bank, bool is_fission_bank)
{
  // Ensure we don't read off the end of the array if we ran with 0 particles
  if (simulation::progeny_per_particle.size() == 0) {
    return;
  }

  // Perform exclusive scan summation to determine starting indices in fission
  // bank for each parent particle id.
  // NOTE: this converts progeny_per_particle in place from per-parent progeny
  // counts into per-parent starting offsets -- the raw counts are consumed.
  std::exclusive_scan(simulation::progeny_per_particle.begin(),
    simulation::progeny_per_particle.end(),
    simulation::progeny_per_particle.begin(), 0);

  // We need a scratch vector to make permutation of the fission bank into
  // sorted order easy. Under normal usage conditions, the fission bank is
  // over provisioned, so we can use that as scratch space.
  SourceSite* sorted_bank;
  vector<SourceSite> sorted_bank_holder;
  vector<vector<int>> sorted_ifp_delayed_group_bank;
  vector<vector<double>> sorted_ifp_lifetime_bank;

  // If there is not enough space, allocate a temporary vector and point to it
  if (bank.size() > bank.capacity() / 2) {
    sorted_bank_holder.resize(bank.size());
    sorted_bank = sorted_bank_holder.data();
  } else { // otherwise, point sorted_bank to unused portion of the fission bank
    // Aliasing is safe here: the loop below writes only sorted_bank[idx] with
    // 0 <= idx < size(), and since size() <= capacity()/2 in this branch,
    // data() + size() + idx never exceeds the allocated capacity.
    sorted_bank = bank.data() + bank.size();
  }

  if (settings::ifp_on && is_fission_bank) {
    allocate_temporary_vector_ifp(
      sorted_ifp_delayed_group_bank, sorted_ifp_lifetime_bank);
  }

  // Use parent and progeny indices to sort bank: each site's final position
  // is its parent's starting offset plus the site's progeny index.
  for (int64_t i = 0; i < bank.size(); i++) {
    const auto& site = bank[i];
    if (site.parent_id < 0 ||
        site.parent_id >=
          static_cast<int64_t>(simulation::progeny_per_particle.size())) {
      fatal_error(fmt::format("Invalid parent_id {} for banked site (expected "
                              "range [0, {})).",
        site.parent_id, simulation::progeny_per_particle.size()));
    }
    int64_t idx =
      simulation::progeny_per_particle[site.parent_id] + site.progeny_id;
    if (idx < 0 || idx >= bank.size()) {
      fatal_error("Mismatch detected between sum of all particle progeny and "
                  "bank size during sorting.");
    }
    sorted_bank[idx] = site;
    if (settings::ifp_on && is_fission_bank) {
      // Keep IFP records parallel to the permuted sites
      copy_ifp_data_from_fission_banks(
        i, sorted_ifp_delayed_group_bank[idx], sorted_ifp_lifetime_bank[idx]);
    }
  }

  // Copy sorted bank into the fission bank
  std::copy(sorted_bank, sorted_bank + bank.size(), bank.data());
  if (settings::ifp_on && is_fission_bank) {
    copy_ifp_data_to_fission_banks(
      sorted_ifp_delayed_group_bank.data(), sorted_ifp_lifetime_bank.data());
  }
}
151

152
// This function redistributes SourceSite particles across MPI ranks to
// achieve load balancing while preserving the global ordering of particles.
//
// GUARANTEES:
// -----------
// 1. Global Order Preservation: After redistribution, each rank holds a
//    contiguous slice of the original global ordering. For example, if the
//    input across 3 ranks was:
//      - Rank 0: IDs 0-4
//      - Rank 1: IDs 5-6
//      - Rank 2: IDs 7-200
//    Then after redistribution (assuming ~67 particles per rank):
//      - Rank 0: IDs 0-66   (contiguous)
//      - Rank 1: IDs 67-133 (contiguous)
//      - Rank 2: IDs 134-200 (contiguous)
//    The global ordering is always preserved - no rank will ever hold
//    non-contiguous ID ranges like "0-4 and 100-200".
//
// 2. Even Load Balancing: Particles are distributed as evenly as possible.
//    If total % n_procs != 0, the first 'remainder' ranks each get one extra
//    particle (i.e., floor division with remainder distributed to lower
//    ranks). This follows the same logic as calculate_work().
//
// HOW IT WORKS:
// -------------
// The algorithm uses overlap-based redistribution:
// 1. Each rank's current data occupies a range [cumulative_before[rank],
//    cumulative_before[rank+1]) in the global index space.
// 2. Each rank's target data should occupy [cumulative_target[rank],
//    cumulative_target[rank+1]) in the same global index space.
// 3. For each pair of (source_rank, dest_rank), we calculate the overlap
//    between what source_rank currently has and what dest_rank needs.
// 4. MPI_Alltoallv transfers exactly these overlapping regions, with
//    displacements ensuring data lands at the correct position in the
//    receiving buffer.
//
// EDGE CASES HANDLED:
// -------------------
// - Single rank (n_procs == 1): Returns immediately with local size, no MPI.
// - Empty total (all ranks have 0 particles): Returns 0 immediately.
// - Imbalanced input (e.g., one rank has all particles): Works correctly;
//   that rank will send portions to all other ranks based on target ranges.
// - Non-divisible totals: First 'remainder' ranks get one extra particle.
//
// Returns the global total number of secondaries (or the local size when
// running without MPI / on a single rank).
int64_t synchronize_global_secondary_bank(
  SharedArray<SourceSite>& shared_secondary_bank)
{
  // Get current size of local bank
  int64_t local_size = shared_secondary_bank.size();

  if (mpi::n_procs == 1) {
    return local_size;
  }

#ifdef OPENMC_MPI
  // Gather all sizes to all ranks
  vector<int64_t> all_sizes(mpi::n_procs);
  MPI_Allgather(&local_size, 1, MPI_INT64_T, all_sizes.data(), 1, MPI_INT64_T,
    mpi::intracomm);

  // Calculate total and check for empty case
  int64_t total = 0;
  for (int64_t size : all_sizes) {
    total += size;
  }

  // If we don't have any items to distribute, return
  // (all ranks compute the same total, so all return together -- no rank is
  // left waiting in the Alltoallv below)
  if (total == 0) {
    return total;
  }

  int64_t base_count = total / mpi::n_procs;
  int64_t remainder = total % mpi::n_procs;

  // Calculate target size for each rank
  // First 'remainder' ranks get base_count + 1, rest get base_count
  vector<int64_t> target_sizes(mpi::n_procs);
  for (int i = 0; i < mpi::n_procs; ++i) {
    target_sizes[i] = base_count + (i < remainder ? 1 : 0);
  }

  // Calculate send and receive counts in terms of SourceSite objects
  // (not bytes) -- the unit matches the mpi::source_site datatype passed to
  // MPI_Alltoallv below.
  vector<int> send_counts(mpi::n_procs, 0);
  vector<int> recv_counts(mpi::n_procs, 0);
  vector<int> send_displs(mpi::n_procs, 0);
  vector<int> recv_displs(mpi::n_procs, 0);

  // Calculate cumulative positions (starting index for each rank in the
  // global array)
  vector<int64_t> cumulative_before(mpi::n_procs + 1, 0);
  vector<int64_t> cumulative_target(mpi::n_procs + 1, 0);
  for (int i = 0; i < mpi::n_procs; ++i) {
    cumulative_before[i + 1] = cumulative_before[i] + all_sizes[i];
    cumulative_target[i + 1] = cumulative_target[i] + target_sizes[i];
  }

  // Determine send and receive amounts for each rank
  int64_t my_start = cumulative_before[mpi::rank];
  int64_t my_end = cumulative_before[mpi::rank + 1];
  int64_t my_target_start = cumulative_target[mpi::rank];
  int64_t my_target_end = cumulative_target[mpi::rank + 1];

  for (int r = 0; r < mpi::n_procs; ++r) {
    // Send: overlap between my current range and rank r's target range
    int64_t send_overlap_start = std::max(my_start, cumulative_target[r]);
    int64_t send_overlap_end = std::min(my_end, cumulative_target[r + 1]);
    if (send_overlap_start < send_overlap_end) {
      int64_t count = send_overlap_end - send_overlap_start;
      int64_t displ = send_overlap_start - my_start;
      // MPI_Alltoallv counts/displacements are C ints; guard the narrowing
      if (count > std::numeric_limits<int>::max() ||
          displ > std::numeric_limits<int>::max()) {
        fatal_error("Secondary bank size exceeds MPI_Alltoallv int limit.");
      }
      send_counts[r] = static_cast<int>(count);
      send_displs[r] = static_cast<int>(displ);
    }

    // Recv: overlap between rank r's current range and my target range
    int64_t recv_overlap_start =
      std::max(cumulative_before[r], my_target_start);
    int64_t recv_overlap_end =
      std::min(cumulative_before[r + 1], my_target_end);
    if (recv_overlap_start < recv_overlap_end) {
      int64_t count = recv_overlap_end - recv_overlap_start;
      int64_t displ = recv_overlap_start - my_target_start;
      if (count > std::numeric_limits<int>::max() ||
          displ > std::numeric_limits<int>::max()) {
        fatal_error("Secondary bank size exceeds MPI_Alltoallv int limit.");
      }
      recv_counts[r] = static_cast<int>(count);
      recv_displs[r] = static_cast<int>(displ);
    }
  }

  // Prepare receive buffer with target size
  SharedArray<SourceSite> new_bank(target_sizes[mpi::rank]);

  // Perform all-to-all redistribution using the custom MPI type
  MPI_Alltoallv(shared_secondary_bank.data(), send_counts.data(),
    send_displs.data(), mpi::source_site, new_bank.data(), recv_counts.data(),
    recv_displs.data(), mpi::source_site, mpi::intracomm);

  // Replace old bank with redistributed data
  shared_secondary_bank = std::move(new_bank);

  return total;
#else
  // Without MPI support there is nothing to redistribute
  return local_size;
#endif
}
302

303
//==============================================================================
304
// C API
305
//==============================================================================
306

307
extern "C" int openmc_source_bank(void** ptr, int64_t* n)
4✔
308
{
309
  if (!ptr || !n) {
4!
310
    set_errmsg("Received null pointer.");
×
311
    return OPENMC_E_INVALID_ARGUMENT;
×
312
  }
313

314
  if (simulation::source_bank.size() == 0) {
4!
315
    set_errmsg("Source bank has not been allocated.");
×
316
    return OPENMC_E_ALLOCATE;
×
317
  } else {
318
    *ptr = simulation::source_bank.data();
4✔
319
    *n = simulation::source_bank.size();
4✔
320
    return 0;
4✔
321
  }
322
}
323

324
extern "C" int openmc_fission_bank(void** ptr, int64_t* n)
×
325
{
326
  if (!ptr || !n) {
×
327
    set_errmsg("Received null pointer.");
×
328
    return OPENMC_E_INVALID_ARGUMENT;
×
329
  }
330

331
  if (simulation::fission_bank.size() == 0) {
×
332
    set_errmsg("Fission bank has not been allocated.");
×
333
    return OPENMC_E_ALLOCATE;
×
334
  } else {
335
    *ptr = simulation::fission_bank.data();
×
336
    *n = simulation::fission_bank.size();
×
337
    return 0;
×
338
  }
339
}
340

341
} // namespace openmc
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc