• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

openmc-dev / openmc / 23011177584

12 Mar 2026 03:57PM UTC coverage: 80.895% (-0.7%) from 81.566%
23011177584

Pull #3863

github

web-flow
Merge 00a6e11ae into ba94c5823
Pull Request #3863: Shared Secondary Particle Bank

16504 of 23690 branches covered (69.67%)

Branch coverage included in aggregate %.

261 of 367 new or added lines in 17 files covered. (71.12%)

609 existing lines in 46 files now uncovered.

56448 of 66491 relevant lines covered (84.9%)

25290933.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.77
/src/bank.cpp
1
#include "openmc/bank.h"
2
#include "openmc/capi.h"
3
#include "openmc/error.h"
4
#include "openmc/ifp.h"
5
#include "openmc/message_passing.h"
6
#include "openmc/simulation.h"
7
#include "openmc/vector.h"
8

9
#include <cstdint>
10
#include <limits>
11
#include <numeric>
12

13
namespace openmc {
14

15
//==============================================================================
16
// Global variables
17
//==============================================================================
18

19
namespace simulation {

// Primary source sites used to start particle histories on this rank.
vector<SourceSite> source_bank;

// Sites banked when particles cross a designated surface. Stored in a
// SharedArray so multiple threads can append concurrently (see the note on
// fission_bank below for how SharedArray is used).
SharedArray<SourceSite> surf_source_bank;

// Sites banked at collision events for collision-track output; SharedArray
// for the same concurrent-append reason as surf_source_bank.
SharedArray<CollisionTrackSite> collision_track_bank;

// The fission bank is allocated as a SharedArray, rather than a vector, as it
// will be shared by all threads in the simulation. It will be allocated to a
// fixed maximum capacity in the init_fission_bank() function. Then, Elements
// will be added to it by using SharedArray's special thread_safe_append()
// function.
SharedArray<SourceSite> fission_bank;

// Iterated fission probability (IFP) data banks. Each outer entry appears to
// correspond to one banked site (sort_bank() permutes them with the same
// per-site index used for the fission bank) -- one delayed-group/lifetime
// record per site for the source and fission banks respectively.
vector<vector<int>> ifp_source_delayed_group_bank;

vector<vector<double>> ifp_source_lifetime_bank;

vector<vector<int>> ifp_fission_delayed_group_bank;

vector<vector<double>> ifp_fission_lifetime_bank;

// Each entry in this vector corresponds to the number of progeny produced
// this generation for the particle located at that index. This vector is
// used to efficiently sort the fission bank after each iteration.
// NOTE: sort_bank() overwrites these counts in place with their exclusive
// prefix sums, so the raw counts are consumed each time a bank is sorted.
vector<int64_t> progeny_per_particle;

// When shared secondary bank mode is enabled, secondaries produced during
// transport are collected in the write bank. When a secondary generation is
// complete, write is moved to read for transport, and a new empty write bank
// is created. This repeats until no secondaries remain.
SharedArray<SourceSite> shared_secondary_bank_read;
SharedArray<SourceSite> shared_secondary_bank_write;

} // namespace simulation
55

56
//==============================================================================
57
// Non-member functions
58
//==============================================================================
59

60
void free_memory_bank()
2,704✔
61
{
62
  simulation::source_bank.clear();
2,704✔
63
  simulation::surf_source_bank.clear();
2,704✔
64
  simulation::collision_track_bank.clear();
2,704✔
65
  simulation::fission_bank.clear();
2,704✔
66
  simulation::progeny_per_particle.clear();
2,704✔
67
  simulation::ifp_source_delayed_group_bank.clear();
2,704✔
68
  simulation::ifp_source_lifetime_bank.clear();
2,704✔
69
  simulation::ifp_fission_delayed_group_bank.clear();
2,704✔
70
  simulation::ifp_fission_lifetime_bank.clear();
2,704✔
71
  simulation::shared_secondary_bank_read.clear();
2,704✔
72
  simulation::shared_secondary_bank_write.clear();
2,704✔
73
}
2,704✔
74

75
void init_fission_bank(int64_t max)
1,216✔
76
{
77
  simulation::fission_bank.reserve(max);
1,216✔
78
  simulation::progeny_per_particle.resize(simulation::work_per_rank);
1,216✔
79
}
1,216✔
80

81
// Performs an O(n) sort on a fission or secondary bank, by leveraging
// the parent_id and progeny_id fields of banked particles. See the following
// paper for more details:
// "Reproducibility and Monte Carlo Eigenvalue Calculations," F.B. Brown and
// T.M. Sutton, 1992 ANS Annual Meeting, Transactions of the American Nuclear
// Society, Volume 65, Page 235.
//
// bank: the bank to sort in place.
// is_fission_bank: when true (and IFP is on), the IFP data banks are permuted
//   alongside the sites so they stay parallel to the fission bank.
void sort_bank(SharedArray<SourceSite>& bank, bool is_fission_bank)
{
  // Ensure we don't read off the end of the array if we ran with 0 particles
  if (simulation::progeny_per_particle.size() == 0) {
    return;
  }

  // Perform exclusive scan summation to determine starting indices in fission
  // bank for each parent particle id.
  // NOTE: this converts progeny_per_particle in place from per-parent progeny
  // counts into per-parent starting offsets -- the raw counts are consumed.
  std::exclusive_scan(simulation::progeny_per_particle.begin(),
    simulation::progeny_per_particle.end(),
    simulation::progeny_per_particle.begin(), 0);

  // We need a scratch vector to make permutation of the fission bank into
  // sorted order easy. Under normal usage conditions, the fission bank is
  // over provisioned, so we can use that as scratch space.
  SourceSite* sorted_bank;
  vector<SourceSite> sorted_bank_holder;
  vector<vector<int>> sorted_ifp_delayed_group_bank;
  vector<vector<double>> sorted_ifp_lifetime_bank;

  // If there is not enough space, allocate a temporary vector and point to it
  if (bank.size() > bank.capacity() / 2) {
    sorted_bank_holder.resize(bank.size());
    sorted_bank = sorted_bank_holder.data();
  } else { // otherwise, point sorted_bank to unused portion of the fission bank
    // Aliasing is safe here: the loop below writes only sorted_bank[idx] with
    // 0 <= idx < size(), and since size() <= capacity()/2 in this branch,
    // data() + size() + idx never exceeds the allocated capacity.
    sorted_bank = bank.data() + bank.size();
  }

  if (settings::ifp_on && is_fission_bank) {
    allocate_temporary_vector_ifp(
      sorted_ifp_delayed_group_bank, sorted_ifp_lifetime_bank);
  }

  // Use parent and progeny indices to sort bank: each site's final position
  // is its parent's starting offset plus the site's progeny index.
  for (int64_t i = 0; i < bank.size(); i++) {
    const auto& site = bank[i];
    if (site.parent_id < 0 ||
        site.parent_id >=
          static_cast<int64_t>(simulation::progeny_per_particle.size())) {
      fatal_error(fmt::format("Invalid parent_id {} for banked site (expected "
                              "range [0, {})).",
        site.parent_id, simulation::progeny_per_particle.size()));
    }
    int64_t idx =
      simulation::progeny_per_particle[site.parent_id] + site.progeny_id;
    if (idx < 0 || idx >= bank.size()) {
      fatal_error("Mismatch detected between sum of all particle progeny and "
                  "bank size during sorting.");
    }
    sorted_bank[idx] = site;
    if (settings::ifp_on && is_fission_bank) {
      // Keep IFP records parallel to the permuted sites
      copy_ifp_data_from_fission_banks(
        i, sorted_ifp_delayed_group_bank[idx], sorted_ifp_lifetime_bank[idx]);
    }
  }

  // Copy sorted bank into the fission bank
  std::copy(sorted_bank, sorted_bank + bank.size(), bank.data());
  if (settings::ifp_on && is_fission_bank) {
    copy_ifp_data_to_fission_banks(
      sorted_ifp_delayed_group_bank.data(), sorted_ifp_lifetime_bank.data());
  }
}
151

152
// This function redistributes SourceSite particles across MPI ranks to
// achieve load balancing while preserving the global ordering of particles.
//
// GUARANTEES:
// -----------
// 1. Global Order Preservation: After redistribution, each rank holds a
//    contiguous slice of the original global ordering. For example, if the
//    input across 3 ranks was:
//      - Rank 0: IDs 0-4
//      - Rank 1: IDs 5-6
//      - Rank 2: IDs 7-200
//    Then after redistribution (assuming ~67 particles per rank):
//      - Rank 0: IDs 0-66   (contiguous)
//      - Rank 1: IDs 67-133 (contiguous)
//      - Rank 2: IDs 134-200 (contiguous)
//    The global ordering is always preserved - no rank will ever hold
//    non-contiguous ID ranges like "0-4 and 100-200".
//
// 2. Even Load Balancing: Particles are distributed as evenly as possible.
//    If total % n_procs != 0, the first 'remainder' ranks each get one extra
//    particle (i.e., floor division with remainder distributed to lower
//    ranks). This follows the same logic as calculate_work().
//
// HOW IT WORKS:
// -------------
// The algorithm uses overlap-based redistribution:
// 1. Each rank's current data occupies a range [cumulative_before[rank],
//    cumulative_before[rank+1]) in the global index space.
// 2. Each rank's target data should occupy [cumulative_target[rank],
//    cumulative_target[rank+1]) in the same global index space.
// 3. For each pair of (source_rank, dest_rank), we calculate the overlap
//    between what source_rank currently has and what dest_rank needs.
// 4. MPI_Alltoallv transfers exactly these overlapping regions, with
//    displacements ensuring data lands at the correct position in the
//    receiving buffer.
//
// EDGE CASES HANDLED:
// -------------------
// - Single rank (n_procs == 1): Returns immediately with local size, no MPI.
// - Empty total (all ranks have 0 particles): Returns 0 immediately.
// - Imbalanced input (e.g., one rank has all particles): Works correctly;
//   that rank will send portions to all other ranks based on target ranges.
// - Non-divisible totals: First 'remainder' ranks get one extra particle.
//
// Returns the global total number of secondaries (or the local size when
// running without MPI / on a single rank).
int64_t synchronize_global_secondary_bank(
  SharedArray<SourceSite>& shared_secondary_bank)
{
  // Get current size of local bank
  int64_t local_size = shared_secondary_bank.size();

  if (mpi::n_procs == 1) {
    return local_size;
  }

#ifdef OPENMC_MPI
  // Gather all sizes to all ranks
  vector<int64_t> all_sizes(mpi::n_procs);
  MPI_Allgather(&local_size, 1, MPI_INT64_T, all_sizes.data(), 1, MPI_INT64_T,
    mpi::intracomm);

  // Calculate total and check for empty case
  int64_t total = 0;
  for (int64_t size : all_sizes) {
    total += size;
  }

  // If we don't have any items to distribute, return
  // (all ranks compute the same total, so all return together -- no rank is
  // left waiting in the Alltoallv below)
  if (total == 0) {
    return total;
  }

  int64_t base_count = total / mpi::n_procs;
  int64_t remainder = total % mpi::n_procs;

  // Calculate target size for each rank
  // First 'remainder' ranks get base_count + 1, rest get base_count
  vector<int64_t> target_sizes(mpi::n_procs);
  for (int i = 0; i < mpi::n_procs; ++i) {
    target_sizes[i] = base_count + (i < remainder ? 1 : 0);
  }

  // Calculate send and receive counts in terms of SourceSite objects
  // (not bytes) -- the unit matches the mpi::source_site datatype passed to
  // MPI_Alltoallv below.
  vector<int> send_counts(mpi::n_procs, 0);
  vector<int> recv_counts(mpi::n_procs, 0);
  vector<int> send_displs(mpi::n_procs, 0);
  vector<int> recv_displs(mpi::n_procs, 0);

  // Calculate cumulative positions (starting index for each rank in the
  // global array)
  vector<int64_t> cumulative_before(mpi::n_procs + 1, 0);
  vector<int64_t> cumulative_target(mpi::n_procs + 1, 0);
  for (int i = 0; i < mpi::n_procs; ++i) {
    cumulative_before[i + 1] = cumulative_before[i] + all_sizes[i];
    cumulative_target[i + 1] = cumulative_target[i] + target_sizes[i];
  }

  // Determine send and receive amounts for each rank
  int64_t my_start = cumulative_before[mpi::rank];
  int64_t my_end = cumulative_before[mpi::rank + 1];
  int64_t my_target_start = cumulative_target[mpi::rank];
  int64_t my_target_end = cumulative_target[mpi::rank + 1];

  for (int r = 0; r < mpi::n_procs; ++r) {
    // Send: overlap between my current range and rank r's target range
    int64_t send_overlap_start = std::max(my_start, cumulative_target[r]);
    int64_t send_overlap_end = std::min(my_end, cumulative_target[r + 1]);
    if (send_overlap_start < send_overlap_end) {
      int64_t count = send_overlap_end - send_overlap_start;
      int64_t displ = send_overlap_start - my_start;
      // MPI_Alltoallv counts/displacements are C ints; guard the narrowing
      if (count > std::numeric_limits<int>::max() ||
          displ > std::numeric_limits<int>::max()) {
        fatal_error("Secondary bank size exceeds MPI_Alltoallv int limit.");
      }
      send_counts[r] = static_cast<int>(count);
      send_displs[r] = static_cast<int>(displ);
    }

    // Recv: overlap between rank r's current range and my target range
    int64_t recv_overlap_start =
      std::max(cumulative_before[r], my_target_start);
    int64_t recv_overlap_end =
      std::min(cumulative_before[r + 1], my_target_end);
    if (recv_overlap_start < recv_overlap_end) {
      int64_t count = recv_overlap_end - recv_overlap_start;
      int64_t displ = recv_overlap_start - my_target_start;
      if (count > std::numeric_limits<int>::max() ||
          displ > std::numeric_limits<int>::max()) {
        fatal_error("Secondary bank size exceeds MPI_Alltoallv int limit.");
      }
      recv_counts[r] = static_cast<int>(count);
      recv_displs[r] = static_cast<int>(displ);
    }
  }

  // Prepare receive buffer with target size
  SharedArray<SourceSite> new_bank(target_sizes[mpi::rank]);

  // Perform all-to-all redistribution using the custom MPI type
  MPI_Alltoallv(shared_secondary_bank.data(), send_counts.data(),
    send_displs.data(), mpi::source_site, new_bank.data(), recv_counts.data(),
    recv_displs.data(), mpi::source_site, mpi::intracomm);

  // Replace old bank with redistributed data
  shared_secondary_bank = std::move(new_bank);

  return total;
#else
  // Without MPI support there is nothing to redistribute
  return local_size;
#endif
}
302

303
//==============================================================================
304
// C API
305
//==============================================================================
306

307
extern "C" int openmc_source_bank(void** ptr, int64_t* n)
4✔
308
{
309
  if (!ptr || !n) {
4!
310
    set_errmsg("Received null pointer.");
×
311
    return OPENMC_E_INVALID_ARGUMENT;
×
312
  }
313

314
  if (simulation::source_bank.size() == 0) {
4!
315
    set_errmsg("Source bank has not been allocated.");
×
316
    return OPENMC_E_ALLOCATE;
×
317
  } else {
318
    *ptr = simulation::source_bank.data();
4✔
319
    *n = simulation::source_bank.size();
4✔
320
    return 0;
4✔
321
  }
322
}
323

324
extern "C" int openmc_fission_bank(void** ptr, int64_t* n)
×
325
{
326
  if (!ptr || !n) {
×
327
    set_errmsg("Received null pointer.");
×
328
    return OPENMC_E_INVALID_ARGUMENT;
×
329
  }
330

331
  if (simulation::fission_bank.size() == 0) {
×
332
    set_errmsg("Fission bank has not been allocated.");
×
333
    return OPENMC_E_ALLOCATE;
×
334
  } else {
335
    *ptr = simulation::fission_bank.data();
×
336
    *n = simulation::fission_bank.size();
×
337
    return 0;
×
338
  }
339
}
340

341
} // namespace openmc
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc