31#ifndef ALL_STAGGERED_HEADER_INCLUDED
32#define ALL_STAGGERED_HEADER_INCLUDED
// Constructor fragment (extraction artifact: original source line numbers are
// embedded in the text; surrounding lines are missing from this view).
// Creates one sub-communicator slot per dimension, initialized to
// MPI_COMM_NULL, and sizes the per-face neighbor-count array (two faces per
// dimension).
65 communicators.resize(d);
66 for (
int _d = 0; _d < d; ++_d)
67 communicators.at(_d) = MPI_COMM_NULL;
69 nNeighbors.resize(2 * d);
// Sets up method-specific internals (sub-communicators, MPI datatypes);
// overrides the base-class hook. See implementation fragment below.
77 void setup()
override;
// Performs one staggered-grid balancing step; 'step' is the iteration index.
83 void balance(
int step)
override;
// MPI datatype matching template parameter T (border/vertex coordinate type).
101 MPI_Datatype MPIDataTypeT;
// MPI datatype matching template parameter W (work type).
103 MPI_Datatype MPIDataTypeW;
// One communicator per dimension, used for plane-wise work reductions.
106 std::vector<MPI_Comm> communicators;
// Ranks of neighboring processes, appended face by face.
109 std::vector<int> neighbors;
// Number of neighbors on each of the 2*d faces.
111 std::vector<int> nNeighbors;
// Recomputes the neighbor lists from the shifted vertices after balancing.
114 void find_neighbors();
// Destructor fragment: frees each created sub-communicator, guarded against
// MPI_COMM_NULL. NOTE(review): communicators.at(0) is set to MPI_COMM_SELF in
// setup(); freeing MPI_COMM_SELF is invalid — verify in the full source that
// index 0 is excluded from this loop (context lines are missing here).
120 if (communicators.at(i) != MPI_COMM_NULL)
121 MPI_Comm_free(&communicators.at(i));
// setup() fragment (interior lines missing from this view).
// Validates that the supplied communicator is Cartesian, then builds the
// per-dimension sub-communicators and selects MPI datatypes for T and W.
137 if (status != MPI_CART) {
139 __FILE__, __func__, __LINE__,
140 "Cartesian MPI communicator required, passed communicator is not "
// Queries the Cartesian topology (dims/periodicity/coords) — the call itself
// is on a missing line; these are its trailing arguments.
147 this->periodicity.data(), this->local_coords.data());
// Release previously created sub-communicators before rebuilding them
// (safe to call setup() more than once).
154 if (communicators.at(1) != MPI_COMM_NULL)
155 MPI_Comm_free(&communicators.at(1));
156 if (communicators.at(2) != MPI_COMM_NULL)
157 MPI_Comm_free(&communicators.at(2));
// Split for dimension 2: processes sharing an (x,y) plane position are keyed
// by their linearized x+y*dims[0] coordinate (split call start is missing).
161 this->local_coords.at(0) +
162 this->local_coords.at(1) * this->global_dims.at(0),
163 &communicators.at(2));
// Split for dimension 1: keyed by y coordinate, ordered by x.
168 this->local_coords.at(1),
169 this->local_coords.at(0), &communicators.at(1));
// Dimension 0 needs no collective partner — each process reduces alone.
172 communicators.at(0) = MPI_COMM_SELF;
// Map template parameter T onto the corresponding MPI datatype.
175 if (std::is_same<T, double>::value)
176 MPIDataTypeT = MPI_DOUBLE;
177 else if (std::is_same<T, float>::value)
178 MPIDataTypeT = MPI_FLOAT;
179 else if (std::is_same<T, int>::value)
180 MPIDataTypeT = MPI_INT;
181 else if (std::is_same<T, long>::value)
182 MPIDataTypeT = MPI_LONG;
// Unsupported T: raise the library's fatal error (call start missing).
185 __FILE__, __func__, __LINE__,
186 "Invalid data type for boundaries given (T)");
// Map template parameter W onto the corresponding MPI datatype.
190 if (std::is_same<W, double>::value)
191 MPIDataTypeW = MPI_DOUBLE;
192 else if (std::is_same<W, float>::value)
193 MPIDataTypeW = MPI_FLOAT;
194 else if (std::is_same<W, int>::value)
195 MPIDataTypeW = MPI_INT;
196 else if (std::is_same<W, long>::value)
197 MPIDataTypeW = MPI_LONG;
200 "Invalid data type for work given (W)");
// balance(int step) fragment (interior lines missing from this view).
// For each dimension: reduce the plane-local work, exchange work/size/shift
// with the Cartesian neighbors along that axis, then move the domain borders.
204 int rank_left, rank_right;
// Neighbors along axis i in the global Cartesian communicator.
208 MPI_Cart_shift(this->
globalComm, i, 1, &rank_left, &rank_right);
209 neighbors.push_back(rank_left);
210 neighbors.push_back(rank_right);
// Work on a copy; this->vertices stays untouched until the new borders are
// validated.
215 std::vector<Point<T>> newVertices = this->
vertices;
224#ifdef ALL_DEBUG_ENABLED
227 std::cout <<
"ALL::Staggered_LB::balance(): before work computation..."
// Sum the local work over the plane communicator for this dimension.
231 MPI_Allreduce(this->
work.data(), &work_local_plane, 1, MPIDataTypeW,
232 MPI_SUM, communicators[i]);
234#ifdef ALL_DEBUG_ENABLED
237 std::cout <<
"ALL::Staggered_LB::balance(): before work distribution..."
246 int rank_left, rank_right;
248 MPI_Cart_shift(this->
globalComm, i, 1, &rank_left, &rank_right);
251 MPI_Request sreq, rreq;
252 MPI_Status sstat, rstat;
// Exchange plane work: receive the right neighbor's, send ours to the left.
254 MPI_Irecv(&remote_work, 1, MPIDataTypeW, rank_right, 0, this->
globalComm,
256 MPI_Isend(&work_local_plane, 1, MPIDataTypeW, rank_left, 0,
258 MPI_Wait(&sreq, &sstat);
259 MPI_Wait(&rreq, &rstat);
// Exchange domain extents along axis i in the same pattern.
265 MPI_Irecv(&remote_size, 1, MPIDataTypeT, rank_right, 0, this->
globalComm,
267 MPI_Isend(&local_size, 1, MPIDataTypeT, rank_left, 0, this->
globalComm,
269 MPI_Wait(&sreq, &sstat);
270 MPI_Wait(&rreq, &rstat);
// Damping factor for the border shift; the 4.1 floor and the size-ratio
// term bound how far a border may move in one step (heuristic).
275 std::max(4.1, 2.0 * (1.0 + std::max(local_size, remote_size) /
276 std::min(local_size, remote_size)));
278#ifdef ALL_DEBUG_ENABLED
281 std::cout <<
"ALL::Staggered_LB::balance(): before shift calculation..."
// Arguments to the 1-d border-shift helper (call start is missing; see
// borderShift1d in the member listing).
286 rank_right, this->
local_coords.at(i), this->global_dims.at(i),
287 work_local_plane, remote_work, local_size, remote_size, this->gamma,
290#ifdef ALL_DEBUG_ENABLED
// NOTE(review): "distibution" is a typo in this debug string ("distribution");
// fixing it requires a code change, flagged here only.
293 std::cout <<
"ALL::Staggered_LB::balance(): before shift distibution..."
297 T remote_shift = (T)0;
// Propagate shifts: receive the left neighbor's shift (our lower border),
// send ours to the right neighbor (its lower border).
299 MPI_Irecv(&remote_shift, 1, MPIDataTypeT, rank_left, 0, this->
globalComm,
301 MPI_Isend(&shift, 1, MPIDataTypeT, rank_right, 0, this->
globalComm, &sreq);
302 MPI_Wait(&sreq, &sstat);
303 MPI_Wait(&rreq, &rstat);
305#ifdef ALL_DEBUG_ENABLED
307 std::cout << this->
localRank <<
": shift = " << shift
308 <<
" remoteShift = " << remote_shift
309 <<
" vertices: " << this->
vertices.at(0)[i] <<
", "
310 << this->
vertices.at(1)[i] << std::endl;
// Apply the received shift to the lower border, except on the domain edge.
316 if (rank_left != MPI_PROC_NULL && this->
local_coords[i] != 0)
317 newVertices.at(0)[i] = this->
prevVertices.at(0)[i] + remote_shift;
// Apply our own shift to the upper border (condition partly missing here).
322 if (rank_right != MPI_PROC_NULL &&
324 newVertices.at(1)[i] = this->
prevVertices.at(1)[i] + shift;
// Sanity check: borders must not cross after shifting.
329 if (newVertices.at(1)[i] < newVertices.at(0)[i]) {
330 std::cout <<
"ERROR on process: " << this->
localRank << std::endl;
332 __FILE__, __func__, __LINE__,
333 "Lower border of process larger than upper border of process!");
338#ifdef ALL_DEBUG_ENABLED
341 std::cout <<
"ALL::Staggered_LB::balance(): before neighbor search..."
// Rebuilds the neighbor list after a balancing step. Because borders are
// staggered, a face in dimension 1 or 2 may touch several remote domains;
// the overlap tests below collect every overlapping remote rank.
// (Extraction artifact: interior lines of the overlap conditions are missing
// from this view; original source line numbers are embedded in the text.)
template <
class T,
class W>
void Staggered_LB<T, W>::find_neighbors() {
349 auto work = this->getWork();
350 auto vertices = this->prevVertices;
351 auto shifted_vertices = this->vertices;
355 MPI_Request sreq, rreq;
356 MPI_Status sstat, rstat;
// Scratch buffers: local border pairs and the gathered/forwarded remote
// borders for a whole plane of the process grid.
358 T *vertices_loc =
new T[4];
359 T *vertices_rem =
new T[8 * this->global_dims[0] * this->global_dims[1]];
365 int rank_left, rank_right;
369 int offset_coords[3];
// Dimension 0: exactly one neighbor on each face.
372 nNeighbors.at(0) = nNeighbors.at(1) = 1;
375 MPI_Cart_shift(this->globalComm, 0, 1, &rank_left, &rank_right);
378 neighbors.push_back(rank_left);
379 neighbors.push_back(rank_right);
// Dimension 1: gather x-borders of the local row, forward them to the right
// neighbor row, then test which remote columns overlap our x-extent.
382 MPI_Cart_shift(this->globalComm, 1, 1, &rank_left, &rank_right);
385 vertices_loc[0] = shifted_vertices.at(0)[0];
386 vertices_loc[1] = shifted_vertices.at(1)[0];
387 MPI_Allgather(vertices_loc, 2, MPIDataTypeT,
388 vertices_rem + 2 * this->global_dims[0], 2, MPIDataTypeT,
// Receive the previous row's borders while sending ours onward.
393 MPI_Irecv(vertices_rem, 2 * this->global_dims[0], MPIDataTypeT, rank_left, 0,
394 this->globalComm, &rreq);
395 MPI_Isend(vertices_rem + 2 * this->global_dims[0], 2 * this->global_dims[0],
396 MPIDataTypeT, rank_right, 0, this->globalComm, &sreq);
// Coordinates of the row below us (y-1) in the process grid.
399 offset_coords[0] = 0;
400 offset_coords[1] = this->local_coords[1] - 1;
401 offset_coords[2] = this->local_coords[2];
403 rem_coords[1] = offset_coords[1];
404 rem_coords[2] = offset_coords[2];
406 MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);
409 MPI_Wait(&sreq, &sstat);
410 MPI_Wait(&rreq, &rstat);
// Count and record all remote domains in that row overlapping [loc0, loc1).
413 nNeighbors.at(2) = 0;
414 for (
int x = 0; x < this->global_dims[0]; ++x) {
// Overlap iff: remote contains our lower border, or our upper border,
// or lies entirely inside our extent.
415 if ((vertices_rem[2 * x] <= vertices_loc[0] &&
416 vertices_loc[0] < vertices_rem[2 * x + 1]) ||
417 (vertices_rem[2 * x] < vertices_loc[1] &&
418 vertices_loc[1] <= vertices_rem[2 * x + 1]) ||
419 (vertices_rem[2 * x] >= vertices_loc[0] &&
420 vertices_loc[0] < vertices_rem[2 * x + 1] &&
421 vertices_loc[1] >= vertices_rem[2 * x + 1])) {
424 MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
425 neighbors.push_back(rem_rank);
431 MPI_Barrier(this->globalComm);
// Same exchange in the opposite direction: row above us (y+1).
435 MPI_Irecv(vertices_rem, 2 * this->global_dims[0], MPIDataTypeT, rank_right, 0,
436 this->globalComm, &rreq);
437 MPI_Isend(vertices_rem + 2 * this->global_dims[0], 2 * this->global_dims[0],
438 MPIDataTypeT, rank_left, 0, this->globalComm, &sreq);
441 offset_coords[0] = 0;
442 offset_coords[1] = this->local_coords[1] + 1;
443 offset_coords[2] = this->local_coords[2];
445 rem_coords[1] = offset_coords[1];
446 rem_coords[2] = offset_coords[2];
448 MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);
451 MPI_Wait(&sreq, &sstat);
452 MPI_Wait(&rreq, &rstat);
455 nNeighbors.at(3) = 0;
456 for (
int x = 0; x < this->global_dims[0]; ++x) {
457 if ((vertices_rem[2 * x] <= vertices_loc[0] &&
458 vertices_loc[0] < vertices_rem[2 * x + 1]) ||
459 (vertices_rem[2 * x] < vertices_loc[1] &&
460 vertices_loc[1] <= vertices_rem[2 * x + 1]) ||
461 (vertices_rem[2 * x] >= vertices_loc[0] &&
462 vertices_loc[0] < vertices_rem[2 * x + 1] &&
463 vertices_loc[1] >= vertices_rem[2 * x + 1])) {
466 MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
467 neighbors.push_back(rem_rank);
473 MPI_Barrier(this->globalComm);
// Dimension 2: gather x- and y-borders of the local plane, forward them to
// the neighboring plane, then test 2-d overlap for every remote domain.
476 MPI_Cart_shift(this->globalComm, 2, 1, &rank_left, &rank_right);
479 vertices_loc[0] = shifted_vertices.at(0)[0];
480 vertices_loc[1] = shifted_vertices.at(1)[0];
481 vertices_loc[2] = shifted_vertices.at(0)[1];
482 vertices_loc[3] = shifted_vertices.at(1)[1];
484 MPI_Barrier(this->globalComm);
486 MPI_Allgather(vertices_loc, 4, MPIDataTypeT,
487 vertices_rem + 4 * this->global_dims[0] * this->global_dims[1],
488 4, MPIDataTypeT, communicators[2]);
// Receive the previous plane's borders (z-1) while sending ours onward.
492 MPI_Irecv(vertices_rem, 4 * this->global_dims[0] * this->global_dims[1],
493 MPIDataTypeT, rank_left, 0, this->globalComm, &rreq);
494 MPI_Isend(vertices_rem + 4 * this->global_dims[0] * this->global_dims[1],
495 4 * this->global_dims[0] * this->global_dims[1], MPIDataTypeT,
496 rank_right, 0, this->globalComm, &sreq);
499 offset_coords[0] = 0;
500 offset_coords[1] = 0;
501 offset_coords[2] = this->local_coords[2] - 1;
503 rem_coords[2] = offset_coords[2];
505 MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);
508 MPI_Wait(&sreq, &sstat);
509 MPI_Wait(&rreq, &rstat);
// 2-d overlap test (y-condition AND x-condition); several comparison lines
// against vertices_loc are missing from this extraction.
512 nNeighbors.at(4) = 0;
513 for (
int y = 0; y < this->global_dims[1]; ++y) {
514 for (
int x = 0; x < this->global_dims[0]; ++x) {
515 if (((vertices_rem[4 * (x + y * this->global_dims[0]) + 2] <=
518 vertices_rem[4 * (x + y * this->global_dims[0]) + 3]) ||
519 (vertices_rem[4 * (x + y * this->global_dims[0]) + 2] <
522 vertices_rem[4 * (x + y * this->global_dims[0]) + 3]) ||
523 (vertices_rem[4 * (x + y * this->global_dims[0]) + 2] >=
526 vertices_rem[4 * (x + y * this->global_dims[0]) + 3] &&
528 vertices_rem[4 * (x + y * this->global_dims[0]) + 3])))
529 if (((vertices_rem[4 * (x + y * this->global_dims[0])] <=
532 vertices_rem[4 * (x + y * this->global_dims[0]) + 1]) ||
533 (vertices_rem[4 * (x + y * this->global_dims[0])] <
536 vertices_rem[4 * (x + y * this->global_dims[0]) + 1]) ||
537 (vertices_rem[4 * (x + y * this->global_dims[0])] >=
540 vertices_rem[4 * (x + y * this->global_dims[0]) + 1] &&
542 vertices_rem[4 * (x + y * this->global_dims[0]) + 1]))) {
546 MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
547 neighbors.push_back(rem_rank);
554 MPI_Barrier(this->globalComm);
// Same exchange in the opposite direction: next plane (z+1).
558 MPI_Irecv(vertices_rem, 4 * this->global_dims[0] * this->global_dims[1],
559 MPIDataTypeT, rank_right, 0, this->globalComm, &rreq);
560 MPI_Isend(vertices_rem + 4 * this->global_dims[0] * this->global_dims[1],
561 4 * this->global_dims[0] * this->global_dims[1], MPIDataTypeT,
562 rank_left, 0, this->globalComm, &sreq);
565 offset_coords[0] = 0;
566 offset_coords[1] = 0;
567 offset_coords[2] = this->local_coords[2] + 1;
569 rem_coords[2] = offset_coords[2];
571 MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);
574 MPI_Wait(&sreq, &sstat);
575 MPI_Wait(&rreq, &rstat);
578 nNeighbors.at(5) = 0;
579 for (
int y = 0; y < this->global_dims[1]; ++y) {
580 for (
int x = 0; x < this->global_dims[0]; ++x) {
581 if (((vertices_rem[4 * (x + y * this->global_dims[0]) + 2] <=
584 vertices_rem[4 * (x + y * this->global_dims[0]) + 3]) ||
585 (vertices_rem[4 * (x + y * this->global_dims[0]) + 2] <
588 vertices_rem[4 * (x + y * this->global_dims[0]) + 3]) ||
589 (vertices_rem[4 * (x + y * this->global_dims[0]) + 2] >=
592 vertices_rem[4 * (x + y * this->global_dims[0]) + 3] &&
594 vertices_rem[4 * (x + y * this->global_dims[0]) + 3])))
595 if (((vertices_rem[4 * (x + y * this->global_dims[0])] <=
598 vertices_rem[4 * (x + y * this->global_dims[0]) + 1]) ||
599 (vertices_rem[4 * (x + y * this->global_dims[0])] <
602 vertices_rem[4 * (x + y * this->global_dims[0]) + 1]) ||
603 (vertices_rem[4 * (x + y * this->global_dims[0])] >=
606 vertices_rem[4 * (x + y * this->global_dims[0]) + 1] &&
608 vertices_rem[4 * (x + y * this->global_dims[0]) + 1]))) {
612 MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
613 neighbors.push_back(rem_rank);
620 MPI_Barrier(this->globalComm);
// NOTE(review): raw new[]/delete[] scratch buffers would leak if an MPI call
// aborted via exception; std::vector<T> would be safer — flagged only, since
// this view is incomplete.
623 delete[] vertices_loc;
624 delete[] vertices_rem;
628template <
class T,
class W>
LB(const int dim, const T g)
virtual void setVertices(const std::vector< Point< T > > &vertices_in)
int localRank
local rank in the used MPI communicator
std::vector< W > work
local work
MPI_Comm globalComm
used MPI communicator
int dimension
dimension of the used vertices
std::vector< int > global_dims
dimensions of the global process grid
virtual void setWork(const std::vector< W > &w)
virtual int getDimension()
std::vector< int > local_coords
cartesian coordinates of the local domain in the process grid
std::vector< Point< T > > prevVertices
original vertices before previous balancing step
std::vector< Point< T > > vertices
local vertices after previous balancing step
~Staggered_LB()
default destructor
virtual void setAdditionalData(known_unused const void *data) override
void balance(int step) override
virtual W getEstimatedEfficiency() override
Staggered_LB(int d, W w, T g)
Staggered_LB()
default constructor
void setup() override
method to set up the method-specific internal parameters
virtual std::vector< int > & getNeighbors() override
T borderShift1d(const int remote_rank, const int local_coord, const int global_dim, const W local_work, const W remote_work, const T local_size, const T remote_size, const T gamma, const T minSize)