#ifndef ALL_HISTOGRAM_HEADER_INCLUDED
#define ALL_HISTOGRAM_HEADER_INCLUDED

#include <mpi.h>

#include <cmath>
#include <iostream>
#include <type_traits>
#include <vector>

#include "ALL_CustomExceptions.hpp"
#include "ALL_LB.hpp"
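
// Histogram-based load-balancing scheme: the local work is passed as a
// histogram over bins along each dimension (T: data type of the domain
// borders, W: data type of the work); each balance() call adjusts the
// domain borders of a single dimension (z, then y, then x) so that
// every slice of the process grid receives approximately the same
// total work.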
template <class T, class W> class Histogram_LB : public LB<T, W> {
public:
  Histogram_LB(int d, std::vector<W> w, T g) : LB<T, W>(d, g) {
    this->setWork(w);
    // sub-communicators are created in setup()
    communicators.resize(6);
    for (int i = 0; i < 6; i++)
      communicators.at(i) = MPI_COMM_NULL;
  }
  // free the sub-communicators created in setup()
  ~Histogram_LB() {
    for (int i = 0; i < (int)communicators.size(); i++) {
      if (communicators.at(i) != MPI_COMM_NULL &&
          communicators.at(i) != MPI_COMM_SELF &&
          communicators.at(i) != MPI_COMM_WORLD)
        MPI_Comm_free(&(communicators.at(i)));
    }
  }
  // set up MPI communicators and data types
  void setup() override;
  // balance the dimension selected by step (2 - step % 3)
  void balance(int step) override;

  // getters
  std::vector<int> &getNeighbors() override;
  virtual W getEstimatedEfficiency() override;

  // pass the number of histogram bins per dimension (three ints)
  virtual void setAdditionalData(const void *data) override;

private:
  // number of histogram bins in each dimension
  std::vector<int> nBins;

  // MPI data type matching T (domain borders)
  MPI_Datatype MPIDataTypeT;
  // MPI data type matching W (work)
  MPI_Datatype MPIDataTypeW;

  // sub-communicators: 0 - 2 reduce the partial histograms of a
  // plane / column, 3 - 5 gather the full histogram along a dimension
  std::vector<MPI_Comm> communicators;

  // ranks of the neighboring processes
  std::vector<int> neighbors;
  // number of neighbors for each of the six faces
  std::vector<int> nNeighbors;

  // find the neighboring domains (more general than a tensor grid,
  // since borders of adjacent planes need not line up)
  void find_neighbors();

  // estimated load-balancing efficiency of the last balance() call
  W estimatedEfficiency;
};
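
// Minimal usage sketch (not part of the original header). The names
// "cart_comm" and "work" are placeholders, and setCommunicator() is
// assumed to be provided by the LB<T, W> base class:
//
//   std::vector<double> work(64, 1.0);          // per-bin work
//   Histogram_LB<double, double> lb(3, work, 4.0);
//   lb.setCommunicator(cart_comm);              // cartesian communicator
//   int bins[3] = {64, 64, 64};                 // histogram bins per dim
//   lb.setAdditionalData(bins);
//   lb.setup();
//   lb.balance(0);                              // step 0 balances z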
// expects data to point to (at least) three ints: the number of
// histogram bins in x, y and z
template <class T, class W>
void Histogram_LB<T, W>::setAdditionalData(const void *data) {
  if (nBins.size() < 3)
    nBins.resize(3);
  for (int i = 0; i < 3; ++i)
    nBins.at(i) = *((const int *)data + i);
}
// setup routine: checks the communicator and creates the
// sub-communicators needed to reduce and gather the histograms
template <class T, class W> void Histogram_LB<T, W>::setup() {
  int status;

  // the scheme requires a cartesian communicator
  MPI_Topo_test(this->globalComm, &status);
  if (status != MPI_CART) {
    throw InternalErrorException(
        __FILE__, __func__, __LINE__,
        "Cartesian MPI communicator required, passed communicator is not "
        "cartesian");
  }

  // get the local coordinates, the periodicity and the global size
  // from the communicator
  MPI_Cart_get(this->globalComm, this->dimension, this->global_dims.data(),
               this->periodicity.data(), this->local_coords.data());

  // get the local rank
  MPI_Cart_rank(this->globalComm, this->local_coords.data(), &this->localRank);
  // free sub-communicators left over from a previous setup() call
  for (int i = 0; i < 6; i++) {
    if (communicators.at(i) != MPI_COMM_SELF &&
        communicators.at(i) != MPI_COMM_WORLD &&
        communicators.at(i) != MPI_COMM_NULL)
      MPI_Comm_free(&(communicators.at(i)));
  }
  // create the sub-communicators:
  // communicators 0 - 2: reduction of the partial histograms
  // communicators 3 - 5: gathering of the complete histograms

  // z-plane: all domains sharing a z-coordinate
  MPI_Comm_split(this->globalComm, this->local_coords.at(2),
                 this->local_coords.at(0) +
                     this->local_coords.at(1) * this->global_dims.at(0),
                 &communicators.at(2));

  // y-column: all domains sharing a y- and a z-coordinate
  MPI_Comm_split(this->globalComm,
                 this->local_coords.at(2) * this->global_dims.at(1) +
                     this->local_coords.at(1),
                 this->local_coords.at(0), &communicators.at(1));

  // x-reduction: only the domain itself
  communicators[0] = MPI_COMM_SELF;
  // gathering sub-communicators: one line of domains per dimension

  // x-direction
  MPI_Comm_split(this->globalComm,
                 this->local_coords.at(1) +
                     this->local_coords.at(2) * this->global_dims.at(1),
                 this->local_coords.at(0), &communicators.at(3));

  // y-direction
  MPI_Comm_split(this->globalComm,
                 this->local_coords.at(0) +
                     this->local_coords.at(2) * this->global_dims.at(0),
                 this->local_coords.at(1), &communicators.at(4));

  // z-direction
  MPI_Comm_split(this->globalComm,
                 this->local_coords.at(0) +
                     this->local_coords.at(1) * this->global_dims.at(0),
                 this->local_coords.at(2), &communicators.at(5));
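
  // Layout example for a hypothetical 4 x 3 x 2 process grid:
  // communicators.at(2) joins the 12 domains of one z-plane,
  // communicators.at(1) the 4 domains of one (y,z)-row, and the
  // gathering communicators 3 - 5 join the 4, 3 and 2 domains of one
  // line in x-, y- and z-direction, respectively.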
  // select the MPI data type matching the template parameter T
  if (std::is_same<T, double>::value)
    MPIDataTypeT = MPI_DOUBLE;
  else if (std::is_same<T, float>::value)
    MPIDataTypeT = MPI_FLOAT;
  else if (std::is_same<T, int>::value)
    MPIDataTypeT = MPI_INT;
  else if (std::is_same<T, long>::value)
    MPIDataTypeT = MPI_LONG;
  else
    throw InvalidArgumentException(
        __FILE__, __func__, __LINE__,
        "Invalid data type for boundaries given (T)");
  // select the MPI data type matching the template parameter W
  if (std::is_same<W, double>::value)
    MPIDataTypeW = MPI_DOUBLE;
  else if (std::is_same<W, float>::value)
    MPIDataTypeW = MPI_FLOAT;
  else if (std::is_same<W, int>::value)
    MPIDataTypeW = MPI_INT;
  else if (std::is_same<W, long>::value)
    MPIDataTypeW = MPI_LONG;
  else
    throw InvalidArgumentException(
        __FILE__, __func__, __LINE__,
        "Invalid data type for work given (W)");
  // recompute the direct cartesian neighbors as the initial
  // neighbor list
  int rank_left, rank_right;

  neighbors.clear();
  for (int i = 0; i < this->dimension; ++i) {
    MPI_Cart_shift(this->globalComm, i, 1, &rank_left, &rank_right);
    neighbors.push_back(rank_left);
    neighbors.push_back(rank_right);
  }
}
// balancing routine: each call balances a single dimension, cycling
// through z, y and x
template <class T, class W> void Histogram_LB<T, W>::balance(int step) {
  // direction of the balancing step
  int i = 2 - step % 3;

  // work sums used by the scheme
  W workSumLocal = (W)0;
  W workSumDimension;
  W workTotalDimension;
  W workAvgDimension;
  W workMinDimension;
  W workMaxDimension;
  // reduced histogram of the current dimension on the local domain
  std::vector<W> work_dimension(nBins.at(i));

  // total number of histogram bins along the current dimension
  int nBinsDimension;
  MPI_Allreduce(&(nBins.at(i)), &nBinsDimension, 1, MPI_INT, MPI_SUM,
                communicators.at(i + 3));

  std::vector<W> work_collection(nBinsDimension);
  std::vector<int> histogramSlices(this->global_dims.at(i));
  std::vector<W> work_new_dimension(this->global_dims.at(i), 0.0);

  // collect the number of bins of each process along the dimension
  // (histogramSlices is reused later for the slice borders)
  MPI_Allgather(&(nBins.at(i)), 1, MPI_INT, histogramSlices.data(), 1, MPI_INT,
                communicators.at(i + 3));
  // add up the local work
  for (int n = 0; n < nBins.at(i); ++n) {
    workSumLocal += this->work.at(n);
  }

  // reduce the work within the plane / column of the current dimension
  MPI_Allreduce(&workSumLocal, &workSumDimension, 1, MPIDataTypeW, MPI_SUM,
                communicators.at(i));

  // total work along the current dimension
  MPI_Allreduce(&workSumDimension, &workTotalDimension, 1, MPIDataTypeW,
                MPI_SUM, communicators.at(i + 3));

  // average work per slice of the process grid
  int avg_num = this->global_dims.at(i);
  workAvgDimension = workTotalDimension / (W)avg_num;
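
  // e.g. with a total work of 120 along this dimension and
  // global_dims.at(i) = 4 slices, workAvgDimension = 30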
  // reduce the partial histograms of the current dimension
  MPI_Allreduce(this->work.data(), work_dimension.data(), nBins.at(i),
                MPIDataTypeW, MPI_SUM, communicators.at(i));

  // displacements for the gathering of the complete histogram
  std::vector<int> displs(this->global_dims.at(i), 0);
  int tmp = 0;
  for (int n = 0; n < this->global_dims.at(i); ++n) {
    displs.at(n) = tmp;
    tmp += histogramSlices.at(n);
  }

  // gather the complete histogram of the current dimension
  MPI_Allgatherv(work_dimension.data(), nBins.at(i), MPIDataTypeW,
                 work_collection.data(), histogramSlices.data(), displs.data(),
                 MPIDataTypeW, communicators.at(i + 3));
  int current_slice = 0;

  // width of a single histogram bin
  T size = (this->sysSize[2 * i + 1] - this->sysSize[2 * i]) /
           (T)work_collection.size();

  // minimum number of bins per slice, derived from the minimum
  // domain size
  int minBinsPerSlice = (int)std::ceil(this->minSize[i] / size);
  if (minBinsPerSlice < 1)
    minBinsPerSlice = 1;
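
  // e.g. an extent of 16.0 split into 128 bins gives size = 0.125;
  // a minimum domain size of 0.5 then requires
  // minBinsPerSlice = ceil(0.5 / 0.125) = 4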
  // sanity check: the histogram must provide enough bins
  if (minBinsPerSlice * this->global_dims.at(i) >
      (int)work_collection.size()) {
    std::cout << "ERROR on process: " << this->localRank << std::endl;
    throw InternalErrorException(
        __FILE__, __func__, __LINE__,
        "Less histogram bins than required due to minimum domain size!");
  }
  // the first minBinsPerSlice - 1 bins always belong to the first
  // slice (minimum domain size)
  for (int idx = 0; idx < minBinsPerSlice - 1; ++idx)
    work_new_dimension.at(current_slice) += work_collection.at(idx);
  // sweep over the remaining bins: close the current slice as soon as
  // adding the next bin would exceed the average work, or when the
  // remaining bins are just enough to give every following slice its
  // minimum number of bins
  for (int idx = minBinsPerSlice; idx < (int)work_collection.size(); ++idx) {
    work_new_dimension.at(current_slice) += work_collection.at(idx);
    if ((idx < (int)work_collection.size() - 1 &&
         work_new_dimension.at(current_slice) + work_collection.at(idx + 1) >
             workAvgDimension) ||
        idx + (this->global_dims.at(i) - current_slice) * minBinsPerSlice >=
            (int)work_collection.size()) {
      histogramSlices.at(current_slice) = idx;

      // the last slice takes all remaining bins and the leftover work
      if (current_slice == (this->global_dims.at(i) - 1)) {
        histogramSlices.at(current_slice) = (int)work_collection.size();
        W tmp_work = (W)0;
        for (int j = 0; j < this->global_dims.at(i) - 1; ++j)
          tmp_work += work_new_dimension.at(j);
        work_new_dimension.at(current_slice) = workTotalDimension - tmp_work;
        break;
      }

      // open the next slice and pre-assign its first bins
      current_slice++;
      for (int i_bin = 0; i_bin < minBinsPerSlice - 1; ++i_bin) {
        idx++;
        work_new_dimension.at(current_slice) += work_collection.at(idx);
      }
    }
  }
  // upper border index of the local slice in the gathered histogram;
  // the last slice always extends to the end of the histogram (the
  // variable name "up" is assumed here)
  T up = (this->local_coords.at(i) == this->global_dims.at(i) - 1)
             ? (T)work_collection.size()
             : (T)histogramSlices.at(this->local_coords.at(i));
  // ... (update of this->prevVertices and this->vertices from the
  // slice borders elided in this listing) ...
  MPI_Allreduce(work_new_dimension.data() + this->local_coords.at(i),
                &workMinDimension, 1, MPIDataTypeW, MPI_MIN, this->globalComm);
  MPI_Allreduce(work_new_dimension.data() + this->local_coords.at(i),
                &workMaxDimension, 1, MPIDataTypeW, MPI_MAX, this->globalComm);

  estimatedEfficiency = (W)1.0 - (workMaxDimension - workMinDimension) /
                                     (workMaxDimension + workMinDimension);
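
  // e.g. workMaxDimension = 40 and workMinDimension = 20 give an
  // estimated efficiency of 1 - 20/60 = 2/3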
  // update the neighbor relations for the new decomposition
  find_neighbors();
}

// find all neighboring domains: after balancing, the borders within
// adjacent y- and z-planes no longer line up with the local ones, so
// the borders of those planes are exchanged and tested for overlap
template <class T, class W> void Histogram_LB<T, W>::find_neighbors() {
  MPI_Request sreq, rreq;
  MPI_Status sstat, rstat;

  // borders of the local domain (x-borders, plus y-borders for the
  // z-neighbor search)
  T *vertices_loc = new T[4];
  // borders of the domains in an adjacent plane; lower half of the
  // buffer: received borders, upper half: borders to be sent
  T *vertices_rem =
      new T[8 * this->global_dims.at(0) * this->global_dims.at(1)];
  int rank_left, rank_right;
  // rank and coordinates of a potential remote neighbor
  int rem_rank;
  int rem_coords[3];
  int rank_offset;
  int offset_coords[3];

  // reset the neighbor lists
  neighbors.clear();
  nNeighbors.resize(6);

  // in x-direction there is always exactly one neighbor per side
  nNeighbors.at(0) = nNeighbors.at(1) = 1;
  MPI_Cart_shift(this->globalComm, 0, 1, &rank_left, &rank_right);
  neighbors.push_back(rank_left);
  neighbors.push_back(rank_right);
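
  // x-borders are the only ones that can differ between neighboring
  // (y,z)-rows, so in x-direction the direct cartesian neighbors
  // remain valid; only the y- and z-neighbors need the overlap
  // search below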
  // find neighbors in y-direction: gather the x-borders of all
  // domains in the local (y,z)-row and exchange them with the
  // neighboring y-planes
  MPI_Cart_shift(this->globalComm, 1, 1, &rank_left, &rank_right);

  vertices_loc[0] = this->vertices.at(0)[0];
  vertices_loc[1] = this->vertices.at(1)[0];
  MPI_Allgather(vertices_loc, 2, MPIDataTypeT,
                vertices_rem + 2 * this->global_dims.at(0), 2, MPIDataTypeT,
                communicators.at(1));

  // exchange the collected borders with the lower y-neighbors
  MPI_Irecv(vertices_rem, 2 * this->global_dims.at(0), MPIDataTypeT, rank_left,
            0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem + 2 * this->global_dims.at(0),
            2 * this->global_dims.at(0), MPIDataTypeT, rank_right, 0,
            this->globalComm, &sreq);
  offset_coords[0] = 0;
  offset_coords[1] = this->local_coords.at(1) - 1;
  offset_coords[2] = this->local_coords.at(2);

  rem_coords[1] = offset_coords[1];
  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find y-communication" << std::endl;
#endif
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
  // check which domains of the lower y-plane overlap with the local
  // domain in x-direction
  nNeighbors.at(2) = 0;
  for (int x = 0; x < this->global_dims.at(0); ++x) {
    if ((vertices_rem[2 * x] <= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] < vertices_loc[1] &&
         vertices_loc[1] <= vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] >= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1] &&
         vertices_loc[1] >= vertices_rem[2 * x + 1])) {
      nNeighbors.at(2)++;
      rem_coords[0] = x;
      MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
      neighbors.push_back(rem_rank);
    }
  }
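
  // the three clauses above test interval overlap: the local lower
  // border lies inside the remote interval, the local upper border
  // lies inside it, or the remote interval is contained in the local
  // one; e.g. local [2,5) and remote [4,6) overlap via the second
  // clause (4 < 5 <= 6)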
  MPI_Barrier(this->globalComm);
  // exchange the collected borders with the upper y-neighbors
  MPI_Irecv(vertices_rem, 2 * this->global_dims.at(0), MPIDataTypeT, rank_right,
            0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem + 2 * this->global_dims.at(0),
            2 * this->global_dims.at(0), MPIDataTypeT, rank_left, 0,
            this->globalComm, &sreq);
  offset_coords[0] = 0;
  offset_coords[1] = this->local_coords.at(1) + 1;
  offset_coords[2] = this->local_coords.at(2);

  rem_coords[1] = offset_coords[1];
  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find y-communication 2" << std::endl;
#endif
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
  // check which domains of the upper y-plane overlap with the local
  // domain in x-direction
  nNeighbors.at(3) = 0;
  for (int x = 0; x < this->global_dims.at(0); ++x) {
    if ((vertices_rem[2 * x] <= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] < vertices_loc[1] &&
         vertices_loc[1] <= vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] >= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1] &&
         vertices_loc[1] >= vertices_rem[2 * x + 1])) {
      nNeighbors.at(3)++;
      rem_coords[0] = x;
      MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
      neighbors.push_back(rem_rank);
    }
  }

  MPI_Barrier(this->globalComm);
  // find neighbors in z-direction: gather the x- and y-borders of all
  // domains in the local z-plane and exchange them with the
  // neighboring z-planes
  MPI_Cart_shift(this->globalComm, 2, 1, &rank_left, &rank_right);

  vertices_loc[0] = this->vertices.at(0)[0];
  vertices_loc[1] = this->vertices.at(1)[0];
  vertices_loc[2] = this->vertices.at(0)[1];
  vertices_loc[3] = this->vertices.at(1)[1];

  MPI_Barrier(this->globalComm);

  MPI_Allgather(vertices_loc, 4, MPIDataTypeT,
                vertices_rem +
                    4 * this->global_dims.at(0) * this->global_dims.at(1),
                4, MPIDataTypeT, communicators.at(2));
  // exchange the collected borders with the lower z-neighbors
  MPI_Irecv(vertices_rem, 4 * this->global_dims.at(0) * this->global_dims.at(1),
            MPIDataTypeT, rank_left, 0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem +
                4 * this->global_dims.at(0) * this->global_dims.at(1),
            4 * this->global_dims.at(0) * this->global_dims.at(1), MPIDataTypeT,
            rank_right, 0, this->globalComm, &sreq);

  offset_coords[0] = 0;
  offset_coords[1] = 0;
  offset_coords[2] = this->local_coords.at(2) - 1;

  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find z-communication" << std::endl;
#endif
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
  // check which domains of the lower z-plane overlap with the local
  // one, first in y-, then in x-direction
  nNeighbors.at(4) = 0;
  for (int y = 0; y < this->global_dims.at(1); ++y) {
    for (int x = 0; x < this->global_dims.at(0); ++x) {
      // base index of the remote domain's borders in the buffer
      int idx = 4 * (x + y * this->global_dims.at(0));
      // y-overlap of the remote and the local domain
      if (((vertices_rem[idx + 2] <= vertices_loc[2] &&
            vertices_loc[2] < vertices_rem[idx + 3]) ||
           (vertices_rem[idx + 2] < vertices_loc[3] &&
            vertices_loc[3] <= vertices_rem[idx + 3]) ||
           (vertices_rem[idx + 2] >= vertices_loc[2] &&
            vertices_loc[2] < vertices_rem[idx + 3] &&
            vertices_loc[3] >= vertices_rem[idx + 3])))
        // x-overlap of the remote and the local domain
        if (((vertices_rem[idx] <= vertices_loc[0] &&
              vertices_loc[0] < vertices_rem[idx + 1]) ||
             (vertices_rem[idx] < vertices_loc[1] &&
              vertices_loc[1] <= vertices_rem[idx + 1]) ||
             (vertices_rem[idx] >= vertices_loc[0] &&
              vertices_loc[0] < vertices_rem[idx + 1] &&
              vertices_loc[1] >= vertices_rem[idx + 1]))) {
          nNeighbors.at(4)++;
          rem_coords[0] = x;
          rem_coords[1] = y;
          MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
          neighbors.push_back(rem_rank);
        }
    }
  }

  MPI_Barrier(this->globalComm);
  // exchange the collected borders with the upper z-neighbors
  MPI_Irecv(vertices_rem, 4 * this->global_dims.at(0) * this->global_dims.at(1),
            MPIDataTypeT, rank_right, 0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem +
                4 * this->global_dims.at(0) * this->global_dims.at(1),
            4 * this->global_dims.at(0) * this->global_dims.at(1), MPIDataTypeT,
            rank_left, 0, this->globalComm, &sreq);

  offset_coords[0] = 0;
  offset_coords[1] = 0;
  offset_coords[2] = this->local_coords.at(2) + 1;

  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find z-communication 2" << std::endl;
#endif
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
#ifdef ALL_DEBUG_ENABLED
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find z-communication 2" << std::endl;
#endif
  // check which domains of the upper z-plane overlap with the local
  // one, first in y-, then in x-direction
  nNeighbors.at(5) = 0;
  for (int y = 0; y < this->global_dims.at(1); ++y) {
    for (int x = 0; x < this->global_dims.at(0); ++x) {
      // base index of the remote domain's borders in the buffer
      int idx = 4 * (x + y * this->global_dims.at(0));
      // y-overlap of the remote and the local domain
      if (((vertices_rem[idx + 2] <= vertices_loc[2] &&
            vertices_loc[2] < vertices_rem[idx + 3]) ||
           (vertices_rem[idx + 2] < vertices_loc[3] &&
            vertices_loc[3] <= vertices_rem[idx + 3]) ||
           (vertices_rem[idx + 2] >= vertices_loc[2] &&
            vertices_loc[2] < vertices_rem[idx + 3] &&
            vertices_loc[3] >= vertices_rem[idx + 3])))
        // x-overlap of the remote and the local domain
        if (((vertices_rem[idx] <= vertices_loc[0] &&
              vertices_loc[0] < vertices_rem[idx + 1]) ||
             (vertices_rem[idx] < vertices_loc[1] &&
              vertices_loc[1] <= vertices_rem[idx + 1]) ||
             (vertices_rem[idx] >= vertices_loc[0] &&
              vertices_loc[0] < vertices_rem[idx + 1] &&
              vertices_loc[1] >= vertices_rem[idx + 1]))) {
          nNeighbors.at(5)++;
          rem_coords[0] = x;
          rem_coords[1] = y;
          MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
          neighbors.push_back(rem_rank);
        }
    }
  }

  MPI_Barrier(this->globalComm);
  delete[] vertices_loc;
  delete[] vertices_rem;
}
// getter for the list of neighboring ranks
template <class T, class W>
std::vector<int> &Histogram_LB<T, W>::getNeighbors() {
  return neighbors;
}
// getter for the estimated efficiency of the last balancing step
template <class T, class W> W Histogram_LB<T, W>::getEstimatedEfficiency() {
  return estimatedEfficiency;
}

#endif