ALL 0.9.3
A Load-Balancing Library
ALL_Histogram.hpp
/*
Copyright 2018-2020 Rene Halver, Forschungszentrum Juelich GmbH, Germany
Copyright 2018-2020 Godehard Sutmann, Forschungszentrum Juelich GmbH, Germany

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ALL_HISTOGRAM_HEADER_INCLUDED
#define ALL_HISTOGRAM_HEADER_INCLUDED

#include "ALL_LB.hpp"
#include <cmath>
#include <exception>
#include <iostream>
#include <mpi.h>
#include <type_traits>
#include <vector>

namespace ALL {

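// Histogram-based load-balancing scheme (staggered-grid approach): the local
// work is provided as a histogram of work values along the direction that is
// currently being balanced; the scheme sums these histograms over planes of
// the process grid, gathers the complete histogram along the direction and
// cuts it into slices of approximately equal work.
// Template parameters: T is the data type of the domain boundaries
// (e.g. double), W is the data type of the work values.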
template <class T, class W> class Histogram_LB : public LB<T, W> {
public:
  Histogram_LB(int d, std::vector<W> w, T g) : LB<T, W>(d, g) {
    this->setWork(w);
    // need the lower and upper bounds
    // of the domain for the histogram-based
    // load-balancing scheme

    // array of MPI communicators for each direction (to collect work
    // on each plane)
    communicators.resize(6);
    for (int i = 0; i < 6; i++)
      communicators.at(i) = MPI_COMM_NULL;

    nNeighbors.resize(2 * this->dimension);
    nBins.resize(this->dimension);
  }

  ~Histogram_LB() {
    for (int i = 0; i < (int)communicators.size(); i++) {
      if (communicators.at(i) != MPI_COMM_NULL &&
          communicators.at(i) != MPI_COMM_SELF &&
          communicators.at(i) != MPI_COMM_WORLD)
        MPI_Comm_free(&(communicators.at(i)));
    }
  }

  void setup() override;

  void balance(int step) override;

  // getters for variables (passed by reference to avoid
  // direct access to private members of the object)

  // neighbors
  std::vector<int> &getNeighbors() override;

  virtual void setAdditionalData(const void *data) override;

  virtual W getEstimatedEfficiency() override;

private:
  std::vector<int> nBins;

  MPI_Datatype MPIDataTypeT;
  MPI_Datatype MPIDataTypeW;

  std::vector<MPI_Comm> communicators;

  std::vector<int> neighbors;
  std::vector<int> nNeighbors;

  void find_neighbors();

  W estimatedEfficiency;
};

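// The additional data passed to the histogram scheme is interpreted as an
// array of (at least) three integers: the number of histogram bins the
// caller provides for the x, y and z direction, copied into nBins below.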
template <class T, class W>
void Histogram_LB<T, W>::setAdditionalData(const void *data) {
  if (nBins.size() < 3)
    nBins.resize(3);

  for (int i = 0; i < 3; ++i)
    nBins.at(i) = *((int *)data + i);
}

// setup routine for the histogram-based load-balancing scheme
// requires:
//    this->globalComm (MPI_Comm): cartesian MPI communicator, from
//                                 which separate sub-communicators
//                                 are derived in order to represent
//                                 each plane of domains in the system
template <class T, class W> void Histogram_LB<T, W>::setup() {
  int status;

  // check if Communicator is cartesian
  MPI_Topo_test(this->globalComm, &status);
  if (status != MPI_CART) {
    throw InvalidCommTypeException(
        __FILE__, __func__, __LINE__,
        "Cartesian MPI communicator required, passed communicator is not "
        "cartesian");
  }
  // get the local coordinates, this->periodicity and global size from the MPI
  // communicator
  MPI_Cart_get(this->globalComm, this->dimension, this->global_dims.data(),
               this->periodicity.data(), this->local_coords.data());

  // get the local rank from the MPI communicator
  MPI_Cart_rank(this->globalComm, this->local_coords.data(), &this->localRank);

  // free sub-communicators
  for (int i = 0; i < 6; i++) {
    if (communicators.at(i) != MPI_COMM_SELF &&
        communicators.at(i) != MPI_COMM_WORLD &&
        communicators.at(i) != MPI_COMM_NULL)
      MPI_Comm_free(&(communicators.at(i)));
  }

  // create sub-communicators

  // communicators 0 - 2: reduction of partial histograms
  // communicators 3 - 5: gathering of complete histograms

  // reduction sub-communicators (equal to staggered grid)

  // z-plane
  MPI_Comm_split(this->globalComm, this->local_coords.at(2),
                 this->local_coords.at(0) +
                     this->local_coords.at(1) * this->global_dims.at(0),
                 &communicators.at(2));

  // y-column
  MPI_Comm_split(this->globalComm,
                 this->local_coords.at(2) * this->global_dims.at(1) +
                     this->local_coords.at(1),
                 this->local_coords.at(0), &communicators.at(1));

  // only cell itself
  communicators[0] = MPI_COMM_SELF;

  // gathering sub-communicators

  // x-gathering (same y/z coordinates)
  MPI_Comm_split(this->globalComm,
                 this->local_coords.at(1) +
                     this->local_coords.at(2) * this->global_dims.at(1),
                 this->local_coords.at(0), &communicators.at(3));

  // y-gathering
  MPI_Comm_split(this->globalComm,
                 this->local_coords.at(0) +
                     this->local_coords.at(2) * this->global_dims.at(0),
                 this->local_coords.at(1), &communicators.at(4));

  // z-gathering
  MPI_Comm_split(this->globalComm,
                 this->local_coords.at(0) +
                     this->local_coords.at(1) * this->global_dims.at(0),
                 this->local_coords.at(2), &communicators.at(5));

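  // Resulting grouping for a process at Cartesian coordinates (x, y, z):
  //   communicators[0]: the process itself (the reduction in x is local)
  //   communicators[1]: all processes sharing (y, z), varying in x
  //   communicators[2]: all processes of the x-y plane with the same z
  //   communicators[3]: all processes sharing (y, z), gathering along x
  //   communicators[4]: all processes sharing (x, z), gathering along y
  //   communicators[5]: all processes sharing (x, y), gathering along z
  // For a 2 x 2 x 2 grid, for example, the process at (0, 1, 1) shares
  // communicators[4] with (0, 0, 1) and communicators[5] with (0, 1, 0).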
  // determine correct MPI data type for template T
  if (std::is_same<T, double>::value)
    MPIDataTypeT = MPI_DOUBLE;
  else if (std::is_same<T, float>::value)
    MPIDataTypeT = MPI_FLOAT;
  else if (std::is_same<T, int>::value)
    MPIDataTypeT = MPI_INT;
  else if (std::is_same<T, long>::value)
    MPIDataTypeT = MPI_LONG;
  else {
    throw InvalidCommTypeException(
        __FILE__, __func__, __LINE__,
        "Invalid data type for boundaries given (T)");
  }

  // determine correct MPI data type for template W
  if (std::is_same<W, double>::value)
    MPIDataTypeW = MPI_DOUBLE;
  else if (std::is_same<W, float>::value)
    MPIDataTypeW = MPI_FLOAT;
  else if (std::is_same<W, int>::value)
    MPIDataTypeW = MPI_INT;
  else if (std::is_same<W, long>::value)
    MPIDataTypeW = MPI_LONG;
  else {
    throw InvalidCommTypeException(__FILE__, __func__, __LINE__,
                                   "Invalid data type for work given (W)");
  }

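  // The neighbor list built here contains two entries per dimension,
  // (left rank, right rank), in dimension order; getNeighbors() exposes
  // this flat list. balance() later rebuilds it via find_neighbors().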
  // calculate neighbors
  int rank_left, rank_right;

  neighbors.clear();
  for (int i = 0; i < this->dimension; ++i) {
    MPI_Cart_shift(this->globalComm, i, 1, &rank_left, &rank_right);
    neighbors.push_back(rank_left);
    neighbors.push_back(rank_right);
  }
}

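// balance() adjusts the domain boundaries in one direction per call:
//   1. select the direction i = 2 - step % 3 (z, y, x on consecutive steps)
//   2. sum the per-process work histograms over the reduction communicator
//      for direction i (all processes sharing the same extent in that
//      direction, see the staggered-grid communicators created in setup())
//   3. gather the complete histogram along direction i (communicator i + 3)
//   4. split the histogram into global_dims[i] slices of roughly equal
//      work, never making a slice thinner than the minimum domain size
//   5. update the local vertices from the resulting bin boundaries and
//      estimate the achievable load-balancing efficiency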
template <class T, class W> void Histogram_LB<T, W>::balance(int step) {

  int i = 2 - step % 3;

  // required work values for the scheme
  W workSumLocal = (W)0;
  W workSumDimension;
  W workTotalDimension;
  W workAvgDimension;
  W workMinDimension;
  W workMaxDimension;

  // vector to store all values of the histogram for the current dimension
  std::vector<W> work_dimension(nBins.at(i));

  // compute total number of bins in dimension
  int nBinsDimension;
  MPI_Allreduce(&(nBins.at(i)), &nBinsDimension, 1, MPI_INT, MPI_SUM,
                communicators.at(i + 3));

  std::vector<W> work_collection(nBinsDimension);
  std::vector<int> histogramSlices(this->global_dims.at(i));
  std::vector<W> work_new_dimension(this->global_dims.at(i), 0.0);

  // collect how many bins from each process will be received
  MPI_Allgather(&(nBins.at(i)), 1, MPI_INT, histogramSlices.data(), 1, MPI_INT,
                communicators.at(i + 3));

  // add up work to get total work on domain
  for (int n = 0; n < nBins.at(i); ++n) {
    // std::cout << "DEBUG: " << this->work.size() << " " << n << " " <<
    // i << " on " << this->localRank << std::endl;
    workSumLocal += this->work.at(n);
  }

  // sum the local work over the reduction communicator
  // (work of the whole slab in the current dimension)
  MPI_Allreduce(&workSumLocal, &workSumDimension, 1, MPIDataTypeW, MPI_SUM,
                communicators.at(i));

  // compute the total work of the system in the current dimension
  MPI_Allreduce(&workSumDimension, &workTotalDimension, 1, MPIDataTypeW,
                MPI_SUM, communicators.at(i + 3));

  int avg_num = this->global_dims.at(i);
  workAvgDimension = workTotalDimension / (W)avg_num;

  // compute local slice of the histogram
  MPI_Allreduce(this->work.data(), work_dimension.data(), nBins.at(i),
                MPIDataTypeW, MPI_SUM, communicators.at(i));
  // displacement array
  std::vector<int> displs(this->global_dims.at(i), 0);
  int tmp = 0;

  for (int n = 0; n < this->global_dims.at(i); ++n) {
    displs[n] = tmp;
    tmp += histogramSlices.at(n);
  }

  // gather complete histogram in current dimension
  MPI_Allgatherv(work_dimension.data(), nBins.at(i), MPIDataTypeW,
                 work_collection.data(), histogramSlices.data(), displs.data(),
                 MPIDataTypeW, communicators[i + 3]);

  int current_slice = 0;

  // size of one bin
  T size = (this->sysSize[2 * i + 1] - this->sysSize[2 * i]) /
           (T)work_collection.size();

  // minimum size of one slice due to minimum domain width
  int minBinsPerSlice = (int)std::ceil(this->minSize[i] / size);
  if (minBinsPerSlice < 1)
    minBinsPerSlice = 1;
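  // e.g. with a minimum domain width of 1.0 and a bin size of 0.3, each
  // slice must contain at least ceil(1.0 / 0.3) = 4 bins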

  if (minBinsPerSlice * this->global_dims.at(i) > (int)work_collection.size()) {
    std::cout << "ERROR on process: " << this->localRank << std::endl;
    // exception class assumed; the original listing only preserves the
    // arguments of this throw statement
    throw InternalErrorException(
        __FILE__, __func__, __LINE__,
        "Less histogram bins than required due to minimum domain size!");
  }

  // TODO: compute cumulative function - up to workAvgDimension work in each
  // box

  // consider minimum domain size
  for (int idx = 0; idx < minBinsPerSlice - 1; ++idx)
    work_new_dimension.at(current_slice) += work_collection.at(idx);

  for (int idx = minBinsPerSlice; idx < (int)work_collection.size(); ++idx) {
    work_new_dimension.at(current_slice) += work_collection.at(idx);
    if ((idx < (int)work_collection.size() - 1 &&
         work_new_dimension.at(current_slice) + work_collection.at(idx + 1) >
             workAvgDimension) ||
        idx + (this->global_dims.at(i) - current_slice) * minBinsPerSlice >=
            work_collection.size()) {
      histogramSlices.at(current_slice) = idx;

      // calculate last slice with histogram size
      if (current_slice == (this->global_dims.at(i) - 1)) {
        histogramSlices.at(current_slice) = work_collection.size();
        W tmp_work = (W)0;
        for (int j = 0; j < this->global_dims.at(i) - 1; ++j)
          tmp_work += work_new_dimension.at(j);
        work_new_dimension.at(current_slice) = workTotalDimension - tmp_work;
        break;
      }

      current_slice++;

      // consider minimum domain size
      for (int i_bin = 0; i_bin < minBinsPerSlice - 1; ++i_bin) {
        idx++;
        work_new_dimension.at(current_slice) += work_collection.at(idx);
      }
    }
  }

  // TODO: compute width of domain
  T up = (this->local_coords.at(i) == this->global_dims.at(i) - 1)
             ? (T)work_collection.size()
             : (T)histogramSlices.at(this->local_coords.at(i));
  T down = (this->local_coords.at(i) == 0)
               ? (T)0
               : (T)histogramSlices.at(this->local_coords.at(i) - 1);

  this->prevVertices = this->vertices;

  this->vertices.at(0)[i] = (T)down * size + this->sysSize[2 * i];
  this->vertices.at(1)[i] = (T)up * size + this->sysSize[2 * i];
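  // e.g. for sysSize [0, 10] in direction i and 100 bins (bin size 0.1),
  // cut indices down = 40 and up = 60 give the new boundaries 4.0 and 6.0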

  // compute min / max work in current dimension
  MPI_Allreduce(work_new_dimension.data() + this->local_coords.at(i),
                &workMinDimension, 1, MPIDataTypeW, MPI_MIN, this->globalComm);
  MPI_Allreduce(work_new_dimension.data() + this->local_coords.at(i),
                &workMaxDimension, 1, MPIDataTypeW, MPI_MAX, this->globalComm);

  estimatedEfficiency = (W)1.0 - (workMaxDimension - workMinDimension) /
                                     (workMaxDimension + workMinDimension);
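  // e.g. a maximum work of 12 and a minimum work of 8 per slice give an
  // estimated efficiency of 1 - (12 - 8) / (12 + 8) = 0.8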
  /*
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: " << i << " " << workMinDimension << " "
              << workMaxDimension << " "
              << (workMaxDimension - workMinDimension) /
                     (workMaxDimension + workMinDimension)
              << std::endl;
  */
  find_neighbors();
}

template <class T, class W> void Histogram_LB<T, W>::find_neighbors() {
  neighbors.clear();
  // collect work from right neighbor plane
  MPI_Request sreq, rreq;
  MPI_Status sstat, rstat;
  // array to store neighbor vertices in Y/Z direction (reused)
  T *vertices_loc = new T[4];
  T *vertices_rem =
      new T[8 * this->global_dims.at(0) * this->global_dims.at(1)];

  int rem_rank;
  int rem_coords[3];

  // determine neighbors
  int rank_left, rank_right;

  // offset to get the correct rank
  int rank_offset;
  int offset_coords[3];

  // X-neighbors are static
  nNeighbors.at(0) = nNeighbors.at(1) = 1;

  // find X-neighbors
  MPI_Cart_shift(this->globalComm, 0, 1, &rank_left, &rank_right);

  // store X-neighbors
  neighbors.push_back(rank_left);
  neighbors.push_back(rank_right);

  // find Y-neighbors to get border information from
  MPI_Cart_shift(this->globalComm, 1, 1, &rank_left, &rank_right);

  // collect border information from local column
  vertices_loc[0] = this->vertices.at(0)[0];
  vertices_loc[1] = this->vertices.at(1)[0];
  MPI_Allgather(vertices_loc, 2, MPIDataTypeT,
                vertices_rem + 2 * this->global_dims.at(0), 2, MPIDataTypeT,
                communicators.at(1));

  // exchange local column information with upper neighbor in Y direction (cart
  // grid)
  MPI_Irecv(vertices_rem, 2 * this->global_dims.at(0), MPIDataTypeT, rank_left,
            0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem + 2 * this->global_dims.at(0),
            2 * this->global_dims.at(0), MPIDataTypeT, rank_right, 0,
            this->globalComm, &sreq);

  // determine the offset in ranks
  offset_coords[0] = 0;
  offset_coords[1] = this->local_coords.at(1) - 1;
  offset_coords[2] = this->local_coords.at(2);

  rem_coords[1] = offset_coords[1];
  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);

#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find y-communication" << std::endl;
#endif
  // wait for communication
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);

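  // Overlap test used below (and for the Z direction further down): the
  // remote interval [vertices_rem[2x], vertices_rem[2x+1]) and the local
  // interval [vertices_loc[0], vertices_loc[1]) describe a neighbor pair if
  //   - the local lower bound lies inside the remote interval, or
  //   - the local upper bound lies inside the remote interval, or
  //   - the remote interval is fully contained in the local one.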
  // iterate over the neighboring borders to determine the neighbor relations
  nNeighbors.at(2) = 0;
  for (int x = 0; x < this->global_dims.at(0); ++x) {
    if ((vertices_rem[2 * x] <= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] < vertices_loc[1] &&
         vertices_loc[1] <= vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] >= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1] &&
         vertices_loc[1] >= vertices_rem[2 * x + 1])) {
      nNeighbors.at(2)++;
      rem_coords[0] = x;
      MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
      neighbors.push_back(rem_rank);
    }
  }

  // barrier to ensure every process concluded the calculations before
  // overwriting remote borders!
  MPI_Barrier(this->globalComm);

  // exchange local column information with lower neighbor in Y direction (cart
  // grid)
  MPI_Irecv(vertices_rem, 2 * this->global_dims.at(0), MPIDataTypeT, rank_right,
            0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem + 2 * this->global_dims.at(0),
            2 * this->global_dims.at(0), MPIDataTypeT, rank_left, 0,
            this->globalComm, &sreq);

  // determine the offset in ranks
  offset_coords[0] = 0;
  offset_coords[1] = this->local_coords.at(1) + 1;
  offset_coords[2] = this->local_coords.at(2);

  rem_coords[1] = offset_coords[1];
  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);

#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find y-communication 2" << std::endl;
#endif
  // wait for communication
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);

  // iterate over the neighboring borders to determine the neighbor relations
  nNeighbors.at(3) = 0;
  for (int x = 0; x < this->global_dims.at(0); ++x) {
    if ((vertices_rem[2 * x] <= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] < vertices_loc[1] &&
         vertices_loc[1] <= vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] >= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1] &&
         vertices_loc[1] >= vertices_rem[2 * x + 1])) {
      nNeighbors.at(3)++;
      rem_coords[0] = x;
      MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
      neighbors.push_back(rem_rank);
    }
  }

  // barrier to ensure every process concluded the calculations before
  // overwriting remote borders!
  MPI_Barrier(this->globalComm);

  // find Z-neighbors to get border information from
  MPI_Cart_shift(this->globalComm, 2, 1, &rank_left, &rank_right);

  // collect border information from local plane
  vertices_loc[0] = this->vertices.at(0)[0];
  vertices_loc[1] = this->vertices.at(1)[0];
  vertices_loc[2] = this->vertices.at(0)[1];
  vertices_loc[3] = this->vertices.at(1)[1];

  MPI_Barrier(this->globalComm);

  MPI_Allgather(vertices_loc, 4, MPIDataTypeT,
                vertices_rem +
                    4 * this->global_dims.at(0) * this->global_dims.at(1),
                4, MPIDataTypeT, communicators.at(2));

  // exchange local plane information with upper neighbor in Z direction (cart
  // grid)
  MPI_Irecv(vertices_rem, 4 * this->global_dims.at(0) * this->global_dims.at(1),
            MPIDataTypeT, rank_left, 0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem +
                4 * this->global_dims.at(0) * this->global_dims.at(1),
            4 * this->global_dims.at(0) * this->global_dims.at(1), MPIDataTypeT,
            rank_right, 0, this->globalComm, &sreq);

  // determine the offset in ranks
  offset_coords[0] = 0;
  offset_coords[1] = 0;
  offset_coords[2] = this->local_coords.at(2) - 1;

  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);

#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find z-communication" << std::endl;
#endif
  // wait for communication
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);

  // iterate over the neighboring borders to determine the neighbor relations
  nNeighbors.at(4) = 0;
  for (int y = 0; y < this->global_dims.at(1); ++y) {
    for (int x = 0; x < this->global_dims.at(0); ++x) {
      if (((vertices_rem[4 * (x + y * this->global_dims.at(0)) + 2] <=
                vertices_loc[2] &&
            vertices_loc[2] <
                vertices_rem[4 * (x + y * this->global_dims.at(0)) + 3]) ||
           (vertices_rem[4 * (x + y * this->global_dims.at(0)) + 2] <
                vertices_loc[3] &&
            vertices_loc[3] <=
                vertices_rem[4 * (x + y * this->global_dims.at(0)) + 3]) ||
           (vertices_rem[4 * (x + y * this->global_dims.at(0)) + 2] >=
                vertices_loc[2] &&
            vertices_loc[2] <
                vertices_rem[4 * (x + y * this->global_dims.at(0)) + 3] &&
            vertices_loc[3] >=
                vertices_rem[4 * (x + y * this->global_dims.at(0)) + 3])))
        if (((vertices_rem[4 * (x + y * this->global_dims.at(0))] <=
                  vertices_loc[0] &&
              vertices_loc[0] <
                  vertices_rem[4 * (x + y * this->global_dims.at(0)) + 1]) ||
             (vertices_rem[4 * (x + y * this->global_dims.at(0))] <
                  vertices_loc[1] &&
              vertices_loc[1] <=
                  vertices_rem[4 * (x + y * this->global_dims.at(0)) + 1]) ||
             (vertices_rem[4 * (x + y * this->global_dims.at(0))] >=
                  vertices_loc[0] &&
              vertices_loc[0] <
                  vertices_rem[4 * (x + y * this->global_dims.at(0)) + 1] &&
              vertices_loc[1] >=
                  vertices_rem[4 * (x + y * this->global_dims.at(0)) + 1]))) {
          nNeighbors.at(4)++;
          rem_coords[1] = y;
          rem_coords[0] = x;
          MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
          neighbors.push_back(rem_rank);
        }
    }
  }

  // barrier to ensure every process concluded the calculations before
  // overwriting remote borders!
  MPI_Barrier(this->globalComm);

  // exchange local plane information with lower neighbor in Z direction (cart
  // grid)
  MPI_Irecv(vertices_rem, 4 * this->global_dims.at(0) * this->global_dims.at(1),
            MPIDataTypeT, rank_right, 0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem +
                4 * this->global_dims.at(0) * this->global_dims.at(1),
            4 * this->global_dims.at(0) * this->global_dims.at(1), MPIDataTypeT,
            rank_left, 0, this->globalComm, &sreq);

  // determine the offset in ranks
  offset_coords[0] = 0;
  offset_coords[1] = 0;
  offset_coords[2] = this->local_coords.at(2) + 1;

  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);

#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find z-communication 2" << std::endl;
#endif

  // wait for communication
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
#ifdef ALL_DEBUG_ENABLED
  if (this->localRank == 0)
    std::cout << "HISTOGRAM: neighbor_find z-communication 2" << std::endl;
#endif
  // iterate over the neighboring borders to determine the neighbor relations
  nNeighbors.at(5) = 0;
  for (int y = 0; y < this->global_dims.at(1); ++y) {
    for (int x = 0; x < this->global_dims.at(0); ++x) {
      if (((vertices_rem[4 * (x + y * this->global_dims.at(0)) + 2] <=
                vertices_loc[2] &&
            vertices_loc[2] <
                vertices_rem[4 * (x + y * this->global_dims.at(0)) + 3]) ||
           (vertices_rem[4 * (x + y * this->global_dims.at(0)) + 2] <
                vertices_loc[3] &&
            vertices_loc[3] <=
                vertices_rem[4 * (x + y * this->global_dims.at(0)) + 3]) ||
           (vertices_rem[4 * (x + y * this->global_dims.at(0)) + 2] >=
                vertices_loc[2] &&
            vertices_loc[2] <
                vertices_rem[4 * (x + y * this->global_dims.at(0)) + 3] &&
            vertices_loc[3] >=
                vertices_rem[4 * (x + y * this->global_dims.at(0)) + 3])))
        if (((vertices_rem[4 * (x + y * this->global_dims.at(0))] <=
                  vertices_loc[0] &&
              vertices_loc[0] <
                  vertices_rem[4 * (x + y * this->global_dims.at(0)) + 1]) ||
             (vertices_rem[4 * (x + y * this->global_dims.at(0))] <
                  vertices_loc[1] &&
              vertices_loc[1] <=
                  vertices_rem[4 * (x + y * this->global_dims.at(0)) + 1]) ||
             (vertices_rem[4 * (x + y * this->global_dims.at(0))] >=
                  vertices_loc[0] &&
              vertices_loc[0] <
                  vertices_rem[4 * (x + y * this->global_dims.at(0)) + 1] &&
              vertices_loc[1] >=
                  vertices_rem[4 * (x + y * this->global_dims.at(0)) + 1]))) {
          nNeighbors.at(5)++;
          rem_coords[1] = y;
          rem_coords[0] = x;
          MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
          neighbors.push_back(rem_rank);
        }
    }
  }

  // barrier to ensure every process concluded the calculations before
  // overwriting remote borders!
  MPI_Barrier(this->globalComm);

  // clean up the vertices arrays
  delete[] vertices_loc;
  delete[] vertices_rem;
}

// provide list of neighbors
template <class T, class W>
std::vector<int> &Histogram_LB<T, W>::getNeighbors() {
  return neighbors;
}

template <class T, class W>
W Histogram_LB<T, W>::getEstimatedEfficiency()
{
  return estimatedEfficiency;
}

} // namespace ALL

#endif
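
// Illustrative usage sketch (not part of this header). It only uses the
// methods visible above; how the Cartesian communicator, system size and
// initial vertices reach the LB<T, W> base class is defined in ALL_LB.hpp
// and therefore not shown here, and the numeric values below are made up.
//
//   // per-direction histogram resolution, passed as additional data
//   int nBins[3] = {64, 64, 64};
//
//   // one work value per local histogram bin of the current direction
//   std::vector<double> work(64, 1.0);
//
//   ALL::Histogram_LB<double, double> lb(3, work, /* g */ 4.0);
//   lb.setAdditionalData(nBins);
//   lb.setup();        // requires a Cartesian communicator in the base class
//   lb.balance(0);     // step 0 balances the z direction (i = 2 - 0 % 3)
//
//   std::vector<int> &ranks = lb.getNeighbors();
//   double efficiency = lb.getEstimatedEfficiency();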