ALL 0.9.3
A Load-Balancing Library
ALL_test.cpp
1/*
2 Copyright 2018 Rene Halver, Forschungszentrum Juelich GmbH, Germany
3 Copyright 2018 Godehard Sutmann, Forschungszentrum Juelich GmbH, Germany
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14
15 3. Neither the name of the copyright holder nor the names of its contributors
16 may be used to endorse or promote products derived from this software without
17 specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include "ALL.hpp"
34#include "ALL_Point.hpp"
35#include <cmath>
36#include <exception>
37#include <fstream>
38#include <iomanip>
39#include <iostream>
40#include <random>
41#include <sstream>
42#include <stdlib.h>
43#include <string>
44#include <vector>
45
46#ifdef ALL_VTK_OUTPUT
47#include <vtkCellArray.h>
48#include <vtkCellLocator.h>
49#include <vtkPointData.h>
50#include <vtkPoints.h>
51#include <vtkPolyData.h>
52#include <vtkSmartPointer.h>
53#include <vtkVersion.h>
54#include <vtkXMLPPolyDataWriter.h>
55#include <vtkXMLPolyDataWriter.h>
56#endif
57
58#define BOX_SIZE 300.0
59#define N_PARTICLES 2080
60#define N_GENERATE 1000
61#define SEED 123789456u
62#define N_LOOP 500
63#define OUTPUT_INTV 1
64#define MAX_NEIG 1024
65#define ALL_HISTOGRAM_DEFAULT_WIDTH 1.0
66
67#ifdef ALL_VORONOI_ACTIVE
68#define ALL_VORONOI_PREP_STEPS 50
69#endif
70
71void generate_points(std::vector<ALL::Point<double>> &points,
72 std::vector<double> l, std::vector<double> u, int *lcoords,
73 int *gcoords, int dimension, int &n, int rank) {
74
75 unsigned seed = SEED + rank;
76 std::default_random_engine generator(seed);
77 std::vector<std::uniform_real_distribution<double>> dist(dimension);
78
79 std::uniform_int_distribution<int> dist2(0, 2 * n);
80 std::uniform_real_distribution<double> weight_dist(0.1, 1.9);
81 n = dist2(generator);
82 n = dist2(generator);
83 int offset = 1;
84 double x = (lcoords[0] < gcoords[0] / 2)
85 ? (double)lcoords[0]
86 : (double)((gcoords[0] - 1) - lcoords[0]);
87 double y = (lcoords[1] < gcoords[1] / 2)
88 ? (double)lcoords[1]
89 : (double)((gcoords[1] - 1) - lcoords[1]);
90 double z = (lcoords[2] < gcoords[2] / 2)
91 ? (double)lcoords[2]
92 : (double)((gcoords[2] - 1) - lcoords[2]);
93 // offset = (int) ( 15.0 - 0.5 * x * y - (5.0 - x) * (5.0 - y) / 2.0 - 2.0 *
94 // std::abs(2.5 - z) ); offset = std::max(offset,0); offset =
95 // dist2(generator); offset = 1 + (int)x * (int)y * (int)z;
96 offset = 1 + (int)x + (int)y + (int)z;
97 n = offset * 64;
98 double weight = weight_dist(generator);
99
100 for (int i = 0; i < dimension; ++i) {
101 dist[i] = std::uniform_real_distribution<double>(l.at(i), u.at(i));
102 }
103
104 double coords[dimension];
105 for (int j = 0; j < n; ++j) {
106 for (int i = 0; i < dimension; ++i) {
107 coords[i] = dist.at(i)(generator);
108 }
109 ALL::Point<double> p(dimension, coords, weight);
110 points.push_back(p);
111 }
112}
113
114/****************************************************************
115 *
116 * input routine for example files
117 *
118 * format of MPI I/O files:
119 *
120 * n_proc long : offset of the local point block (in points)
121 * n_proc int : number of points on the local domain
122 * n_point x (long + 4 double) : point data (ID, x, y, z, weight)
123 ****************************************************************/
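/* A minimal sketch (not part of the library) of how an input file in the
 * layout described above could be produced with MPI I/O; the helper name
 * write_points_example and the choice of consecutive IDs are illustrative
 * assumptions. */
/*
void write_points_example(std::vector<ALL::Point<double>> &points, int rank,
                          int n_ranks, MPI_Comm comm, const char *filename) {
  MPI_File file;
  MPI_File_open(comm, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY,
                MPI_INFO_NULL, &file);
  int n = (int)points.size();
  long n_long = (long)n;
  long offset = 0;
  // prefix sum of the point counts gives each rank's offset (in points)
  MPI_Exscan(&n_long, &offset, 1, MPI_LONG, MPI_SUM, comm);
  if (rank == 0)
    offset = 0;
  long block = sizeof(long) + 4 * sizeof(double);
  long header = n_ranks * (sizeof(long) + sizeof(int));
  // header: n_ranks longs (offsets) followed by n_ranks ints (counts)
  MPI_File_write_at(file, (MPI_Offset)(rank * sizeof(long)), &offset, 1,
                    MPI_LONG, MPI_STATUS_IGNORE);
  MPI_File_write_at(file,
                    (MPI_Offset)(n_ranks * sizeof(long) + rank * sizeof(int)),
                    &n, 1, MPI_INT, MPI_STATUS_IGNORE);
  // per-point block: long ID followed by x, y, z, weight
  for (int i = 0; i < n; ++i) {
    long ID = offset + i;
    double v[4] = {points.at(i)[0], points.at(i)[1], points.at(i)[2],
                   points.at(i).getWeight()};
    MPI_File_write_at(file, (MPI_Offset)(header + (offset + i) * block), &ID,
                      1, MPI_LONG, MPI_STATUS_IGNORE);
    MPI_File_write_at(file,
                      (MPI_Offset)(header + (offset + i) * block + sizeof(long)),
                      v, 4, MPI_DOUBLE, MPI_STATUS_IGNORE);
  }
  MPI_File_close(&file);
}
*/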
124
125void read_points(std::vector<ALL::Point<double>> &points, std::vector<double> l,
126 std::vector<double> u, int &n_points, char *filename,
127 int dimension, int rank, MPI_Comm comm) {
128 MPI_File file;
129 MPI_Barrier(comm);
130 int err =
131 MPI_File_open(comm, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &file);
132 MPI_Barrier(comm);
133
134 n_points = 0;
135
136 int n_ranks;
137 long offset;
138 MPI_Comm_size(comm, &n_ranks);
139
140 if (err) {
141 if (rank == 0)
142 std::cout << "File could not be opened: " << filename << std::endl;
143 MPI_Abort(MPI_COMM_WORLD, -1);
144 }
145
146 // read offset from file
147 MPI_File_read_at(file, (MPI_Offset)(rank * sizeof(long)), &offset, 1,
148 MPI_LONG, MPI_STATUS_IGNORE);
149 // read number of points from file
150 MPI_File_read_at(file,
151 (MPI_Offset)(n_ranks * sizeof(long) + rank * sizeof(int)),
152 &n_points, 1, MPI_INT, MPI_STATUS_IGNORE);
153
154 double values[4];
155 long ID;
156 long block_size = sizeof(long) + 4 * sizeof(double);
157 for (int i = 0; i < n_points; ++i) {
158 MPI_File_read_at(file,
159 (MPI_Offset)((offset + i) * block_size +
160 n_ranks * (sizeof(int) + sizeof(long))),
161 &ID, 1, MPI_LONG, MPI_STATUS_IGNORE);
162 MPI_File_read_at(file,
163 (MPI_Offset)((offset + i) * block_size +
164 n_ranks * (sizeof(int) + sizeof(long)) +
165 sizeof(long)),
166 values, 4, MPI_DOUBLE, MPI_STATUS_IGNORE);
167 ALL::Point<double> p(dimension, &values[0], values[3], ID);
168 points.push_back(p);
169 }
170
171 MPI_File_close(&file);
172
173 /*
174
175 std::ifstream ifile;
176 ifile.open(filename);
177
178 double coords[3];
179 double weight;
180 int id;
181
182 n_points = 0;
183 char line[256];
184
185 while (ifile.getline(line,256))
186 {
187 if (ifile.eof()) break;
188 std::string str(line);
189 std::istringstream istr(str);
190 istr >> id >> coords[0] >> coords[1] >> coords[2] >> weight;
191
192 // check if point is within the local domain
193 if (
194 coords[0] >= l.at(0) && coords[0] < u.at(0) &&
195 coords[1] >= l.at(1) && coords[1] < u.at(1) &&
196 coords[2] >= l.at(2) && coords[2] < u.at(2)
197 )
198 {
199 ALL::Point<double> p(dimension,coords,weight);
200 points.push_back(p);
201 n_points++;
202 }
203 }
204
205 ifile.close();
206 */
207}
208
209#ifdef ALL_VTK_OUTPUT
210// function to create a VTK output of the points in the system
211void print_points(std::vector<ALL::Point<double>> plist, int step,
212 ALL::LB_t method, MPI_Comm comm) {
213 int rank, n_ranks;
214 static bool vtk_init = false;
215 MPI_Comm_rank(comm, &rank);
216 MPI_Comm_size(comm, &n_ranks);
217 vtkMPIController *controller;
218
219 // separate init step required, since VORONOI does not
220 // use the MPI initialization of VTK within the library
221 if (
222#ifdef ALL_VORONOI_ACTIVE
223 method == ALL::LB_t::VORONOI ||
224#endif
225 method == ALL::LB_t::FORCEBASED) {
226 if (!vtk_init) {
227 controller = vtkMPIController::New();
228 controller->Initialize();
229 controller->SetGlobalController(controller);
230 vtk_init = true;
231 }
232 }
233
234 auto points = vtkSmartPointer<vtkPoints>::New();
235
236 for (auto p : plist)
237 points->InsertNextPoint(p[0], p[1], p[2]);
238
239 auto polydata = vtkSmartPointer<vtkPolyData>::New();
240 polydata->SetPoints(points);
241
242 auto weight = vtkSmartPointer<vtkFloatArray>::New();
243 weight->SetNumberOfComponents(1);
244 weight->SetNumberOfTuples(plist.size());
245 weight->SetName("weight");
246 for (auto i = 0; i < (int)plist.size(); ++i)
247 weight->SetValue(i, plist.at(i).getWeight());
248
249 auto domain = vtkSmartPointer<vtkIntArray>::New();
250 domain->SetNumberOfComponents(1);
251 domain->SetNumberOfTuples(plist.size());
252 domain->SetName("domain");
253 for (auto i = 0; i < (int)plist.size(); ++i)
254 domain->SetValue(i, rank);
255
256 polydata->GetPointData()->AddArray(weight);
257 polydata->GetPointData()->AddArray(domain);
258
259 std::ostringstream ss_local;
260 ss_local << "vtk_points/ALL_vtk_points_" << std::setw(7) << std::setfill('0')
261 << step << "_" << rank << ".vtp";
262
263 auto writer = vtkSmartPointer<vtkXMLPolyDataWriter>::New();
264 writer->SetInputData(polydata);
265 writer->SetFileName(ss_local.str().c_str());
266 writer->SetDataModeToAscii();
267 writer->Write();
268
269 std::ostringstream ss_para;
270 ss_para << "vtk_points/ALL_vtk_points_" << std::setw(7) << std::setfill('0')
271 << step << ".pvtp";
272
273 auto parallel_writer = vtkSmartPointer<vtkXMLPPolyDataWriter>::New();
274 parallel_writer->SetFileName(ss_para.str().c_str());
275 parallel_writer->SetNumberOfPieces(n_ranks);
276 parallel_writer->SetStartPiece(rank);
277 parallel_writer->SetEndPiece(rank);
278 parallel_writer->SetInputData(polydata);
279 parallel_writer->SetDataModeToAscii();
280 parallel_writer->Update();
281 MPI_Barrier(comm);
282 parallel_writer->Write();
283}
284#endif
285
286int main(int argc, char **argv) {
287 try {
288 MPI_Init(&argc, &argv);
289
290 const int sys_dim = 3;
291 int max_loop = N_LOOP;
292 double box_size[sys_dim] = {BOX_SIZE, BOX_SIZE, BOX_SIZE};
293 std::vector<double> sys_size(6);
294 sys_size.at(0) = 0;
295 sys_size.at(1) = BOX_SIZE;
296 sys_size.at(2) = 0;
297 sys_size.at(3) = BOX_SIZE;
298 sys_size.at(4) = 0;
299 sys_size.at(5) = BOX_SIZE;
300
301 std::vector<double> minSize(3, 2.0);
302
303 ALL::LB_t chosen_method = ALL::LB_t::STAGGERED;
304 bool weighted_points = false;
305 char *filename = NULL;
306 double gamma = 16.0;
307 int output_step = 0;
308 int global_dim[sys_dim];
309 for (int d = 0; d < sys_dim; ++d)
310 global_dim[d] = 0;
311
312 // check arguments
313 if (argc < 2) {
314 throw ALL::InvalidArgumentException(__FILE__, __func__, __LINE__,
315 "Wrong number of arguments: ALL_test \
316 [ <int:method> [ <int:max_loop> \
317 [ <double:gamma> [ <int:weighted> \
318 [ <char*:input_file> \
319 [ <double:lx> <double:ly> <double:lz> \
320 [ <int:dx> <int:dy> <int:dz> ]]]]]]]");
321 }
322 if (argc < 3) {
323 // read system dimension from command line
324 chosen_method = (ALL::LB_t)atoi(argv[1]);
325 } else if (argc < 4) {
326 chosen_method = (ALL::LB_t)atoi(argv[1]);
327 max_loop = atoi(argv[2]);
328 } else if (argc < 5) {
329 chosen_method = (ALL::LB_t)atoi(argv[1]);
330 max_loop = atoi(argv[2]);
331 gamma = atof(argv[3]);
332 } else if (argc < 6) {
333 chosen_method = (ALL::LB_t)atoi(argv[1]);
334 max_loop = atoi(argv[2]);
335 gamma = atof(argv[3]);
336 weighted_points = atoi(argv[4]) == 1;
337 } else if (argc < 7) {
338 chosen_method = (ALL::LB_t)atoi(argv[1]);
339 max_loop = atoi(argv[2]);
340 gamma = atof(argv[3]);
341 weighted_points = atoi(argv[4]) == 1;
342 filename = argv[5];
343 } else if (argc == 9) {
344 chosen_method = (ALL::LB_t)atoi(argv[1]);
345 max_loop = atoi(argv[2]);
346 gamma = atof(argv[3]);
347 weighted_points = atoi(argv[4]) == 1;
348 filename = argv[5];
349 box_size[0] = atof(argv[6]);
350 box_size[1] = atof(argv[7]);
351 box_size[2] = atof(argv[8]);
352 sys_size.at(1) = atof(argv[6]);
353 sys_size.at(3) = atof(argv[7]);
354 sys_size.at(5) = atof(argv[8]);
355 } else if (argc == 12) {
356 chosen_method = (ALL::LB_t)atoi(argv[1]);
357 max_loop = atoi(argv[2]);
358 gamma = atof(argv[3]);
359 weighted_points = atoi(argv[4]) == 1;
360 filename = argv[5];
361 box_size[0] = atof(argv[6]);
362 box_size[1] = atof(argv[7]);
363 box_size[2] = atof(argv[8]);
364 global_dim[0] = atoi(argv[9]);
365 global_dim[1] = atoi(argv[10]);
366 global_dim[2] = atoi(argv[11]);
367 sys_size.at(1) = atof(argv[6]);
368 sys_size.at(3) = atof(argv[7]);
369 sys_size.at(5) = atof(argv[8]);
370 }
371
372 // setup of vector of points on each MPI rank
373 std::vector<double> dummy(sys_dim);
374 std::vector<ALL::Point<double>> points;
375 int max_neighbors, loc_neighbors;
376
377 // setup of cartesian communicator
378 int localRank;
379 int n_ranks;
380 MPI_Comm cart_comm;
381 int local_coords[sys_dim];
382 int periodicity[sys_dim];
383 // domain sizes
384 std::vector<double> ds(sys_dim);
385 // domain boundaries
386 std::vector<double> l(sys_dim);
387 std::vector<double> u(sys_dim);
388
389 double min_ratio = 1.01;
390 int min_step = -1;
391
392 for (int i = 0; i < sys_dim; ++i) {
393 periodicity[i] = 1;
394 }
395
396 // get number of total ranks
397 MPI_Comm_size(MPI_COMM_WORLD, &n_ranks);
398
399 if (global_dim[0] == 0) {
400 // get distribution into number of dimensions
401 MPI_Dims_create(n_ranks, sys_dim, global_dim);
402 }
403
404 // create cartesian MPI communicator
405 MPI_Cart_create(MPI_COMM_WORLD, sys_dim, global_dim, periodicity, 1,
406 &cart_comm);
407
408 // get local coordinates
409 MPI_Cart_get(cart_comm, sys_dim, global_dim, periodicity, local_coords);
410
411 // get local rank
412 MPI_Cart_rank(cart_comm, local_coords, &localRank);
413
414 int coords[sys_dim];
415 MPI_Cart_coords(cart_comm, localRank, sys_dim, coords);
416
417 // print out chosen method
418 if (localRank == 0) {
419 switch (chosen_method) {
421 case ALL::LB_t::TENSOR:
422 std::cout << "chosen method: TENSOR" << std::endl;
423 break;
424 case ALL::LB_t::STAGGERED:
425 std::cout << "chosen method: STAGGERED" << std::endl;
426 break;
427 case ALL::LB_t::FORCEBASED:
428 std::cout << "chosen method: FORCEBASED" << std::endl;
429 break;
430#ifdef ALL_VORONOI_ACTIVE
431 case ALL::LB_t::VORONOI:
432 std::cout << "chosen method: VORONOI" << std::endl;
433 break;
434#endif
435 case ALL::LB_t::HISTOGRAM:
436 std::cout << "chosen method: HISTOGRAM" << std::endl;
437 break;
438 default:
439 throw ALL::InvalidArgumentException(__FILE__, __func__, __LINE__,
440 "Invalid method chosen.");
441 }
442 std::cout << "chosen gamma: " << gamma << std::endl;
443 }
444 // calculate domain extension
445 for (int i = 0; i < sys_dim; ++i) {
446 ds.at(i) = box_size[i] / (double)global_dim[i];
447 l.at(i) = local_coords[i] * ds.at(i);
448 u.at(i) = (1.0 + local_coords[i]) * ds.at(i);
449 }
450
451 // generate vertices (equal to outline (for tensor only))
452 ALL::Point<double> *tp = new ALL::Point<double>(sys_dim);
453 std::vector<ALL::Point<double>> *tv = new std::vector<ALL::Point<double>>(2);
454 tv->at(0) = *tp;
455 delete tv;
456 delete tp;
457
458 int nvertices = 2;
459
460 switch (chosen_method) {
462 case ALL::LB_t::TENSOR:
463 nvertices = 2;
464 break;
465 case ALL::LB_t::STAGGERED:
466 nvertices = 2;
467 break;
468 case ALL::LB_t::FORCEBASED:
469 nvertices = 8;
470 break;
471#ifdef ALL_VORONOI_ACTIVE
472 case ALL::LB_t::VORONOI:
473 // the generator point is stored as 'vertex(0)'
474 // the center of work is stored as 'vertex(1)'
475 nvertices = 2;
476 break;
477#endif
478 case ALL::LB_t::HISTOGRAM:
479 nvertices = 2;
480 break;
481 default:
482 nvertices = 2;
483 break;
484 }
485
486 ALL::Point<double> lp(l);
487 ALL::Point<double> up(u);
488 std::vector<ALL::Point<double>> vertices(nvertices, lp);
489 std::vector<ALL::Point<double>> new_vertices(nvertices, lp);
490 std::vector<ALL::Point<double>> old_vertices(nvertices, lp);
491
492 switch (chosen_method) {
494 case ALL::LB_t::TENSOR:
495 vertices.at(0) = lp;
496 vertices.at(1) = up;
497 break;
498 case ALL::LB_t::STAGGERED:
499 vertices.at(0) = lp;
500 vertices.at(1) = up;
501 break;
502 case ALL::LB_t::FORCEBASED:
503 for (int i = 0; i < nvertices; ++i)
504 vertices.at(0) = lp;
505 for (int z = 0; z < 2; ++z)
506 for (int y = 0; y < 2; ++y)
507 for (int x = 0; x < 2; ++x) {
508 int vertex = 4 * z + 2 * y + x;
509 vertices.at(vertex)[0] =
510 vertices.at(vertex)[0] + (double)x * ds.at(0);
511 vertices.at(vertex)[1] =
512 vertices.at(vertex)[1] + (double)y * ds.at(1);
513 vertices.at(vertex)[2] =
514 vertices.at(vertex)[2] + (double)z * ds.at(2);
515 }
516 break;
517#ifdef ALL_VORONOI_ACTIVE
518 case ALL::LB_t::VORONOI:
519 // first generator point is in the center of the orthogonal,
520 // equidistant decomposition
521 for (int d = 0; d < sys_dim; ++d)
522 vertices.at(0)[d] = 0.5 * (lp[d] + up[d]);
523 vertices.at(1) = vertices.at(0);
524 break;
525#endif
526 case ALL::LB_t::HISTOGRAM:
527 vertices.at(0) = lp;
528 vertices.at(1) = up;
529 break;
530 default:
531 throw ALL::InvalidArgumentException(__FILE__, __func__, __LINE__,
532 "Invalid method chosen.");
533 break;
534 }
535
536 // generate points on domains
537 int n_points = N_GENERATE;
538 int max_particles = 1;
539 if (localRank == 0)
540 std::cout << "creating / reading point data" << std::endl;
541 if (argc <= 4) {
542 generate_points(points, l, u, coords, global_dim, sys_dim, n_points,
543 localRank);
544 } else {
545 read_points(points, l, u, n_points, filename, sys_dim, localRank,
546 cart_comm);
547 }
548 double *transfer;
549 double *recv;
550
551 MPI_Allreduce(&n_points, &max_particles, 1, MPI_INT, MPI_MAX, cart_comm);
552
553 max_particles = (int)std::ceil((double)max_particles * 1.5);
554
555 if (localRank == 0)
556 std::cout << "maximum number of points on any process: " << max_particles
557 << std::endl;
558 int max_neig = 27;
559
560 recv = new double[max_neig * (sys_dim + 1) * max_particles];
561 transfer = new double[max_neig * (sys_dim + 1) * max_particles];
562
563 // find neighbors on cartesian communicator
564 int l_neig[sys_dim];
565 int r_neig[sys_dim];
566 int self;
567 for (int i = 0; i < sys_dim; ++i) {
568 MPI_Cart_shift(cart_comm, i, 1, &self, &r_neig[i]);
569 MPI_Cart_shift(cart_comm, i, -1, &self, &l_neig[i]);
570 if (local_coords[i] == 0)
571 l_neig[i] = MPI_PROC_NULL;
572 if (local_coords[i] == global_dim[i] - 1)
573 r_neig[i] = MPI_PROC_NULL;
574 }
575
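// d_min / d_max: smallest / largest local work relative to the average work
// per process; d_ratio = (d_max - d_min) / (d_max + d_min) is the relative
// load imbalance tracked over the balancing loop below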
576 double d_min, d_max, d_ratio;
577 double n_local;
578 if (!weighted_points) {
579 n_local = (double)n_points;
580 } else {
581 n_local = 0.0;
582 for (auto p = points.begin(); p != points.end(); ++p)
583 n_local += p->getWeight();
584 }
585 double n_total;
586 MPI_Allreduce(&n_local, &n_total, 1, MPI_DOUBLE, MPI_SUM, cart_comm);
587 double avg_work = (double)n_total / (double)n_ranks;
588 double n_min, n_max;
589 MPI_Allreduce(&n_local, &n_min, 1, MPI_DOUBLE, MPI_MIN, cart_comm);
590 MPI_Allreduce(&n_local, &n_max, 1, MPI_DOUBLE, MPI_MAX, cart_comm);
591 d_min = n_min / avg_work;
592 d_max = n_max / avg_work;
593 d_ratio = (d_max - d_min) / (d_max + d_min);
594 min_ratio = d_ratio;
595 min_step = 0;
596
597 double total_points;
598 // get starting number of particles
599 MPI_Allreduce(&n_local, &total_points, 1, MPI_DOUBLE, MPI_SUM, cart_comm);
600
601 // output of borders / contents
602 if (n_ranks < 216) {
603 for (int i = 0; i < n_ranks; ++i) {
604 if (localRank == i) {
605 std::ofstream of;
606 of.open("domain_data.dat", std::ios::out | std::ios::app);
607 of << 0 << " " << localRank << " ";
608
609 for (int j = 0; j < sys_dim; ++j) {
610 of << " " << local_coords[j] << " " << lp[j] << " " << up[j] << " ";
611 }
612
613 of << " " << n_local << " ";
614
615 of << std::endl;
616 if (i == n_ranks - 1)
617 of << std::endl;
618 of.close();
619 MPI_Barrier(cart_comm);
620 } else
621 MPI_Barrier(cart_comm);
622 }
623 }
624
625#ifdef ALL_VORONOI_ACTIVE
626 if (chosen_method == ALL::LB_t::VORONOI) {
627 // one-time particle output to voronoi/particles.pov
628
629 for (int i = 0; i < n_ranks; ++i) {
630 if (localRank == i) {
631 std::ofstream of;
632 if (i != 0)
633 of.open("voronoi/particles.pov", std::ios::out | std::ios::app);
634 else
635 of.open("voronoi/particles.pov", std::ios::out);
636 int j = 0;
637 for (auto it = points.begin(); it != points.end(); ++it) {
638 of << "// rank " << localRank << " particle " << j << std::endl;
639 of << "sphere{<" << (*it)[0] << "," << (*it)[1] << "," << (*it)[2]
640 << ">, p}" << std::endl;
641 ++j;
642 }
643 }
644 MPI_Barrier(cart_comm);
645 }
646
647 int minpoints = 0;
648 double cow_sys[sys_dim + 1];
649 double target_point[sys_dim + 1];
650
651 cow_sys[0] = cow_sys[1] = cow_sys[2] = cow_sys[3] = 0.0;
652
653 for (int i = 0; i < n_points; ++i) {
654 for (int d = 0; d < sys_dim; ++d)
655 cow_sys[d] += points.at(i)[d] * points.at(i).getWeight();
656 cow_sys[sys_dim] += points.at(i).getWeight();
657 }
658
659 MPI_Allreduce(cow_sys, target_point, sys_dim + 1, MPI_DOUBLE, MPI_SUM,
660 cart_comm);
661
662 for (int d = 0; d < sys_dim; ++d)
663 target_point[d] /= target_point[sys_dim];
664
665 std::default_random_engine generator(SEED + localRank - 42 * 23);
666 std::uniform_real_distribution<double> dist(-0.25, 0.25);
667
668 // experimental: as a first step, try to find better starting points
669 for (int i_preloop = 0; i_preloop < ALL_VORONOI_PREP_STEPS; ++i_preloop) {
670 // get neighbor information
671 int nNeighbors = n_ranks;
672 std::vector<double> neighbor_vertices(sys_dim * n_ranks);
673 std::vector<int> neighbors(n_ranks);
674
675 for (int n = 0; n < n_ranks; ++n)
676 neighbors.at(n) = n;
677
678 double local_vertex[sys_dim];
679 if (n_points > 0) {
680 for (auto &v : local_vertex)
681 v = 0.0;
682 double local_work = 0.0;
683
684 for (int p = 0; p < n_points; ++p) {
685 local_work += points.at(p).getWeight();
686 for (int d = 0; d < sys_dim; ++d)
687 local_vertex[d] += points.at(p)[d] * points.at(p).getWeight();
688 }
689
690 for (int d = 0; d < sys_dim; ++d) {
691 if (local_work < 1e-6)
692 local_work = 1.0;
693 local_vertex[d] /= local_work;
694 vertices.at(0)[d] = local_vertex[d];
695 }
696 } else {
697 double diff[3];
698
699 for (int d = 0; d < sys_dim; ++d) {
700 // diff[d] = dist(generator);
701 diff[d] = target_point[d] - local_vertex[d] + dist(generator);
702 }
703
704 double dd = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
705
706 dd = sqrt(dd);
707 if (std::fabs(dd) < 1e-6)
708 dd = 1.0;
709
710 for (int d = 0; d < sys_dim; ++d) {
711 local_vertex[d] = vertices.at(0)[d] + diff[d] / dd;
712 local_vertex[d] = (local_vertex[d] < 0.0)
713 ? local_vertex[d] + 1.0
714 : ((local_vertex[d] > box_size[d])
715 ? local_vertex[d] - box_size[d]
716 : local_vertex[d]);
717 vertices.at(0)[d] = local_vertex[d];
718 }
719 }
720 MPI_Allgather(local_vertex, sys_dim, MPI_DOUBLE,
721 neighbor_vertices.data(), sys_dim, MPI_DOUBLE, cart_comm);
722
723 // compute voronoi cells
724
725 voro::container con_prep(0.0, box_size[0], 0.0, box_size[1], 0.0,
726 box_size[2], ALL_VORONOI_SUBBLOCKS,
727 ALL_VORONOI_SUBBLOCKS, ALL_VORONOI_SUBBLOCKS,
728 false, false, false, 10);
729 // add neighbor points first to maintain
730 // mapping to neighbors array
731 for (auto i = 0; i < nNeighbors; ++i) {
732 con_prep.put(i, neighbor_vertices.at(3 * i),
733 neighbor_vertices.at(3 * i + 1),
734 neighbor_vertices.at(3 * i + 2));
735 }
736
737 /* OUTPUT: Preparation steps */
738
739 /*
740 std::ostringstream ss_local_gp_pov2;
741 ss_local_gp_pov2 << "voronoi/prep_points_"
742 << std::setw(7) << std::setfill('0')
743 << i_preloop << ".pov";
744 std::ostringstream ss_local_vc_pov2;
745 ss_local_vc_pov2 << "voronoi/prep_cells_"
746 << std::setw(7) << std::setfill('0')
747 << i_preloop << ".pov";
748
749 if (localRank == 0)
750 {
751 con_prep.draw_particles_pov(ss_local_gp_pov2.str().c_str());
752 con_prep.draw_cells_pov(ss_local_vc_pov2.str().c_str());
753 }
754 */
755
756 // collect particles that left the domain
757 // and determine to which domain they will be transfered
758
759 std::vector<std::vector<double>> remote_particles(nNeighbors);
760
761 for (auto it = points.begin(); it != points.end(); ++it) {
762 double x, y, z;
763 int pos;
764
765 con_prep.find_voronoi_cell((*it)[0], (*it)[1], (*it)[2], x, y, z,
766 pos);
767 remote_particles.at(pos).push_back((*it)[0]);
768 remote_particles.at(pos).push_back((*it)[1]);
769 remote_particles.at(pos).push_back((*it)[2]);
770 remote_particles.at(pos).push_back(it->getWeight());
771 }
772
773 points.clear();
774 n_points = 0;
775 // exchange number of particles to be exchanged
776
777 int remote_s[nNeighbors];
778 int remote_r[nNeighbors];
779 MPI_Request request_s[nNeighbors];
780 MPI_Request request_r[nNeighbors];
781 MPI_Status status_s[nNeighbors];
782 MPI_Status status_r[nNeighbors];
783
784 for (auto i = 0; i < nNeighbors; ++i) {
785 // send number of values to be sent
786 remote_s[i] = remote_particles.at(i).size();
787 MPI_Isend(&remote_s[i], 1, MPI_INT, neighbors.at(i), 3000, cart_comm,
788 &request_s[i]);
789 MPI_Irecv(&remote_r[i], 1, MPI_INT, neighbors.at(i), 3000, cart_comm,
790 &request_r[i]);
791 }
792
793 MPI_Waitall(nNeighbors, request_s, status_s);
794 MPI_Waitall(nNeighbors, request_r, status_r);
795
796 /*
797 for (int i = 0; i < 25; ++i)
798 {
799 if (localRank == i)
800 {
801 std::cout << localRank << ": ";
802 for (int j = 0; j < nNeighbors; ++j)
803 std::cout << " " << neighbors.at(j) << " ";
804 std::cout << "| " << nNeighbors << std::endl;
805 }
806 MPI_Barrier(cart_comm);
807 }
808
809 for (int i = 0; i < 25; ++i)
810 {
811 if (localRank == i)
812 {
813 std::cout << localRank << ": ";
814 for (int j = 0; j < nNeighbors; ++j)
815 std::cout << " " << remote_s[j] << " / " << remote_r[j] << " ";
816 std::cout << n_points << std::endl;
817 }
818 MPI_Barrier(cart_comm);
819 }
820 */
821
822 std::vector<std::vector<double>> received_particles(nNeighbors);
823 for (auto i = 0; i < nNeighbors; ++i) {
824 if (remote_r[i] > 0) {
825 received_particles.at(i).resize(remote_r[i]);
826 MPI_Irecv(received_particles.at(i).data(), remote_r[i], MPI_DOUBLE,
827 neighbors.at(i), 4000, cart_comm, &request_r[i]);
828 } else
829 request_r[i] = MPI_REQUEST_NULL;
830 if (remote_s[i] > 0)
831 MPI_Isend(remote_particles.at(i).data(), remote_s[i], MPI_DOUBLE,
832 neighbors.at(i), 4000, cart_comm, &request_s[i]);
833 else
834 request_s[i] = MPI_REQUEST_NULL;
835 }
836
837 MPI_Waitall(nNeighbors, request_s, status_s);
838 MPI_Waitall(nNeighbors, request_r, status_r);
839
840 for (auto i = 0; i < nNeighbors; ++i) {
841 for (auto j = 0; j < remote_r[i]; j += 4) {
842 ALL::Point<double> tmp_point(3, received_particles.at(i).data() + j,
843 received_particles.at(i).at(j + 3));
844 points.push_back(tmp_point);
845 n_points++;
846 }
847 }
848
849 /*
850 int check;
851 MPI_Reduce(&n_points,&check,1,MPI_INT,MPI_SUM,0,cart_comm);
852 MPI_Allreduce(&n_points,&minpoints,1,MPI_INT,MPI_MIN,cart_comm);
853 if (localRank == 0) std::cout << "minpoints: " << minpoints <<
854 std::endl;
855 */
856
857 // output of borders / contents
858 for (int i = 0; i < n_ranks; ++i) {
859 if (localRank == i) {
860 std::ofstream of;
861 if (!weighted_points)
862 of.open("prep_data.dat", std::ios::out | std::ios::app);
863 else
864 of.open("prep_data_w.dat", std::ios::out | std::ios::app);
865 of << (i_preloop + 1) << " " << localRank << " ";
866
867 of << " " << vertices.at(0)[0] << " " << vertices.at(0)[1] << " "
868 << vertices.at(0)[2] << " " << n_points << std::endl;
869
870 if (i == n_ranks - 1)
871 of << std::endl;
872 of.close();
873 MPI_Barrier(cart_comm);
874 } else
875 MPI_Barrier(cart_comm);
876 }
877 }
878 }
879#endif
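// limit_efficiency controls an adaptive relaxation of the balancing:
// whenever the measured imbalance d_ratio falls below this threshold,
// gamma is doubled and the threshold halved (see start of the loop below)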
880 double limit_efficiency = 0.5;
881 // create ALL object
882 ALL::ALL<double, double> lb_obj(chosen_method, sys_dim, vertices, gamma);
883 std::vector<int> local_coordinates(local_coords, local_coords+sys_dim);
884 std::vector<int> global_dimensions(global_dim, global_dim+sys_dim);
885 lb_obj.setProcGridParams(local_coordinates, global_dimensions);
886 lb_obj.setCommunicator(MPI_COMM_WORLD);
887 lb_obj.setup();
888 for (int i_loop = 0; i_loop < max_loop; ++i_loop) {
889 MPI_Barrier(cart_comm);
890 if (d_ratio < limit_efficiency) {
891 gamma *= 2.0;
892 limit_efficiency /= 2.0;
893 }
894 if (localRank == 0)
895 std::cout << "loop " << i_loop << ": " << std::endl;
896 std::vector<double> work;
897 std::vector<int> n_bins(3, -1);
898
899 // double histogram_width = ALL_HISTOGRAM_DEFAULT_WIDTH / (double)(i_loop
900 // + 1);
901 double histogram_width = ALL_HISTOGRAM_DEFAULT_WIDTH / gamma;
902
903 if (!weighted_points) {
904 // work -> number of points on domain
905#ifdef ALL_VORONOI_ACTIVE
906 if (chosen_method == ALL::LB_t::VORONOI) {
907 ALL::Point<double> cow(3);
908 for (int i = 0; i < 3; ++i)
909 cow[i] = 0.0;
910 for (auto p : points) {
911 cow = cow + (p * (1.0 / n_points));
912 }
913 if (points.size() != 0)
914 vertices.at(1) = cow;
915 else
916 vertices.at(1) = vertices.at(0);
917 }
918#endif
919 // compute work histogram
920 if (chosen_method != ALL::LB_t::HISTOGRAM)
921 work = std::vector<double>(1, (double)n_points);
922 else {
923 double lb(-1.0);
924 double ub(-1.0);
925 double overlap(0.0);
926 int d = 2 - i_loop % 3;
927 // compute number of bins in each direction
928 lb = std::ceil(lp[d] / histogram_width) * histogram_width;
929 ub = std::ceil(up[d] / histogram_width) * histogram_width;
930 n_bins.at(d) = (ub - lb) / histogram_width;
931
932 work = std::vector<double>(n_bins.at(d), 0.0);
933 // compute histogram of work load
934 for (auto p : points) {
935 int idx = (int)std::floor(((p[d] - lb) / histogram_width));
936 if (idx >= 0) {
937 work.at(idx) += 1.0;
938 } else
939 overlap += 1.0;
940 }
941
942 // exchange overlapping workload (histograms might overlap
943 // over the domain boundaries)
944 int rank_left, rank_right;
945 MPI_Cart_shift(cart_comm, 0, 1, &rank_left, &rank_right);
946
947 MPI_Request sreq, rreq;
948 MPI_Status ssta, rsta;
949
950 double recv_work;
951
952 MPI_Isend(&overlap, 1, MPI_DOUBLE, rank_left, 0, cart_comm, &sreq);
953 MPI_Irecv(&recv_work, 1, MPI_DOUBLE, rank_right, 0, cart_comm, &rreq);
954 MPI_Wait(&sreq, &ssta);
955 MPI_Wait(&rreq, &rsta);
956
957 if (local_coords[d] != global_dim[d] - 1)
958 work.at(n_bins.at(d) - 1) += recv_work;
959 }
960 } else {
961 // work -> weighted number of points on domain
962 if (chosen_method != ALL::LB_t::HISTOGRAM) {
963 work = std::vector<double>(1, 0.0);
964 for (auto p = points.begin(); p != points.end(); ++p)
965 work.at(0) += p->getWeight();
966 } else {
967 double lb(-1.0);
968 double ub(-1.0);
969 double overlap(0.0);
970 int d = 2 - i_loop % 3;
971 // compute number of bins in each direction
972 lb = std::ceil(lp[d] / histogram_width) * histogram_width;
973 ub = std::ceil(up[d] / histogram_width) * histogram_width;
974 n_bins.at(d) = (ub - lb) / histogram_width;
975
976 work = std::vector<double>(n_bins.at(d), 0.0);
977 // compute histogram of work load
978 for (auto p : points) {
979 int idx = (int)std::floor(((p[d] - lb) / histogram_width));
980 if (idx >= 0) {
981 work.at(idx) += p.getWeight();
982 } else
983 overlap += p.getWeight();
984 }
985
986 // exchange overlapping workload (histograms might overlap
987 // over the domain boundaries)
988 int rank_left, rank_right;
989 MPI_Cart_shift(cart_comm, 0, 1, &rank_left, &rank_right);
990
991 MPI_Request sreq, rreq;
992 MPI_Status ssta, rsta;
993
994 double recv_work;
995
996 MPI_Isend(&overlap, 1, MPI_DOUBLE, rank_left, 0, cart_comm, &sreq);
997 MPI_Irecv(&recv_work, 1, MPI_DOUBLE, rank_right, 0, cart_comm, &rreq);
998 MPI_Wait(&sreq, &ssta);
999 MPI_Wait(&rreq, &rsta);
1000
1001 if (local_coords[d] != global_dim[d] - 1)
1002 work.at(n_bins.at(d) - 1) += recv_work;
1003 }
1004#ifdef ALL_VORONOI_ACTIVE
1005 if (chosen_method == ALL::LB_t::VORONOI) {
1006 if (points.size() > 0) {
1007 ALL::Point<double> cow(3);
1008 for (int i = 0; i < 3; ++i)
1009 cow[i] = 0.0;
1010 for (auto p : points) {
1011 cow = cow + (p * (1.0 / (work.at(0) * p.getWeight())));
1012 }
1013 vertices.at(1) = cow;
1014 } else
1015 vertices.at(1) = vertices.at(0);
1016 }
1017#endif
1018 }
1019#ifdef ALL_DEBUG_ENABLED
1020 MPI_Barrier(cart_comm);
1021 if (localRank == 0)
1022 std::cout << "finished computation of work" << std::endl;
1023 // lb_obj.setWork((double)n_points);
1024
1025 if (localRank == 0)
1026 std::cout << "Setting work..." << std::endl;
1027#endif
1028 lb_obj.setWork(work);
1029#ifdef ALL_DEBUG_ENABLED
1030 MPI_Barrier(cart_comm);
1031 if (localRank == 0)
1032 std::cout << "Setting process grid information..." << std::endl;
1033#endif
1034#ifdef ALL_DEBUG_ENABLED
1035 MPI_Barrier(cart_comm);
1036 if (localRank == 0)
1037 std::cout << "Setting communicator..." << std::endl;
1038#endif
1039 lb_obj.setCommunicator(MPI_COMM_WORLD);
1040#ifdef ALL_DEBUG_ENABLED
1041 MPI_Barrier(cart_comm);
1042 if (localRank == 0)
1043 std::cout << "Setting up method..." << std::endl;
1044#endif
1045#ifdef ALL_DEBUG_ENABLED
1046 MPI_Barrier(cart_comm);
1047 if (localRank == 0)
1048 std::cout << "Setting method data..." << std::endl;
1049#endif
1050 if (chosen_method == ALL::LB_t::HISTOGRAM)
1051 lb_obj.setMethodData(n_bins.data());
1052
1053#ifdef ALL_DEBUG_ENABLED
1054 MPI_Barrier(cart_comm);
1055 if (localRank == 0)
1056 std::cout << "Setting system size..." << std::endl;
1057#endif
1058 lb_obj.setSysSize(sys_size);
1059#ifdef ALL_DEBUG_ENABLED
1060 MPI_Barrier(cart_comm);
1061 if (localRank == 0)
1062 std::cout << "Setting domain vertices..." << std::endl;
1063#endif
1064 lb_obj.setVertices(vertices);
1065#ifdef ALL_DEBUG_ENABLED
1066 MPI_Barrier(cart_comm);
1067 if (localRank == 0)
1068 std::cout << "Setting domain minimum size..." << std::endl;
1069#endif
1070 lb_obj.setMinDomainSize(minSize);
1071 // print out number of particles to check communication!
1072 int n_points_global = 0;
1073 MPI_Reduce(&n_points, &n_points_global, 1, MPI_INT, MPI_SUM, 0,
1074 MPI_COMM_WORLD);
1075 if (localRank == 0)
1076 std::cout << "number of particles in step " << i_loop << ": "
1077 << n_points_global << std::endl;
1078#ifdef ALL_DEBUG_ENABLED
1079 MPI_Barrier(cart_comm);
1080 if(localRank == 0)
1081 std::cout << "Providing current LB efficiency" << std::endl;
1082#endif
1083 double LBefficiency = lb_obj.getEfficiency();
1084 if (localRank == 0)
1085 std::cout << "current LB efficiency: " << LBefficiency << std::endl;
1086
1087#ifdef ALL_DEBUG_ENABLED
1088 MPI_Barrier(cart_comm);
1089 if (localRank == 0)
1090 std::cout << "Balancing domains..." << std::endl;
1091#endif
1092 lb_obj.balance();
1093#ifdef ALL_DEBUG_ENABLED
1094 MPI_Barrier(cart_comm);
1095 if (localRank == 0)
1096 std::cout << "Providing estimated new LB efficiency" << std::endl;
1097#endif
1098 if (localRank == 0)
1099 std::cout << "Estimated LB efficiency: "
1100 << lb_obj.getEstimatedEfficiency()
1101 << std::endl;
1102
1103#ifdef ALL_DEBUG_ENABLED
1104 MPI_Barrier(cart_comm);
1105 if (localRank == 0)
1106 std::cout << "Updating vertices..." << std::endl;
1107#endif
1108 new_vertices = lb_obj.getVertices();
1109 old_vertices = vertices;
1110 vertices = new_vertices;
1111
1112#ifdef ALL_VORONOI_ACTIVE
1113 if (chosen_method == ALL::LB_t::VORONOI) {
1114#ifdef ALL_DEBUG_ENABLED
1115 MPI_Barrier(cart_comm);
1116 if (localRank == 0)
1117 std::cout << "Updating vertices (Voronoi)..." << std::endl;
1118#endif
1119 vertices.resize(2);
1120 vertices.at(1) = vertices.at(0);
1121 }
1122#endif
1123#ifdef ALL_DEBUG_ENABLED
1124 MPI_Barrier(cart_comm);
1125 if (localRank == 0)
1126 std::cout << "Updating upper / lower vertices..." << std::endl;
1127#endif
1128 lp = vertices.at(0);
1129 up = vertices.at(vertices.size() - 1);
1130
1131#ifdef ALL_DEBUG_ENABLED
1132 MPI_Barrier(cart_comm);
1133 if (localRank == 0)
1134 std::cout << "beginning particles transfer..." << std::endl;
1135#endif
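// particle migration after the balancing step, depending on the method:
// - TENSOR / STAGGERED: exchange dimension by dimension with the left/right
//   neighbor domains of the cartesian grid
// - FORCEBASED: sort particles into the 26 surrounding domains using the
//   exchanged sets of domain vertices
// - VORONOI: assign particles to the nearest generator point via voro++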
1136 switch (chosen_method) {
1138 case ALL::LB_t::TENSOR: {
1139 int n_transfer[2];
1140 int n_recv[2];
1141 for (int i = 0; i < sys_dim; ++i) {
1142 for (int j = 0; j < 2; ++j) {
1143 n_transfer[j] = 0;
1144 n_recv[j] = 0;
1145 }
1146 for (auto P = points.begin(); P != points.end(); ++P) {
1147 ALL::Point<double> p = *P;
1148 if (p[i] < vertices.at(0)[i]) {
1149 // copy particles that left the domain
1150 // to the transfer array
1151 for (int j = 0; j < sys_dim; j++) {
1152 if (p[j] < 0.0)
1153 p[j] = p[j] + box_size[j];
1154 transfer[n_transfer[0] * (sys_dim + 1) + j] = p[j];
1155 }
1156 transfer[n_transfer[0] * (sys_dim + 1) + sys_dim] = p.getWeight();
1157 n_transfer[0]++;
1158 } else if (p[i] >= vertices.at(1)[i]) {
1159 // copy particles that left the domain to the transfer array
1160 for (int j = 0; j < sys_dim; j++) {
1161 if (p[j] > box_size[j])
1162 p[j] = p[j] - box_size[j];
1163 transfer[(sys_dim + 1) * max_particles +
1164 n_transfer[1] * (sys_dim + 1) + j] = p[j];
1165 }
1166 transfer[(sys_dim + 1) * max_particles +
1167 n_transfer[1] * (sys_dim + 1) + sys_dim] = p.getWeight();
1168 n_transfer[1]++;
1169 }
1170 }
1171 for (auto P = points.begin(); P != points.end(); ++P) {
1172 ALL::Point<double> p = *P;
1173 if (p[i] < vertices.at(0)[i] || p[i] >= vertices.at(1)[i]) {
1174 points.erase(P);
1175 n_points--;
1176 P--;
1177 }
1178 }
1179 MPI_Status status;
1180
1181 MPI_Request sreq_r, rreq_r;
1182 MPI_Request sreq_l, rreq_l;
1183 MPI_Status sstatus, rstatus;
1184
1185 MPI_Irecv(&n_recv[1], 1, MPI_INT, r_neig[i], 10, cart_comm, &rreq_r);
1186 MPI_Irecv(&n_recv[0], 1, MPI_INT, l_neig[i], 20, cart_comm, &rreq_l);
1187
1188 MPI_Isend(&n_transfer[0], 1, MPI_INT, l_neig[i], 10, cart_comm,
1189 &sreq_l);
1190 MPI_Isend(&n_transfer[1], 1, MPI_INT, r_neig[i], 20, cart_comm,
1191 &sreq_r);
1192
1193 MPI_Wait(&sreq_l, &sstatus);
1194 MPI_Wait(&sreq_r, &rstatus);
1195
1196 MPI_Wait(&rreq_l, &sstatus);
1197 MPI_Wait(&rreq_r, &rstatus);
1198
1199 // send particles to corresponding neighbor
1200 MPI_Irecv(&recv[max_particles * (sys_dim + 1)],
1201 (sys_dim + 1) * n_recv[1], MPI_DOUBLE, r_neig[i], 30,
1202 cart_comm, &rreq_r);
1203 MPI_Irecv(&recv[0], (sys_dim + 1) * n_recv[0], MPI_DOUBLE, l_neig[i],
1204 40, cart_comm, &rreq_l);
1205
1206 MPI_Isend(&transfer[0], (sys_dim + 1) * n_transfer[0], MPI_DOUBLE,
1207 l_neig[i], 30, cart_comm, &sreq_l);
1208 MPI_Isend(&transfer[max_particles * (sys_dim + 1)],
1209 (sys_dim + 1) * n_transfer[1], MPI_DOUBLE, r_neig[i], 40,
1210 cart_comm, &sreq_r);
1211
1212 MPI_Wait(&sreq_l, &sstatus);
1213 MPI_Wait(&sreq_r, &rstatus);
1214
1215 MPI_Wait(&rreq_l, &sstatus);
1216 MPI_Wait(&rreq_r, &rstatus);
1217
1218 for (int j = 0; j < n_recv[0]; ++j) {
1219 ALL::Point<double> p(sys_dim, &recv[j * (sys_dim + 1)],
1220 recv[j * (sys_dim + 1) + sys_dim]);
1221 points.push_back(p);
1222 n_points++;
1223 }
1224 for (int j = 0; j < n_recv[1]; ++j) {
1225 ALL::Point<double> p(
1226 sys_dim, &recv[(max_particles + j) * (sys_dim + 1)],
1227 recv[(max_particles + j) * (sys_dim + 1) + sys_dim]);
1228 points.push_back(p);
1229 n_points++;
1230 }
1231 }
1232 } break;
1233 case ALL::LB_t::STAGGERED: {
1234 int n_transfer[2];
1235 int n_recv[2 * MAX_NEIG];
1236 int offset_neig[2];
1237 std::vector<int> neighbors = lb_obj.getNeighbors();
1238 int *nNeighbors;
1239 nNeighbors = neighbors.data();
1240
1241 offset_neig[0] = 0;
1242 offset_neig[1] = nNeighbors[0];
1243
1244 loc_neighbors = 0;
1245 for (int n = 0; n < 6; ++n)
1246 loc_neighbors += nNeighbors[n];
1247
1248 MPI_Allreduce(&loc_neighbors, &max_neighbors, 1, MPI_INT, MPI_MAX,
1249 cart_comm);
1250
1251 for (int i = 0; i < sys_dim; ++i) {
1252
1253 if (nNeighbors[i] + nNeighbors[i + 1] > max_neig) {
1254 std::cout << ">>> resizing transfer buffers from " << max_neig
1255 << " neighbors ";
1256 max_neig = (int)(1.5 * (double)(nNeighbors[i] + nNeighbors[i + 1]));
1257 std::cout << "to " << max_neig << " neighbors on process "
1258 << localRank << std::endl;
1259 delete[] transfer;
1260 delete[] recv;
1261 recv = new double[max_neig * (sys_dim + 1) * max_particles];
1262 transfer = new double[max_neig * (sys_dim + 1) * max_particles];
1263 }
1264 // MPI_Barrier(cart_comm);
1265 // if (localRank == 0)
1266 // std::cout << "filling buffers with remote points, dim "
1267 // << i << std::endl;
1268
1269 for (int j = 0; j < 2; ++j) {
1270 n_transfer[j] = 0;
1271 }
1272
1273 for (auto P = points.begin(); P != points.end(); ++P) {
1274 ALL::Point<double> p = *P;
1275 if (p[i] < vertices.at(0)[i]) {
1276 // periodic correction
1277 if (p[i] < 0.0)
1278 p[i] += box_size[i];
1279 // copy particles that left the domain
1280 // to the transfer array
1281 for (int d = 0; d < sys_dim; ++d)
1282 transfer[n_transfer[0] * (sys_dim + 1) + d] = p[d];
1283 transfer[n_transfer[0] * (sys_dim + 1) + sys_dim] = p.getWeight();
1284 n_transfer[0]++;
1285 if (n_transfer[0] > max_particles) {
1286 std::stringstream ss;
1287 ss << "Trying to send more particles than buffer size allows! "
1288 << " n_transfer: " << n_transfer[0]
1289 << " max_particles: " << max_particles;
1290 throw ALL::InvalidArgumentException(__FILE__, __func__, __LINE__,
1291 ss.str().c_str());
1292 }
1293 } else if (p[i] >= vertices.at(1)[i]) {
1294 // periodic correction
1295 if (p[i] >= box_size[i])
1296 p[i] -= box_size[i];
1297 // copy particles that left the domain
1298 // to the transfer array
1299 for (int d = 0; d < sys_dim; ++d) {
1300 transfer[(sys_dim + 1) * max_particles +
1301 n_transfer[1] * (sys_dim + 1) + d] = p[d];
1302 }
1303 transfer[(sys_dim + 1) * max_particles +
1304 n_transfer[1] * (sys_dim + 1) + sys_dim] = p.getWeight();
1305 n_transfer[1]++;
1306 if (n_transfer[1] > max_particles) {
1307 std::stringstream ss;
1308 ss << "Trying to send more particles than buffer size allows! "
1309 << " n_transfer: " << n_transfer[0]
1310 << " max_particles: " << max_particles;
1311 throw ALL::InvalidArgumentException(__FILE__, __func__, __LINE__,
1312 ss.str().c_str());
1313 }
1314 }
1315 }
1316
1317 // MPI_Barrier(cart_comm);
1318 // if (localRank == 0)
1319 // std::cout << "cleaning point vector, dim " << i << std::endl;
1320
1321 auto rem_it = std::remove_if(
1322 points.begin(), points.end(), [vertices, i](ALL::Point<double> p) {
1323 return p[i] < vertices.at(0)[i] || p[i] >= vertices.at(1)[i];
1324 });
1325 points.erase(rem_it, points.end());
1326 n_points = points.size();
1327
1328 // MPI_Barrier(cart_comm);
1329 // if (localRank == 0)
1330 // std::cout << "exchanging number of sent points, dim "
1331 // << i << std::endl;
1332
1333 // TODO -> better estimation than max 1024 neighbors ...
1334 MPI_Request sreq_r[MAX_NEIG], rreq_r[MAX_NEIG];
1335 MPI_Request sreq_l[MAX_NEIG], rreq_l[MAX_NEIG];
1336 MPI_Status lsstatus[MAX_NEIG], rsstatus[MAX_NEIG];
1337 MPI_Status lrstatus[MAX_NEIG], rrstatus[MAX_NEIG];
1338
1339 for (int d = 0; d < nNeighbors[2 * i]; ++d) {
1340 MPI_Irecv(&n_recv[d], 1, MPI_INT, neighbors.at(offset_neig[0] + d),
1341 20, cart_comm, &rreq_l[d]);
1342 MPI_Isend(&n_transfer[0], 1, MPI_INT,
1343 neighbors.at(offset_neig[0] + d), 10, cart_comm,
1344 &sreq_l[d]);
1345 }
1346 for (int d = 0; d < nNeighbors[2 * i + 1]; ++d) {
1347 MPI_Irecv(&n_recv[MAX_NEIG + d], 1, MPI_INT,
1348 neighbors.at(offset_neig[1] + d), 10, cart_comm,
1349 &rreq_r[d]);
1350 MPI_Isend(&n_transfer[1], 1, MPI_INT,
1351 neighbors.at(offset_neig[1] + d), 20, cart_comm,
1352 &sreq_r[d]);
1353 }
1354
1355 if (nNeighbors[2 * i] > MAX_NEIG) {
1356 throw ALL::InvalidArgumentException(
1357 __FILE__, __func__, __LINE__,
1358 "Number of neighbors higher than expected \
1359 maximum number of neighbors.");
1360 }
1361
1362 if (nNeighbors[2 * i + 1] > MAX_NEIG) {
1363 throw ALL::InvalidArgumentException(
1364 __FILE__, __func__, __LINE__,
1365 "Number of neighbors higher than expected \
1366 maximum number of neighbors.");
1367 }
1368
1369 MPI_Waitall(nNeighbors[2 * i], sreq_l, lsstatus);
1370 MPI_Waitall(nNeighbors[2 * i + 1], sreq_r, lrstatus);
1371
1372 MPI_Waitall(nNeighbors[2 * i], rreq_l, rsstatus);
1373 MPI_Waitall(nNeighbors[2 * i + 1], rreq_r, rrstatus);
1374
1375 // MPI_Barrier(cart_comm);
1376 // if (localRank == 0)
1377 // std::cout << "exchanging point data, dim " << i << std::endl;
1378
1379 int offset_l = 0;
1380 // send particles to corresponding neighbor
1381 for (int d = 0; d < nNeighbors[2 * i]; ++d) {
1382 MPI_Irecv(&recv[offset_l * (sys_dim + 1)],
1383 (sys_dim + 1) * n_recv[d], MPI_DOUBLE,
1384 neighbors.at(offset_neig[0] + d), 40, cart_comm,
1385 &rreq_l[d]);
1386 MPI_Isend(&transfer[0], (sys_dim + 1) * n_transfer[0], MPI_DOUBLE,
1387 neighbors.at(offset_neig[0] + d), 30, cart_comm,
1388 &sreq_l[d]);
1389 offset_l += n_recv[d];
1390 }
1391
1392 int offset_r = 0;
1393 for (int d = 0; d < nNeighbors[2 * i + 1]; ++d) {
1394 MPI_Irecv(
1395 &recv[max_particles * (sys_dim + 1) + offset_r * (sys_dim + 1)],
1396 (sys_dim + 1) * n_recv[MAX_NEIG + d], MPI_DOUBLE,
1397 neighbors.at(offset_neig[1] + d), 30, cart_comm, &rreq_r[d]);
1398 MPI_Isend(&transfer[max_particles * (sys_dim + 1)],
1399 (sys_dim + 1) * n_transfer[1], MPI_DOUBLE,
1400 neighbors.at(offset_neig[1] + d), 40, cart_comm,
1401 &sreq_r[d]);
1402 offset_r += n_recv[MAX_NEIG + d];
1403 }
1404
1405 MPI_Waitall(nNeighbors[2 * i], sreq_l, lsstatus);
1406 MPI_Waitall(nNeighbors[2 * i + 1], sreq_r, lrstatus);
1407
1408 MPI_Waitall(nNeighbors[2 * i], rreq_l, rsstatus);
1409 MPI_Waitall(nNeighbors[2 * i + 1], rreq_r, rrstatus);
1410
1411 // MPI_Barrier(cart_comm);
1412 // if (localRank == 0)
1413 // std::cout << "adding received point data, dim "
1414 // << i << std::endl;
1415 for (int j = 0; j < offset_l; ++j) {
1416 ALL::Point<double> p(sys_dim, &recv[j * (sys_dim + 1)],
1417 recv[j * (sys_dim + 1) + sys_dim]);
1418 // if the particle does not belong to the local domain skip it
1419 bool outside = false;
1420 for (int d = 0; d < i; ++d)
1421 outside = outside ||
1422 (p[d] < vertices.at(0)[d] || p[d] >= vertices.at(1)[d]);
1423 if (outside)
1424 continue;
1425 points.push_back(p);
1426 n_points++;
1427 }
1428 for (int j = 0; j < offset_r; ++j) {
1429 ALL::Point<double> p(
1430 sys_dim, &recv[(max_particles + j) * (sys_dim + 1)],
1431 recv[(max_particles + j) * (sys_dim + 1) + sys_dim]);
1432 // if the particle does not belong to the local domain skip it
1433 bool outside = false;
1434 for (int d = 0; d < i; ++d)
1435 outside = outside ||
1436 (p[d] < vertices.at(0)[d] || p[d] >= vertices.at(1)[d]);
1437 if (outside)
1438 continue;
1439 points.push_back(p);
1440 n_points++;
1441 }
1442
1443 offset_neig[0] = offset_neig[1] + nNeighbors[2 * i + 1];
1444 if (i < sys_dim - 1)
1445 offset_neig[1] = offset_neig[0] + nNeighbors[2 * (i + 1)];
1446 }
1447 } break;
1448 case ALL::LB_t::FORCEBASED: {
1449
1450 // array of all vertices of all neighbors (and self)
1451 double comm_vertices[27 * 8 * 3];
1452
1453 std::vector<int> neighbors(27);
1454 int neig = 0;
1455 int coords[3];
1456 bool exists[27];
1457
1458 // get list of neighbors
1459 for (int z = -1; z <= 1; ++z) {
1460 coords[2] = local_coords[2] + z;
1461 for (int y = -1; y <= 1; ++y) {
1462 coords[1] = local_coords[1] + y;
1463 for (int x = -1; x <= 1; ++x) {
1464 coords[0] = local_coords[0] + x;
1465 if (coords[0] >= 0 && coords[0] < global_dim[0] &&
1466 coords[1] >= 0 && coords[1] < global_dim[1] &&
1467 coords[2] >= 0 && coords[2] < global_dim[2]) {
1468 MPI_Cart_rank(cart_comm, coords, &neighbors.at(neig));
1469 exists[neig] = true;
1470 } else {
1471 neighbors.at(neig) = MPI_PROC_NULL;
1472 exists[neig] = false;
1473 }
1474 ++neig;
1475 }
1476 }
1477 }
1478 // no communication to self
1479 neighbors.at(13) = MPI_PROC_NULL;
1480
1481 for (int i = 0; i < 27 * 8 * 3; ++i)
1482 comm_vertices[i] = -1.0;
1483
1484 // copy local indices to position 13
1485 for (int i = 0; i < 8; ++i) {
1486 for (int d = 0; d < 3; ++d) {
1487 comm_vertices[13 * 8 * 3 + 3 * i + d] = vertices.at(i)[d];
1488 }
1489 }
1490
1491 int offset = 0;
1492 int comm_size = 0;
1493 MPI_Request request[54];
1494 MPI_Status status[54];
1495
1496 for (int i = 0; i < 54; ++i)
1497 request[i] = MPI_REQUEST_NULL;
1498
1499 // send local indices to neighbors
1500 for (int d = 0; d < 3; ++d) {
1501 // offset for the buffer
1502 comm_size = (int)std::pow(3, d);
1503
1504 // local coords
1505 coords[0] = local_coords[0];
1506 coords[1] = local_coords[1];
1507 coords[2] = local_coords[2];
1508
1509 int low_neig;
1510 int up_neig;
1511
1512 coords[d] -= 1;
1513 if (coords[d] >= 0)
1514 MPI_Cart_rank(cart_comm, coords, &low_neig);
1515 else
1516 low_neig = MPI_PROC_NULL;
1517 coords[d] += 2;
1518 if (coords[d] < global_dim[d])
1519 MPI_Cart_rank(cart_comm, coords, &up_neig);
1520 else
1521 up_neig = MPI_PROC_NULL;
1522
1523 MPI_Isend(&comm_vertices[(13 - offset) * 24], comm_size * 24,
1524 MPI_DOUBLE, low_neig, 1010, cart_comm, &request[0]);
1525 MPI_Isend(&comm_vertices[(13 - offset) * 24], comm_size * 24,
1526 MPI_DOUBLE, up_neig, 2010, cart_comm, &request[1]);
1527
1528 // increase offset here, as the previous offset gives
1529 // the start address of the whole dimension
1530 // data to be transferred
1531
1532 offset += comm_size;
1533
1534 MPI_Irecv(&comm_vertices[(13 - offset) * 24], comm_size * 24,
1535 MPI_DOUBLE, low_neig, 2010, cart_comm, &request[28]);
1536 MPI_Irecv(&comm_vertices[(14 + offset - comm_size) * 24],
1537 comm_size * 24, MPI_DOUBLE, up_neig, 1010, cart_comm,
1538 &request[27]);
1539
1540 MPI_Waitall(54, request, status);
1541 }
1542
1543#ifdef ALL_VTK_OUTPUT
1544#ifdef ALL_VTK_FORCE_SORT
1545 // creating an unstructured grid for local domain and neighbors
1546 auto vtkpoints = vtkSmartPointer<vtkPoints>::New();
1547 auto unstructuredGrid = vtkSmartPointer<vtkUnstructuredGrid>::New();
1548 for (int i = 0; i < 27; ++i) {
1549 for (int v = 0; v < 8; ++v) {
1550 vtkpoints->InsertNextPoint(comm_vertices[i * 24 + v * 3],
1551 comm_vertices[i * 24 + v * 3 + 1],
1552 comm_vertices[i * 24 + v * 3 + 2]);
1553 }
1554 }
1555 unstructuredGrid->SetPoints(vtkpoints);
1556
1557 auto work = vtkSmartPointer<vtkFloatArray>::New();
1558 work->SetNumberOfComponents(1);
1559 work->SetNumberOfTuples(27);
1560 work->SetName("Cell");
1561
1562 for (int n = 0; n < 27; ++n) {
1563 // define grid points, i.e. vertices of local domain
1564 vtkIdType pointIds[8] = {8 * n + 0, 8 * n + 1, 8 * n + 2, 8 * n + 3,
1565 8 * n + 4, 8 * n + 5, 8 * n + 6, 8 * n + 7};
1566
1567 auto faces = vtkSmartPointer<vtkCellArray>::New();
1568 // setup faces of polyhedron
1569 vtkIdType f0[3] = {8 * n + 0, 8 * n + 2, 8 * n + 1};
1570 vtkIdType f1[3] = {8 * n + 1, 8 * n + 2, 8 * n + 3};
1571
1572 vtkIdType f2[3] = {8 * n + 0, 8 * n + 4, 8 * n + 2};
1573 vtkIdType f3[3] = {8 * n + 2, 8 * n + 4, 8 * n + 6};
1574
1575 vtkIdType f4[3] = {8 * n + 2, 8 * n + 6, 8 * n + 3};
1576 vtkIdType f5[3] = {8 * n + 3, 8 * n + 6, 8 * n + 7};
1577
1578 vtkIdType f6[3] = {8 * n + 1, 8 * n + 5, 8 * n + 3};
1579 vtkIdType f7[3] = {8 * n + 3, 8 * n + 5, 8 * n + 7};
1580
1581 vtkIdType f8[3] = {8 * n + 0, 8 * n + 4, 8 * n + 1};
1582 vtkIdType f9[3] = {8 * n + 1, 8 * n + 4, 8 * n + 5};
1583
1584 vtkIdType fa[3] = {8 * n + 4, 8 * n + 6, 8 * n + 5};
1585 vtkIdType fb[3] = {8 * n + 5, 8 * n + 6, 8 * n + 7};
1586
1587 faces->InsertNextCell(3, f0);
1588 faces->InsertNextCell(3, f1);
1589 faces->InsertNextCell(3, f2);
1590 faces->InsertNextCell(3, f3);
1591 faces->InsertNextCell(3, f4);
1592 faces->InsertNextCell(3, f5);
1593 faces->InsertNextCell(3, f6);
1594 faces->InsertNextCell(3, f7);
1595 faces->InsertNextCell(3, f8);
1596 faces->InsertNextCell(3, f9);
1597 faces->InsertNextCell(3, fa);
1598 faces->InsertNextCell(3, fb);
1599
1600 unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds, 12,
1601 faces->GetPointer());
1602 work->SetValue(n, (double)n);
1603 }
1604 unstructuredGrid->GetCellData()->AddArray(work);
1605
1606 /* Debug output: print local cell and neighbors */
1607 /*
1608 if (localRank == 26)
1609 {
1610 auto writer = vtkSmartPointer<vtkXMLUnstructuredGridWriter>::New();
1611 writer->SetInputData(unstructuredGrid);
1612 writer->SetFileName("test.vtu");
1613 writer->SetDataModeToAscii();
1614 //writer->SetDataModeToBinary();
1615 writer->Write();
1616 }
1617 */
1618
1619 MPI_Allreduce(&n_points, &check_np, 1, MPI_INT, MPI_SUM, cart_comm);
1620
1621 auto locator = vtkSmartPointer<vtkCellLocator>::New();
1622 locator->SetDataSet(unstructuredGrid);
1623 locator->BuildLocator();
1624#else
1625 auto in_triangle = [=](double nv[27 * 8 * 3], int n, int A, int B,
1626 int C, double x, double y, double z) {
1627 ALL::Point<double> a(3, nv + n * 24 + A * 3);
1628 ALL::Point<double> b(3, nv + n * 24 + B * 3);
1629 ALL::Point<double> c(3, nv + n * 24 + C * 3);
1630 ALL::Point<double> p(3);
1631 p[0] = x;
1632 p[1] = y;
1633 p[2] = z;
1634 return false;
1635 // return p.same_side_line(a, b, c) &&
1636 // p.same_side_line(b, c, a) &&
1637 // p.same_side_line(c, a, b);
1638 };
1639
1640 // check if (x,y,z) is on the same side as A in
1641 // regard to the plane spanned by (B,C,D)
1642 auto same_side_plane = [=](double nv[27 * 8 * 3], int n, int A, int B,
1643 int C, int D, double x, double y, double z) {
1644 // compute difference vectors required:
1645 // C-B
1646 double CBx = nv[n * 24 + C * 3 + 0] - nv[n * 24 + B * 3 + 0];
1647 double CBy = nv[n * 24 + C * 3 + 1] - nv[n * 24 + B * 3 + 1];
1648 double CBz = nv[n * 24 + C * 3 + 2] - nv[n * 24 + B * 3 + 2];
1649 // D-B
1650 double DBx = nv[n * 24 + D * 3 + 0] - nv[n * 24 + B * 3 + 0];
1651 double DBy = nv[n * 24 + D * 3 + 1] - nv[n * 24 + B * 3 + 1];
1652 double DBz = nv[n * 24 + D * 3 + 2] - nv[n * 24 + B * 3 + 2];
1653 // A-B
1654 double ABx = nv[n * 24 + A * 3 + 0] - nv[n * 24 + B * 3 + 0];
1655 double ABy = nv[n * 24 + A * 3 + 1] - nv[n * 24 + B * 3 + 1];
1656 double ABz = nv[n * 24 + A * 3 + 2] - nv[n * 24 + B * 3 + 2];
1657 // P-B
1658 double PBx = x - nv[n * 24 + B * 3 + 0];
1659 double PBy = y - nv[n * 24 + B * 3 + 1];
1660 double PBz = z - nv[n * 24 + B * 3 + 2];
1661
1662 // compute normal vector of plane
1663 // n = (C-B) x (D-B)
1664 double nx = CBy * DBz - CBz * DBy;
1665 double ny = CBz * DBx - CBx * DBz;
1666 double nz = CBx * DBy - CBy * DBx;
1667
1668 // compute dot product <A-B,n>
1669 double ABn = ABx * nx + ABy * ny + ABz * nz;
1670
1671 // compute dot product <P-B,n>
1672 double PBn = PBx * nx + PBy * ny + PBz * nz;
1673
1674 /*
1675 if (
1676 x <= TRACK_X+0.01 && x >= TRACK_X &&
1677 y <= TRACK_Y+0.01 && y >= TRACK_Y &&
1678 z <= TRACK_Z+0.01 && z >= TRACK_Z
1679 )
1680 {
1681 std::cerr << "Tracked Particle: "
1682 << localRank << " ( "
1683 << x << " , "
1684 << y << " , "
1685 << z << " ) "
1686 << n << ", "
1687 << A << ", "
1688 << B << ", "
1689 << C << ", "
1690 << D << ") "
1691 << ( ( std::abs(PBn) < 1e-12 ) && in_triangle(nv, n, B,
1692 C, D, x, y, z) ) << " "
1693 << ( ( ABn * PBn ) > 0 )
1694 << std::endl;
1695 }
1696 */
1697
1698 if (std::abs(PBn) < 1e-12) {
1699 return in_triangle(nv, n, B, C, D, x, y, z);
1700 } else
1701 // check if sign matches
1702 return ((ABn * PBn) > 0);
1703 };
1704
1705 // check if (x,y,z) is in the tetrahedron described
1706 // by the points within the array t
1707 auto in_tetraeder = [=](double nv[27 * 8 * 3], int n, int t[4],
1708 double x, double y, double z) {
1709 return same_side_plane(nv, n, t[0], t[1], t[2], t[3], x, y, z) &&
1710 same_side_plane(nv, n, t[3], t[0], t[1], t[2], x, y, z) &&
1711 same_side_plane(nv, n, t[2], t[3], t[0], t[1], x, y, z) &&
1712 same_side_plane(nv, n, t[1], t[2], t[3], t[0], x, y, z);
1713 };
1714
1715 auto containing_neighbor = [=](double nv[27 * 8 * 3], bool e[27],
1716 double x, double y, double z) {
1717 // definition of tetrahedra
1718 /*
1719 int tetrahedra[8][4] =
1720 {
1721 { 0, 1, 2, 4 },
1722 { 1, 0, 3, 5 },
1723 { 2, 3, 0, 6 },
1724 { 3, 2, 1, 7 },
1725 { 4, 5, 6, 0 },
1726 { 5, 4, 7, 1 },
1727 { 6, 7, 4, 2 },
1728 { 7, 6, 5, 3 }
1729 };
1730 */
1731 int tetrahedra[5][4] = {{1, 2, 4, 7},
1732 {0, 1, 2, 4},
1733 {1, 2, 3, 7},
1734 {1, 4, 5, 7},
1735 {2, 4, 6, 7}};
1736
1737 // check if the particle is in the local
1738 // tetrahedron, if so keep it
1739 // (needed to decide what about particles
1740 // on surfaces)
1741 for (int t = 0; t < 5; ++t) {
1742 if (in_tetraeder(nv, 13, tetrahedra[t], x, y, z))
1743 return 13;
1744 }
1745
1746 // loop over neighbors
1747 for (int n = 0; n < 27; ++n) {
1748 if (!e[n] || n == 13)
1749 continue;
1750 // loop over tetrahedra
1751 for (int t = 0; t < 5; ++t) {
1752 if (in_tetraeder(nv, n, tetrahedra[t], x, y, z))
1753 return n;
1754 }
1755 }
1756
1757 return -1;
1758 };
1759
1760#endif
1761
1762 int n_send[27];
1763 int n_recv[27];
1764 for (int i = 0; i < 27; ++i) {
1765 n_send[i] = 0;
1766 n_recv[i] = 0;
1767 }
1768
1769 int check_np = 0;
1770 int check_np_new = 0;
1771
1772 for (auto P = points.begin(); P != points.end(); ++P) {
1773 ALL::Point<double> p = *P;
1774 double pcoords[3];
1775 double pccoords[3];
1776 int subId;
1777 double weights[4];
1778 for (int d = 0; d < 3; ++d)
1779 pcoords[d] = p[d];
1780#ifdef ALL_VTK_FORCE_SORT
1781 vtkIdType cellId = locator->FindCell(pcoords);
1782#else
1783 int cellId = containing_neighbor(comm_vertices, exists, pcoords[0],
1784 pcoords[1], pcoords[2]);
1785#endif
1786 /*
1787 vtkIdType cellId =
1788 unstructuredGrid->FindCell(pcoords, NULL, 0, 1e-6, subId, pccoords,
1789 weights);
1790 */
1791
1792 // if the particle is in a valid neighboring cell
1793 if (cellId >= 0 && cellId != 13 && cellId <= 26) {
1794 if (n_send[cellId] == max_particles) {
1795 throw ALL::InvalidArgumentException(
1796 __FILE__, __func__, __LINE__,
1797 "Trying to send more particles than \
1798 buffer size allows!");
1799 }
1800 for (int d = 0; d < 3; ++d) {
1801 transfer[cellId * (sys_dim + 1) * max_particles +
1802 n_send[cellId] * (sys_dim + 1) + d] = p[d];
1803 }
1804 transfer[cellId * (sys_dim + 1) * max_particles +
1805 n_send[cellId] * (sys_dim + 1) + sys_dim] = p.getWeight();
1806 n_send[cellId]++;
1807 points.erase(P);
1808 --P;
1809 --n_points;
1810 }
1811 }
1812
1813 for (int n = 0; n < 27; ++n) {
1814 MPI_Isend(&n_send[n], 1, MPI_INT, neighbors.at(n), 1020, cart_comm,
1815 &request[n]);
1816 MPI_Irecv(&n_recv[n], 1, MPI_INT, neighbors.at(n), 1020, cart_comm,
1817 &request[27 + n]);
1818 }
1819 MPI_Waitall(54, request, status);
1820
1821 for (int n = 0; n < 27; ++n) {
1822 MPI_Isend(&transfer[n * (sys_dim + 1) * max_particles],
1823 (sys_dim + 1) * n_send[n], MPI_DOUBLE, neighbors.at(n),
1824 1030, cart_comm, &request[n]);
1825 MPI_Irecv(&recv[n * (sys_dim + 1) * max_particles],
1826 (sys_dim + 1) * n_recv[n], MPI_DOUBLE, neighbors.at(n),
1827 1030, cart_comm, &request[27 + n]);
1828 }
1829 MPI_Waitall(54, request, status);
1830
1831 for (int n = 0; n < 27; ++n) {
1832 if (exists[n]) {
1833        for (int i = 0; i < n_recv[n]; ++i) {
1834          ALL::Point<double> p(
1835            sys_dim,
1836 &recv[n * (sys_dim + 1) * max_particles + i * (sys_dim + 1)],
1837 recv[n * (sys_dim + 1) * max_particles + i * (sys_dim + 1) +
1838 sys_dim]);
1839 points.push_back(p);
1840 ++n_points;
1841 }
1842 }
1843 }
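    // Summary of the transfer above: per-neighbor counts are exchanged first
    // (tag 1020), then the packed buffers (tag 1030); each particle occupies
    // (sys_dim + 1) doubles (coordinates plus weight) and each neighbor's
    // slot in the flat buffers starts at n * (sys_dim + 1) * max_particles.
    // 54 requests are waited on in each phase: 27 sends and 27 receives.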
1844
1845#else
1846 if (localRank == 0)
1847 std::cout << "Currently no FORCEBASED test without VTK!"
1848 << std::endl;
1849 MPI_Abort(MPI_COMM_WORLD, -1);
1850#endif
1851 } break;
1852#ifdef ALL_VORONOI_ACTIVE
1853 case ALL::LB_t::VORONOI: {
1854 // get neighbor information
1855 std::vector<double> neighbor_vertices = lb_obj.getNeighborVertices();
1856 std::vector<int> neighbors = lb_obj.getNeighbors();
1857 int nNeighbors = neighbors.size();
1858
1859 // compute voronoi cells
1860
1861 voro::container con_copy(0.0, box_size[0], 0.0, box_size[1], 0.0,
1862                               box_size[2], ALL_VORONOI_SUBBLOCKS,
1863                               ALL_VORONOI_SUBBLOCKS, ALL_VORONOI_SUBBLOCKS,
1864                               false, false, false, 10);
1865 // add neighbor points first to maintain
1866 // mapping to neighbors array
1867 for (auto i = 0; i < nNeighbors; ++i) {
1868 con_copy.put(i, neighbor_vertices.at(3 * i),
1869 neighbor_vertices.at(3 * i + 1),
1870 neighbor_vertices.at(3 * i + 2));
1871 }
1872
1873      // add the local vertex to complete the map of cells
1874 con_copy.put(nNeighbors, vertices.at(0)[0], vertices.at(0)[1],
1875 vertices.at(0)[2]);
1876
1877 /*
1878 std::ostringstream ss_local_gp_pov2;
1879 ss_local_gp_pov2 << "voronoi/copy_points_"
1880 << std::setw(7) << std::setfill('0')
1881 << i_loop << ".pov";
1882 std::ostringstream ss_local_vc_pov2;
1883 ss_local_vc_pov2 << "voronoi/copy_cells_"
1884 << std::setw(7) << std::setfill('0')
1885 << i_loop << ".pov";
1886
1887 if (localRank == 0)
1888 {
1889 con_copy.draw_particles_pov(ss_local_gp_pov2.str().c_str());
1890 con_copy.draw_cells_pov(ss_local_vc_pov2.str().c_str());
1891 }
1892 */
1893      // collect particles that left the domain
1894      // and determine to which domain they will be transferred
1895
1896 std::vector<std::vector<double>> remote_particles(nNeighbors);
1897
1898 for (auto P = points.begin(); P != points.end(); ++P) {
1899        ALL::Point<double> &p = *P; // reference: the marker set below must reach the stored point
1900 double x, y, z;
1901 int pos;
1902
1903 con_copy.find_voronoi_cell(p[0], p[1], p[2], x, y, z, pos);
1904 if (pos < nNeighbors) {
1905 remote_particles.at(pos).push_back(p[0]);
1906 remote_particles.at(pos).push_back(p[1]);
1907 remote_particles.at(pos).push_back(p[2]);
1908 remote_particles.at(pos).push_back(p.getWeight());
1909 p[0] = -1000.0;
1910 }
1911 }
1912
1913 for (auto P = points.begin(); P != points.end(); ++P) {
1914 ALL::Point<double> p = *P;
1915 if (p[0] < -500.0) {
1916 points.erase(P);
1917 n_points--;
1918 P--;
1919 }
1920 }
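      // Particles whose containing Voronoi cell belongs to a neighbor
      // (pos < nNeighbors) are packed as four doubles (x, y, z, weight)
      // into remote_particles[pos] and marked by setting their x coordinate
      // to -1000.0; the second pass above then erases all marked points.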
1921
1922      // exchange the number of particles to be transferred
1923
1924 int remote_s[nNeighbors];
1925 int remote_r[nNeighbors];
1926 MPI_Request request_s[nNeighbors];
1927 MPI_Request request_r[nNeighbors];
1928 MPI_Status status_s[nNeighbors];
1929 MPI_Status status_r[nNeighbors];
1930
1931 for (auto i = 0; i < nNeighbors; ++i) {
1932        // send the number of values to be sent
1933 remote_s[i] = remote_particles.at(i).size();
1934 MPI_Isend(&remote_s[i], 1, MPI_INT, neighbors.at(i), 3000, cart_comm,
1935 &request_s[i]);
1936 MPI_Irecv(&remote_r[i], 1, MPI_INT, neighbors.at(i), 3000, cart_comm,
1937 &request_r[i]);
1938 }
1939
1940 MPI_Waitall(nNeighbors, request_s, status_s);
1941 MPI_Waitall(nNeighbors, request_r, status_r);
1942
1943 /*
1944 for (int i = 0; i < 25; ++i)
1945 {
1946 if (localRank == i)
1947 {
1948 std::cout << localRank << ": ";
1949 for (int j = 0; j < nNeighbors; ++j)
1950 std::cout << " " << neighbors.at(j) << " ";
1951 std::cout << "| " << nNeighbors << std::endl;
1952 }
1953 MPI_Barrier(cart_comm);
1954 }
1955
1956 for (int i = 0; i < 25; ++i)
1957 {
1958 if (localRank == i)
1959 {
1960 std::cout << localRank << ": ";
1961 for (int j = 0; j < nNeighbors; ++j)
1962 std::cout << " " << remote_s[j]
1963 << " / " << remote_r[j] << " ";
1964 std::cout << n_points << std::endl;
1965 }
1966 MPI_Barrier(cart_comm);
1967 }
1968 */
1969
1970 std::vector<std::vector<double>> received_particles(nNeighbors);
1971 for (auto i = 0; i < nNeighbors; ++i) {
1972 if (remote_r[i] > 0) {
1973 received_particles.at(i).resize(remote_r[i]);
1974 MPI_Irecv(received_particles.at(i).data(), remote_r[i], MPI_DOUBLE,
1975 neighbors.at(i), 4000, cart_comm, &request_r[i]);
1976 } else
1977 request_r[i] = MPI_REQUEST_NULL;
1978 if (remote_s[i] > 0)
1979 MPI_Isend(remote_particles.at(i).data(), remote_s[i], MPI_DOUBLE,
1980 neighbors.at(i), 4000, cart_comm, &request_s[i]);
1981 else
1982 request_s[i] = MPI_REQUEST_NULL;
1983 }
1984
1985 MPI_Waitall(nNeighbors, request_s, status_s);
1986 MPI_Waitall(nNeighbors, request_r, status_r);
1987
1988 for (auto i = 0; i < nNeighbors; ++i) {
1989 for (auto j = 0; j < remote_r[i]; j += 4) {
1990 ALL::Point<double> tmp_point(3, received_particles.at(i).data() + j,
1991 received_particles.at(i).at(j + 3));
1992 points.push_back(tmp_point);
1993 n_points++;
1994 }
1995 }
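      // Neighbors with nothing to send or receive get MPI_REQUEST_NULL,
      // which MPI_Waitall treats as already completed; received records are
      // then unpacked in strides of four doubles (x, y, z, weight) into new
      // ALL::Point objects.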
1996
1997 // exchange particles
1998 } break;
1999#endif
2000 case ALL::LB_t::HISTOGRAM: {
2001 // determine current dimension
2002 int curr_dim = 2 - (i_loop % 3);
2003
2004 MPI_Comm comm_col;
2005 // create temporary communicator to exchange borders
2006 switch (curr_dim) {
2007 case 0: {
2008 // x-plane
2009 MPI_Comm_split(cart_comm,
2010 local_coords[1] + local_coords[2] * global_dim[1],
2011 local_coords[0], &comm_col);
2012 break;
2013 }
2014 case 1: {
2015 // y-plane
2016 MPI_Comm_split(cart_comm,
2017 local_coords[0] + local_coords[2] * global_dim[0],
2018 local_coords[1], &comm_col);
2019 break;
2020 }
2021 case 2: {
2022 // z-plane
2023 MPI_Comm_split(cart_comm,
2024 local_coords[0] + local_coords[1] * global_dim[0],
2025 local_coords[2], &comm_col);
2026 break;
2027 }
2028 }
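      // MPI_Comm_split: the "color" (second argument) groups all ranks that
      // share the two grid coordinates orthogonal to curr_dim into one
      // column communicator, and the "key" (third argument, the coordinate
      // along curr_dim) orders the ranks inside that column, so rank n in
      // comm_col owns the n-th slab along the current direction.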
2029
2030      // vectors collecting the new and old borders
2031
2032 std::vector<double> borders(2 * global_dim[curr_dim]);
2033 std::vector<double> old_borders(2 * global_dim[curr_dim]);
2034 std::vector<double> local_borders(2);
2035 std::vector<double> old_border(2);
2036
2037 local_borders.at(0) = vertices.at(0)[curr_dim];
2038 local_borders.at(1) = vertices.at(1)[curr_dim];
2039 old_border.at(0) = old_vertices.at(0)[curr_dim];
2040 old_border.at(1) = old_vertices.at(1)[curr_dim];
2041
2042 int size, rank;
2043 MPI_Comm_rank(comm_col, &rank);
2044 MPI_Comm_size(comm_col, &size);
2045
2046 // collect borders
2047 MPI_Allgather(local_borders.data(), 2, MPI_DOUBLE, borders.data(), 2,
2048 MPI_DOUBLE, comm_col);
2049
2050      // collect old borders
2051 MPI_Allgather(old_border.data(), 2, MPI_DOUBLE, old_borders.data(), 2,
2052 MPI_DOUBLE, comm_col);
2053
2054 // compare old domains with new domains
2055 std::vector<int> send_neig;
2056 std::vector<int> recv_neig;
2057
2058 for (int n = 0; n < global_dim[curr_dim]; ++n) {
2059 /*
2060 if (localRank == 13)
2061 std::cout << "DEBUG: " << n << " " <<
2062 old_border.at(0) << " " <<
2063 old_border.at(1) << " / " <<
2064 borders.at(2*n) << " " <<
2065 borders.at(2*n+1) << " / " <<
2066 (borders.at(2*n) <= old_border.at(0)) << " " <<
2067 (borders.at(2*n+1) > old_border.at(0)) << " " <<
2068 (borders.at(2*n) <= old_border.at(1)) << " " <<
2069 (borders.at(2*n+1) >= old_border.at(1)) << std::endl;
2070 */
2071 if ((borders.at(2 * n) <= old_border.at(0) &&
2072 borders.at(2 * n + 1) >= old_border.at(0)) ||
2073 (borders.at(2 * n) <= old_border.at(1) &&
2074 borders.at(2 * n + 1) >= old_border.at(1)) ||
2075 (borders.at(2 * n) > old_border.at(0) &&
2076 borders.at(2 * n + 1) < old_border.at(1)))
2077 send_neig.push_back(n);
2078 if ((old_borders.at(2 * n) <= local_borders.at(0) &&
2079 old_borders.at(2 * n + 1) >= local_borders.at(0)) ||
2080 (old_borders.at(2 * n) <= local_borders.at(1) &&
2081 old_borders.at(2 * n + 1) >= local_borders.at(1)) ||
2082 (old_borders.at(2 * n) > local_borders.at(0) &&
2083 old_borders.at(2 * n + 1) < local_borders.at(1)))
2084 recv_neig.push_back(n);
2085 }
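      // Overlap test: neighbor n becomes a send target if its new interval
      // [borders[2n], borders[2n+1]] overlaps this rank's old interval, and
      // a receive source if its old interval overlaps this rank's new one;
      // the three clauses catch the cases "contains the other interval's
      // lower border", "contains its upper border" and "lies strictly
      // inside it".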
2086
2087      // per-neighbor vectors collecting the points to be transferred
2088 std::vector<std::vector<double>> send_vec(send_neig.size());
2089
2090 // all points are sorted into the correct vectors
2091 for (auto p : points) {
2092 for (int n = 0; n < (int)send_neig.size(); ++n) {
2093 int neig_id = send_neig.at(n);
2094 /*
2095 if (localRank == 1)
2096 {
2097 std::cout << p[curr_dim] << " "
2098 << borders.at(2*neig_id) << " "
2099 << borders.at(2*neig_id+1) << std::endl;
2100 }
2101 */
2102 if ((borders.at(2 * neig_id) <= p[curr_dim] &&
2103 borders.at(2 * neig_id + 1) > p[curr_dim])) {
2104 send_vec.at(n).push_back(p[0]);
2105 send_vec.at(n).push_back(p[1]);
2106 send_vec.at(n).push_back(p[2]);
2107 send_vec.at(n).push_back(p.getWeight());
2108 send_vec.at(n).push_back((double)(p.get_id()));
2109 }
2110 }
2111 }
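      // Each transferred point is flattened into five doubles:
      // x, y, z, weight and the particle id (stored as a double here and
      // cast back to long on the receiving side).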
2112 // clear old point vector
2113 points.clear();
2114 n_points = 0;
2115
2116 std::vector<int> n_send(send_neig.size());
2117 for (int n = 0; n < (int)send_neig.size(); ++n)
2118 n_send.at(n) = send_vec.at(n).size();
2119
2120      // communicate the number of particles to be sent to each neighbor
2121 std::vector<int> n_recv(recv_neig.size());
2122
2123 std::vector<MPI_Request> sreq(send_neig.size());
2124 std::vector<MPI_Request> rreq(recv_neig.size());
2125 std::vector<MPI_Status> ssta(send_neig.size());
2126 std::vector<MPI_Status> rsta(recv_neig.size());
2127
2128 for (int n = 0; n < (int)send_neig.size(); ++n) {
2129 MPI_Isend(n_send.data() + n, 1, MPI_INT, send_neig.at(n), 2000,
2130 comm_col, sreq.data() + n);
2131 }
2132 for (int n = 0; n < (int)recv_neig.size(); ++n) {
2133 MPI_Irecv(n_recv.data() + n, 1, MPI_INT, recv_neig.at(n), 2000,
2134 comm_col, rreq.data() + n);
2135 }
2136 MPI_Waitall(send_neig.size(), sreq.data(), ssta.data());
2137
2138 int recvs = 0;
2139
2140 /*
2141 std::cout << localRank << " " << send_neig.at(0) << " "
2142 << ((send_neig.size() >=
2143 2)?send_neig.at(1):-1) << " "
2144 << ((send_neig.size() >=
2145 3)?send_neig.at(2):-1) << " "
2146 << send_neig.size() << " | "
2147 << recv_neig.at(0) << " "
2148 << ((recv_neig.size() >=
2149 2)?recv_neig.at(1):-1) << " "
2150 << ((recv_neig.size() >=
2151 3)?recv_neig.at(2):-1) << " "
2152 << recv_neig.size() << " | "
2153 << old_border.at(0) << " " <<
2154 old_border.at(1) << " | "
2155 << local_borders.at(0) << " " <<
2156 local_borders.at(1)
2157 << std::endl;
2158 */
2159
2160 std::vector<std::vector<double>> recv_vec(recv_neig.size());
2161 while (recvs < (int)recv_neig.size()) {
2162 int idx;
2163 MPI_Waitany(recv_neig.size(), rreq.data(), &idx, rsta.data());
2164 recv_vec.at(idx).resize(n_recv.at(idx));
2165 recvs++;
2166 }
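      // MPI_Waitany is used instead of a single MPI_Waitall so that each
      // receive buffer can be resized as soon as its count message arrives,
      // before the actual point data is requested below.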
2167
2168 // transfer points from old domains to new domains
2169 for (int n = 0; n < (int)send_neig.size(); ++n) {
2170 MPI_Isend(send_vec.at(n).data(), send_vec.at(n).size(), MPI_DOUBLE,
2171 send_neig.at(n), 3000, comm_col, sreq.data() + n);
2172 }
2173 for (int n = 0; n < (int)recv_neig.size(); ++n) {
2174 MPI_Irecv(recv_vec.at(n).data(), recv_vec.at(n).size(), MPI_DOUBLE,
2175 recv_neig.at(n), 3000, comm_col, rreq.data() + n);
2176 }
2177 MPI_Waitall(send_neig.size(), sreq.data(), ssta.data());
2178
2179 recvs = 0;
2180 while (recvs < (int)recv_neig.size()) {
2181 int idx;
2182 MPI_Waitany(recv_neig.size(), rreq.data(), &idx, rsta.data());
2183 for (int p = 0; p < (int)recv_vec.at(idx).size(); p += 5) {
2184 ALL::Point<double> tmp(3, recv_vec.at(idx).data() + p,
2185 recv_vec.at(idx).at(p + 3),
2186 (long)(recv_vec.at(idx).at(p + 4)));
2187 points.push_back(tmp);
2188 }
2189 recvs++;
2190 }
2191 n_points = points.size();
2192
2193 MPI_Comm_free(&comm_col);
2194 break;
2195 }
2196 default:
2197 break;
2198 }
2199
2200 if (i_loop <= 100 || i_loop % OUTPUT_INTV == 0) {
2201#ifdef ALL_VORONOI_ACTIVE
2202 if (chosen_method != ALL::LB_t::VORONOI) {
2203#endif
2204#ifdef ALL_VTK_OUTPUT
2205 // if (localRank == 0)
2206 // std::cout << "creating vtk outlines output" << std::endl;
2207 if (chosen_method != ALL::LB_t::FORCEBASED)
2208 lb_obj.printVTKoutlines(output_step);
2209 // if (localRank == 0)
2210 // std::cout << "creating vtk vertices output" << std::endl;
2211 if (chosen_method == ALL::LB_t::FORCEBASED)
2212 lb_obj.printVTKvertices(output_step);
2213 // if (localRank == 0)
2214 // std::cout << "creating points output" << std::endl;
2215 // if (i_loop == 0)
2216 print_points(points, i_loop / OUTPUT_INTV, chosen_method, cart_comm);
2217#endif
2218 output_step++;
2219#ifdef ALL_VORONOI_ACTIVE
2220 } else {
2221#ifdef ALL_VTK_OUTPUT
2222 print_points(points, i_loop / OUTPUT_INTV, chosen_method, cart_comm);
2223#endif
2224 }
2225#endif
2226 }
2227
2228 // calculate quality parameters
2229 if (!weighted_points) {
2230 n_local = (double)n_points;
2231 } else {
2232 n_local = 0.0;
2233 for (auto p = points.begin(); p != points.end(); ++p)
2234 n_local += p->getWeight();
2235 }
2236
2237 double n_total_points;
2238 MPI_Allreduce(&n_local, &n_total_points, 1, MPI_DOUBLE, MPI_SUM,
2239 cart_comm);
2240
2241 MPI_Allreduce(&n_local, &n_total, 1, MPI_DOUBLE, MPI_SUM, cart_comm);
2242 avg_work = n_total / (double)n_ranks;
2243 MPI_Allreduce(&n_local, &n_min, 1, MPI_DOUBLE, MPI_MIN, cart_comm);
2244 MPI_Allreduce(&n_local, &n_max, 1, MPI_DOUBLE, MPI_MAX, cart_comm);
2245 d_min = n_min / avg_work;
2246 d_max = n_max / avg_work;
2247 d_ratio = (d_max - d_min) / (d_max + d_min);
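      // d_ratio is a normalized imbalance measure in [0, 1): a perfectly
      // balanced run has d_min = d_max = 1 and d_ratio = 0, while e.g.
      // d_min = 0.5 and d_max = 1.5 give d_ratio = (1.5 - 0.5) / (1.5 + 0.5) = 0.5.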
2248
2249 double loc_diff, tot_diff;
2250 loc_diff = (n_local - avg_work) * (n_local - avg_work);
2251 MPI_Reduce(&loc_diff, &tot_diff, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm);
2252
2253 d_min = n_min / avg_work;
2254 d_max = n_max / avg_work;
2255 d_ratio = (d_max - d_min) / (d_max + d_min);
2256
2257 if (localRank == 0) {
2258 tot_diff = sqrt(tot_diff);
2259 std::ofstream of;
2260 if (!weighted_points)
2261 of.open("stddev.dat", std::ios::out | std::ios::app);
2262 else
2263 of.open("stddev_w.dat", std::ios::out | std::ios::app);
2264 of << i_loop + 1 << " " << tot_diff << std::endl;
2265 of << std::flush;
2266 of.close();
2267
2268 if (!weighted_points)
2269 of.open("minmax.dat", std::ios::out | std::ios::app);
2270 else
2271 of.open("minmax_w.dat", std::ios::out | std::ios::app);
2272 of << i_loop + 1 << " " << d_min << " " << d_max << " " << d_ratio
2273 << " " << max_neighbors << std::endl;
2274 of << std::flush;
2275 of.close();
2276 if (i_loop % OUTPUT_INTV == 0) {
2277 std::cout << "d_min: " << d_min << std::endl;
2278 std::cout << "d_max: " << d_max << std::endl;
2279 std::cout << "d_ratio: " << d_ratio << std::endl;
2280 std::cout << std::flush;
2281 }
2282 if (d_ratio < min_ratio) {
2283 min_ratio = d_ratio;
2284 min_step = i_loop;
2285 }
2286 }
2287
2288 // output of borders / contents
2289 for (int i = 0; i < n_ranks; ++i) {
2290 if (localRank == i) {
2291 std::ofstream of;
2292 if (!weighted_points)
2293 of.open("domain_data.dat", std::ios::out | std::ios::app);
2294 else
2295 of.open("domain_data_w.dat", std::ios::out | std::ios::app);
2296 of << (i_loop + 1) << " " << localRank << " ";
2297
2298 if (chosen_method == ALL::LB_t::HISTOGRAM) {
2299 int *nNeighbors;
2300 std::vector<int> neighbors = lb_obj.getNeighbors();
2301 nNeighbors = neighbors.data();
2302 int n_neig = 0;
2303 for (int j = 0; j < 3; ++j) {
2304 n_neig += nNeighbors[j];
2305 }
2306 of << n_neig << " ";
2307 }
2308 /*
2309 for (int j = 0; j < sys_dim; ++j)
2310 {
2311 of << " " << local_coords[j]
2312 << " " << lp[j] << " " << up[j] << " ";
2313 }
2314 */
2315
2316 of << n_local << " ";
2317
2318 of << std::endl;
2319 if (i == n_ranks - 1)
2320 of << std::endl;
2321 of.close();
2322 MPI_Barrier(cart_comm);
2323 } else
2324 MPI_Barrier(cart_comm);
2325 }
2326
2327 if (std::abs(n_total_points - total_points) > 1e-6) {
2328 std::cout << std::setprecision(14) << n_total_points
2329 << " != " << total_points << std::endl;
2330 return (-1);
2331 }
2332
2333 if (i_loop == max_loop - 1) {
2334 // output of the last configuration after lb-step!
2335 lb_obj.setVertices(vertices);
2336 double vis_work = 0.0;
2337 for (int p = 0; p < n_points; ++p) {
2338 vis_work += points.at(p).getWeight();
2339 }
2340 lb_obj.setWork(vis_work);
2341#ifdef ALL_VORONOI_ACTIVE
2342 if (chosen_method != ALL::LB_t::VORONOI) {
2343#endif
2344#ifdef ALL_VTK_OUTPUT
2345 if (chosen_method != ALL::LB_t::FORCEBASED) {
2346 lb_obj.printVTKoutlines(output_step);
2347 double width[3];
2348 double volume = 1.0;
2349
2350 for (int i = 0; i < 3; ++i) {
2351 width[i] = vertices.at(1)[i] - vertices.at(0)[i];
2352 volume *= width[i];
2353 }
2354
2355 double surface = 2.0 * width[0] * width[1] +
2356 2.0 * width[1] * width[2] +
2357 2.0 * width[0] * width[2];
2358
2359 for (int r = 0; r < n_ranks; ++r) {
2360 if (r == localRank) {
2361 std::ofstream of;
2362 of.open("domain_data.dat", std::ios::out | std::ios::app);
2363 of << localRank << " " << width[0] << " " << width[1] << " "
2364 << width[2] << " " << vis_work << " " << volume << " "
2365 << surface << " " << (surface / volume) << " " << std::endl;
2366 of.close();
2367 }
2368 MPI_Barrier(cart_comm);
2369 }
2370 }
2371 if (chosen_method == ALL::LB_t::FORCEBASED)
2372 lb_obj.printVTKvertices(output_step);
2373#endif
2374 output_step++;
2375#ifdef ALL_VORONOI_ACTIVE
2376 } else {
2377#ifdef ALL_VTK_OUTPUT
2378 print_points(points, i_loop / OUTPUT_INTV, chosen_method, cart_comm);
2379#endif
2380 }
2381#endif
2382 }
2383 }
2384
2385  // create binary output (e.g. for HemeLB)
2386  // format:
2387  // n_ranks integers (number of particles per domain)
2388  // n_ranks longs (byte offset of each domain's block in the file)
2389  // per domain: n_points records of one long (particle id); writing the 3*double positions is currently commented out
2390
2391 // open file
2392 int err;
2393 MPI_File outfile;
2394 err =
2395 MPI_File_open(MPI_COMM_WORLD, "particles_output.bin",
2396 MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &outfile);
2397
2398 // write out the number of particles on each process
2399 MPI_File_write_at(outfile, (MPI_Offset)(localRank * sizeof(int)), &n_points,
2400 1, MPI_INT, MPI_STATUS_IGNORE);
2401
2402 // compute the offset for each process
2403 long n_p = (long)n_points;
2404 long offset;
2405
2406 MPI_Scan(&n_p, &offset, 1, MPI_LONG, MPI_SUM, cart_comm);
2407 offset -= n_p;
2408 // size of data for a single block
2409 long block_size = sizeof(long); // + 3 * sizeof(double);
2410
2411 offset *= block_size;
2412 offset += n_ranks * (sizeof(int) + sizeof(long));
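  // MPI_Scan returns an inclusive prefix sum of the particle counts;
  // subtracting the local count makes it exclusive, i.e. the number of
  // particles owned by all lower ranks. For example, counts {4, 2, 5} give
  // record offsets {0, 4, 6}; multiplied by block_size and shifted past the
  // n_ranks * (sizeof(int) + sizeof(long)) byte header this is the byte
  // offset of each rank's block.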
2413
2414 MPI_File_write_at(
2415 outfile, (MPI_Offset)(n_ranks * sizeof(int) + localRank * sizeof(long)),
2416 &offset, 1, MPI_LONG, MPI_STATUS_IGNORE);
2417
2418 for (int n = 0; n < n_points; ++n) {
2419 long id = points.at(n).get_id();
2420 double loc[3];
2421 loc[0] = points.at(n)[0];
2422 loc[1] = points.at(n)[1];
2423 loc[2] = points.at(n)[2];
2424 MPI_File_write_at(outfile, (MPI_Offset)((offset + n * block_size)), &id,
2425 1, MPI_LONG, MPI_STATUS_IGNORE);
2426 /*
2427 MPI_File_write_at(outfile,
2428 (MPI_Offset)((offset+n*block_size)+sizeof(long)),
2429 loc,
2430 3,
2431 MPI_DOUBLE,
2432 MPI_STATUS_IGNORE);
2433 */
2434 }
2435
2436 MPI_File_close(&outfile);
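  // A minimal (hypothetical) serial reader for this file, assuming the
  // number of writing ranks (n_ranks) is known, could look like:
  //   std::ifstream in("particles_output.bin", std::ios::binary);
  //   std::vector<int>  counts(n_ranks);
  //   std::vector<long> offsets(n_ranks);
  //   in.read(reinterpret_cast<char *>(counts.data()),  n_ranks * sizeof(int));
  //   in.read(reinterpret_cast<char *>(offsets.data()), n_ranks * sizeof(long));
  //   // per rank r: seek to offsets[r] and read counts[r] longs (particle ids)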
2437
2438 /*
2439 // output of borders / contents
2440 if (n_ranks < 216)
2441 {
2442 for (int i = 0; i < n_ranks; ++i)
2443 {
2444 if (localRank == i)
2445 {
2446 std::ofstream of;
2447 if (!weighted_points)
2448 of.open("domain_data.dat", std::ios::out | std::ios::app);
2449 else
2450 of.open("domain_data_w.dat", std::ios::out | std::ios::app);
2451 of << 0 << " " << localRank << " ";
2452
2453 for (int j = 0; j < sys_dim; ++j)
2454 {
2455 of << " " << local_coords[j] << " "
2456 << lp[j] << " "
2457 << up[j] << " ";
2458 }
2459
2460 of << " " << n_local << " ";
2461
2462 of << std::endl;
2463 if (i == n_ranks - 1) of << std::endl;
2464 of.close();
2465 MPI_Barrier(cart_comm);
2466 }
2467 else
2468 MPI_Barrier(cart_comm);
2469 }
2470 }
2471 */
2472
2473  delete[] transfer; // array form: transfer is indexed as a flat buffer above
2474
2475 if (localRank == 0) {
2476 std::cout << std::endl;
2477 std::cout << "min_ratio: " << min_ratio << std::endl;
2478 std::cout << "min_step: " << min_step << std::endl;
2479 std::cout << std::endl;
2480 }
2481 MPI_Barrier(cart_comm);
2482 MPI_Finalize();
2483 return EXIT_SUCCESS;
2484 } catch (ALL::CustomException &e) {
2485 std::cout << e.what() << std::endl;
2486 return EXIT_FAILURE;
2487 }
2488}
2489// vim: sw=2 et ts=2