286int main(
int argc,
char **argv) {
288 MPI_Init(&argc, &argv);
290 const int sys_dim = 3;
293 std::vector<double> sys_size(6);
301 std::vector<double> minSize(3, 2.0);
304 bool weighted_points =
false;
305 char *filename = NULL;
308 int global_dim[sys_dim];
309 for (
int d = 0; d < sys_dim; ++d)
315 "Wrong number of arguments: ALL_test \
316 [ <int:method> [ <int:max_loop> \
317 [ <double:gamma> [ <int:weighted> \
318 [ <char*:input_file> \
319 [ <double:lx> <double:ly> <double:lz> \
320 [ <int:dx> <int:dy> <int:dz> ]]]]]]]");
324 chosen_method = (
ALL::LB_t)atoi(argv[1]);
325 }
else if (argc < 4) {
326 chosen_method = (
ALL::LB_t)atoi(argv[1]);
327 max_loop = atoi(argv[2]);
328 }
else if (argc < 5) {
329 chosen_method = (
ALL::LB_t)atoi(argv[1]);
330 max_loop = atoi(argv[2]);
331 gamma = atof(argv[3]);
332 }
else if (argc < 6) {
333 chosen_method = (
ALL::LB_t)atoi(argv[1]);
334 max_loop = atoi(argv[2]);
335 gamma = atof(argv[3]);
336 weighted_points = atoi(argv[4]) == 1;
337 }
else if (argc < 7) {
338 chosen_method = (
ALL::LB_t)atoi(argv[1]);
339 max_loop = atoi(argv[2]);
340 gamma = atof(argv[3]);
341 weighted_points = atoi(argv[4]) == 1;
343 }
else if (argc == 9) {
344 chosen_method = (
ALL::LB_t)atoi(argv[1]);
345 max_loop = atoi(argv[2]);
346 gamma = atof(argv[3]);
347 weighted_points = atoi(argv[4]) == 1;
349 box_size[0] = atof(argv[6]);
350 box_size[1] = atof(argv[7]);
351 box_size[2] = atof(argv[8]);
352 sys_size.at(1) = atof(argv[6]);
353 sys_size.at(3) = atof(argv[7]);
354 sys_size.at(5) = atof(argv[8]);
355 }
else if (argc == 12) {
356 chosen_method = (
ALL::LB_t)atoi(argv[1]);
357 max_loop = atoi(argv[2]);
358 gamma = atof(argv[3]);
359 weighted_points = atoi(argv[4]) == 1;
361 box_size[0] = atof(argv[6]);
362 box_size[1] = atof(argv[7]);
363 box_size[2] = atof(argv[8]);
364 global_dim[0] = atoi(argv[9]);
365 global_dim[1] = atoi(argv[10]);
366 global_dim[2] = atoi(argv[11]);
367 sys_size.at(1) = atof(argv[6]);
368 sys_size.at(3) = atof(argv[7]);
369 sys_size.at(5) = atof(argv[8]);
373 std::vector<double> dummy(sys_dim);
374 std::vector<ALL::Point<double>> points;
375 int max_neighbors, loc_neighbors;
381 int local_coords[sys_dim];
382 int periodicity[sys_dim];
384 std::vector<double> ds(sys_dim);
386 std::vector<double> l(sys_dim);
387 std::vector<double> u(sys_dim);
389 double min_ratio = 1.01;
392 for (
int i = 0; i < sys_dim; ++i) {
397 MPI_Comm_size(MPI_COMM_WORLD, &n_ranks);
399 if (global_dim[0] == 0) {
401 MPI_Dims_create(n_ranks, sys_dim, global_dim);
405 MPI_Cart_create(MPI_COMM_WORLD, sys_dim, global_dim, periodicity, 1,
409 MPI_Cart_get(cart_comm, sys_dim, global_dim, periodicity, local_coords);
412 MPI_Cart_rank(cart_comm, local_coords, &localRank);
415 MPI_Cart_coords(cart_comm, localRank, sys_dim, coords);
418 if (localRank == 0) {
419 switch (chosen_method) {
422 std::cout <<
"chosen method: TENSOR" << std::endl;
425 std::cout <<
"chosen method: STAGGERED" << std::endl;
428 std::cout <<
"chosen method: FORCEBASED" << std::endl;
430#ifdef ALL_VORONOI_ACTIVE
432 std::cout <<
"chosen method: VORONOI" << std::endl;
436 std::cout <<
"chosen method: HISTOGRAM" << std::endl;
440 "Invalid method chosen.");
442 std::cout <<
"chosen gamma: " << gamma << std::endl;
445 for (
int i = 0; i < sys_dim; ++i) {
446 ds.at(i) = box_size[i] / (double)global_dim[i];
447 l.at(i) = local_coords[i] * ds.at(i);
448 u.at(i) = (1.0 + local_coords[i]) * ds.at(i);
453 std::vector<ALL::Point<double>> *tv =
new std::vector<ALL::Point<double>>(2);
460 switch (chosen_method) {
471#ifdef ALL_VORONOI_ACTIVE
488 std::vector<ALL::Point<double>> vertices(nvertices, lp);
489 std::vector<ALL::Point<double>> new_vertices(nvertices, lp);
490 std::vector<ALL::Point<double>> old_vertices(nvertices, lp);
492 switch (chosen_method) {
503 for (
int i = 0; i < nvertices; ++i)
505 for (
int z = 0; z < 2; ++z)
506 for (
int y = 0; y < 2; ++y)
507 for (
int x = 0; x < 2; ++x) {
508 int vertex = 4 * z + 2 * y + x;
509 vertices.at(vertex)[0] =
510 vertices.at(vertex)[0] + (double)x * ds.at(0);
511 vertices.at(vertex)[1] =
512 vertices.at(vertex)[1] + (double)y * ds.at(1);
513 vertices.at(vertex)[2] =
514 vertices.at(vertex)[2] + (double)z * ds.at(2);
517#ifdef ALL_VORONOI_ACTIVE
521 for (
int d = 0; d < sys_dim; ++d)
522 vertices.at(0)[d] = 0.5 * (lp[d] + up[d]);
523 vertices.at(1) = vertices.at(0);
532 "Invalid method chosen.");
538 int max_particles = 1;
540 std::cout <<
"creating / reading point data" << std::endl;
545 read_points(points, l, u, n_points, filename, sys_dim, localRank,
551 MPI_Allreduce(&n_points, &max_particles, 1, MPI_INT, MPI_MAX, cart_comm);
553 max_particles = (int)std::ceil((
double)max_particles * 1.5);
556 std::cout <<
"maximum number of points on any process: " << max_particles
560 recv =
new double[max_neig * (sys_dim + 1) * max_particles];
561 transfer =
new double[max_neig * (sys_dim + 1) * max_particles];
567 for (
int i = 0; i < sys_dim; ++i) {
568 MPI_Cart_shift(cart_comm, i, 1, &self, &r_neig[i]);
569 MPI_Cart_shift(cart_comm, i, -1, &self, &l_neig[i]);
570 if (local_coords[i] == 0)
571 l_neig[i] = MPI_PROC_NULL;
572 if (local_coords[i] == global_dim[i] - 1)
573 r_neig[i] = MPI_PROC_NULL;
576 double d_min, d_max, d_ratio;
578 if (!weighted_points) {
579 n_local = (double)n_points;
582 for (
auto p = points.begin(); p != points.end(); ++p)
583 n_local += p->getWeight();
586 MPI_Allreduce(&n_local, &n_total, 1, MPI_DOUBLE, MPI_SUM, cart_comm);
587 double avg_work = (double)n_total / (
double)n_ranks;
589 MPI_Allreduce(&n_local, &n_min, 1, MPI_DOUBLE, MPI_MIN, cart_comm);
590 MPI_Allreduce(&n_local, &n_max, 1, MPI_DOUBLE, MPI_MAX, cart_comm);
591 d_min = n_min / avg_work;
592 d_max = n_max / avg_work;
593 d_ratio = (d_max - d_min) / (d_max + d_min);
599 MPI_Allreduce(&n_local, &total_points, 1, MPI_DOUBLE, MPI_SUM, cart_comm);
603 for (
int i = 0; i < n_ranks; ++i) {
604 if (localRank == i) {
606 of.open(
"domain_data.dat", std::ios::out | std::ios::app);
607 of << 0 <<
" " << localRank <<
" ";
609 for (
int j = 0; j < sys_dim; ++j) {
610 of <<
" " << local_coords[j] <<
" " << lp[j] <<
" " << up[j] <<
" ";
613 of <<
" " << n_local <<
" ";
616 if (i == n_ranks - 1)
619 MPI_Barrier(cart_comm);
621 MPI_Barrier(cart_comm);
625#ifdef ALL_VORONOI_ACTIVE
629 for (
int i = 0; i < n_ranks; ++i) {
630 if (localRank == i) {
633 of.open(
"voronoi/particles.pov", std::ios::out | std::ios::app);
635 of.open(
"voronoi/particles.pov", std::ios::out);
637 for (
auto it = points.begin(); it != points.end(); ++it) {
638 of <<
"// rank " << localRank <<
" particle " << j << std::endl;
639 of <<
"sphere{<" << (*it)[0] <<
"," << (*it)[1] <<
"," << (*it)[2]
640 <<
">, p}" << std::endl;
644 MPI_Barrier(cart_comm);
648 double cow_sys[sys_dim + 1];
649 double target_point[sys_dim + 1];
651 cow_sys[0] = cow_sys[1] = cow_sys[2] = cow_sys[3] = 0.0;
653 for (
int i = 0; i < n_points; ++i) {
654 for (
int d = 0; d < sys_dim; ++d)
655 cow_sys[d] += points.at(i)[d] * points.at(i).getWeight();
656 cow_sys[sys_dim] += points.at(i).getWeight();
659 MPI_Allreduce(cow_sys, target_point, sys_dim + 1, MPI_DOUBLE, MPI_SUM,
662 for (
int d = 0; d < sys_dim; ++d)
663 target_point[d] /= target_point[sys_dim];
665 std::default_random_engine generator(
SEED + localRank - 42 * 23);
666 std::uniform_real_distribution<double> dist(-0.25, 0.25);
671 int nNeighbors = n_ranks;
672 std::vector<double> neighbor_vertices(sys_dim * n_ranks);
673 std::vector<int> neighbors(n_ranks);
675 for (
int n = 0; n < n_ranks; ++n)
678 double local_vertex[sys_dim];
680 for (
auto &v : local_vertex)
682 double local_work = 0.0;
684 for (
int p = 0; p < n_points; ++p) {
685 local_work += points.at(p).getWeight();
686 for (
int d = 0; d < sys_dim; ++d)
687 local_vertex[d] += points.at(p)[d] * points.at(p).getWeight();
690 for (
int d = 0; d < sys_dim; ++d) {
691 if (local_work < 1e-6)
693 local_vertex[d] /= local_work;
694 vertices.at(0)[d] = local_vertex[d];
699 for (
int d = 0; d < sys_dim; ++d) {
701 diff[d] = target_point[d] - local_vertex[d] + dist(generator);
704 double dd = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
707 if (std::fabs(dd) < 1e-6)
710 for (
int d = 0; d < sys_dim; ++d) {
711 local_vertex[d] = vertices.at(0)[d] + diff[d] / dd;
712 local_vertex[d] = (local_vertex[d] < 0.0)
713 ? local_vertex[d] + 1.0
714 : ((local_vertex[d] > box_size[d])
715 ? local_vertex[d] - box_size[d]
717 vertices.at(0)[d] = local_vertex[d];
720 MPI_Allgather(local_vertex, sys_dim, MPI_DOUBLE,
721 neighbor_vertices.data(), sys_dim, MPI_DOUBLE, cart_comm);
725 voro::container con_prep(0.0, box_size[0], 0.0, box_size[1], 0.0,
728 false,
false,
false, 10);
731 for (
auto i = 0; i < nNeighbors; ++i) {
732 con_prep.put(i, neighbor_vertices.at(3 * i),
733 neighbor_vertices.at(3 * i + 1),
734 neighbor_vertices.at(3 * i + 2));
759 std::vector<std::vector<double>> remote_particles(nNeighbors);
761 for (
auto it = points.begin(); it != points.end(); ++it) {
765 con_prep.find_voronoi_cell((*it)[0], (*it)[1], (*it)[2], x, y, z,
767 remote_particles.at(pos).push_back((*it)[0]);
768 remote_particles.at(pos).push_back((*it)[1]);
769 remote_particles.at(pos).push_back((*it)[2]);
770 remote_particles.at(pos).push_back(it->getWeight());
777 int remote_s[nNeighbors];
778 int remote_r[nNeighbors];
779 MPI_Request request_s[nNeighbors];
780 MPI_Request request_r[nNeighbors];
781 MPI_Status status_s[nNeighbors];
782 MPI_Status status_r[nNeighbors];
784 for (
auto i = 0; i < nNeighbors; ++i) {
786 remote_s[i] = remote_particles.at(i).size();
787 MPI_Isend(&remote_s[i], 1, MPI_INT, neighbors.at(i), 3000, cart_comm,
789 MPI_Irecv(&remote_r[i], 1, MPI_INT, neighbors.at(i), 3000, cart_comm,
793 MPI_Waitall(nNeighbors, request_s, status_s);
794 MPI_Waitall(nNeighbors, request_r, status_r);
822 std::vector<std::vector<double>> received_particles(nNeighbors);
823 for (
auto i = 0; i < nNeighbors; ++i) {
824 if (remote_r[i] > 0) {
825 received_particles.at(i).resize(remote_r[i]);
826 MPI_Irecv(received_particles.at(i).data(), remote_r[i], MPI_DOUBLE,
827 neighbors.at(i), 4000, cart_comm, &request_r[i]);
829 request_r[i] = MPI_REQUEST_NULL;
831 MPI_Isend(remote_particles.at(i).data(), remote_s[i], MPI_DOUBLE,
832 neighbors.at(i), 4000, cart_comm, &request_s[i]);
834 request_s[i] = MPI_REQUEST_NULL;
837 MPI_Waitall(nNeighbors, request_s, status_s);
838 MPI_Waitall(nNeighbors, request_r, status_r);
840 for (
auto i = 0; i < nNeighbors; ++i) {
841 for (
auto j = 0; j < remote_r[i]; j += 4) {
843 received_particles.at(i).at(j + 3));
844 points.push_back(tmp_point);
858 for (
int i = 0; i < n_ranks; ++i) {
859 if (localRank == i) {
861 if (!weighted_points)
862 of.open(
"prep_data.dat", std::ios::out | std::ios::app);
864 of.open(
"prep_data_w.dat", std::ios::out | std::ios::app);
865 of << (i_preloop + 1) <<
" " << localRank <<
" ";
867 of <<
" " << vertices.at(0)[0] <<
" " << vertices.at(0)[1] <<
" "
868 << vertices.at(0)[2] <<
" " << n_points << std::endl;
870 if (i == n_ranks - 1)
873 MPI_Barrier(cart_comm);
875 MPI_Barrier(cart_comm);
880 double limit_efficiency = 0.5;
883 std::vector<int> local_coordinates(local_coords, local_coords+sys_dim);
884 std::vector<int> global_dimensions(global_dim, global_dim+sys_dim);
888 for (
int i_loop = 0; i_loop < max_loop; ++i_loop) {
889 MPI_Barrier(cart_comm);
890 if (d_ratio < limit_efficiency) {
892 limit_efficiency /= 2.0;
895 std::cout <<
"loop " << i_loop <<
": " << std::endl;
896 std::vector<double> work;
897 std::vector<int> n_bins(3, -1);
903 if (!weighted_points) {
905#ifdef ALL_VORONOI_ACTIVE
908 for (
int i = 0; i < 3; ++i)
910 for (
auto p : points) {
911 cow = cow + (p * (1.0 / n_points));
913 if (points.size() != 0)
914 vertices.at(1) = cow;
916 vertices.at(1) = vertices.at(0);
921 work = std::vector<double>(1, (
double)n_points);
926 int d = 2 - i_loop % 3;
928 lb = std::ceil(lp[d] / histogram_width) * histogram_width;
929 ub = std::ceil(up[d] / histogram_width) * histogram_width;
930 n_bins.at(d) = (ub - lb) / histogram_width;
932 work = std::vector<double>(n_bins.at(d), 0.0);
934 for (
auto p : points) {
935 int idx = (int)std::floor(((p[d] - lb) / histogram_width));
944 int rank_left, rank_right;
945 MPI_Cart_shift(cart_comm, 0, 1, &rank_left, &rank_right);
947 MPI_Request sreq, rreq;
948 MPI_Status ssta, rsta;
952 MPI_Isend(&overlap, 1, MPI_DOUBLE, rank_left, 0, cart_comm, &sreq);
953 MPI_Irecv(&recv_work, 1, MPI_DOUBLE, rank_right, 0, cart_comm, &rreq);
954 MPI_Wait(&sreq, &ssta);
955 MPI_Wait(&rreq, &rsta);
957 if (local_coords[d] != global_dim[d] - 1)
958 work.at(n_bins.at(d) - 1) += recv_work;
963 work = std::vector<double>(1, 0.0);
964 for (
auto p = points.begin(); p != points.end(); ++p)
965 work.at(0) += p->getWeight();
970 int d = 2 - i_loop % 3;
972 lb = std::ceil(lp[d] / histogram_width) * histogram_width;
973 ub = std::ceil(up[d] / histogram_width) * histogram_width;
974 n_bins.at(d) = (ub - lb) / histogram_width;
976 work = std::vector<double>(n_bins.at(d), 0.0);
978 for (
auto p : points) {
979 int idx = (int)std::floor(((p[d] - lb) / histogram_width));
981 work.at(idx) += p.getWeight();
983 overlap += p.getWeight();
988 int rank_left, rank_right;
989 MPI_Cart_shift(cart_comm, 0, 1, &rank_left, &rank_right);
991 MPI_Request sreq, rreq;
992 MPI_Status ssta, rsta;
996 MPI_Isend(&overlap, 1, MPI_DOUBLE, rank_left, 0, cart_comm, &sreq);
997 MPI_Irecv(&recv_work, 1, MPI_DOUBLE, rank_right, 0, cart_comm, &rreq);
998 MPI_Wait(&sreq, &ssta);
999 MPI_Wait(&rreq, &rsta);
1001 if (local_coords[d] != global_dim[d] - 1)
1002 work.at(n_bins.at(d) - 1) += recv_work;
1004#ifdef ALL_VORONOI_ACTIVE
1006 if (points.size() > 0) {
1008 for (
int i = 0; i < 3; ++i)
1010 for (
auto p : points) {
1011 cow = cow + (p * (1.0 / (work.at(0) * p.getWeight())));
1013 vertices.at(1) = cow;
1015 vertices.at(1) = vertices.at(0);
1019#ifdef ALL_DEBUG_ENABLED
1020 MPI_Barrier(cart_comm);
1022 std::cout <<
"finished computation of work" << std::endl;
1026 std::cout <<
"Setting work..." << std::endl;
1029#ifdef ALL_DEBUG_ENABLED
1030 MPI_Barrier(cart_comm);
1032 std::cout <<
"Setting process grid information..." << std::endl;
1034#ifdef ALL_DEBUG_ENABLED
1035 MPI_Barrier(cart_comm);
1037 std::cout <<
"Setting communicator..." << std::endl;
1040#ifdef ALL_DEBUG_ENABLED
1041 MPI_Barrier(cart_comm);
1043 std::cout <<
"Setting up method..." << std::endl;
1045#ifdef ALL_DEBUG_ENABLED
1046 MPI_Barrier(cart_comm);
1048 std::cout <<
"Setting method data..." << std::endl;
1053#ifdef ALL_DEBUG_ENABLED
1054 MPI_Barrier(cart_comm);
1056 std::cout <<
"Setting system size..." << std::endl;
1059#ifdef ALL_DEBUG_ENABLED
1060 MPI_Barrier(cart_comm);
1062 std::cout <<
"Setting domain vertices..." << std::endl;
1065#ifdef ALL_DEBUG_ENABLED
1066 MPI_Barrier(cart_comm);
1068 std::cout <<
"Setting domain minimum size..." << std::endl;
1072 int n_points_global = 0;
1073 MPI_Reduce(&n_points, &n_points_global, 1, MPI_INT, MPI_SUM, 0,
1076 std::cout <<
"number of particles in step " << i_loop <<
": "
1077 << n_points_global << std::endl;
1078#ifdef ALL_DEBUG_ENABLED
1079 MPI_Barrier(cart_comm);
1081 std::cout <<
"Providing current LB efficiency" << std::endl;
1085 std::cout <<
"current LB efficieny: " << LBefficiency << std::endl;
1087#ifdef ALL_DEBUG_ENABLED
1088 MPI_Barrier(cart_comm);
1090 std::cout <<
"Balancing domains..." << std::endl;
1093#ifdef ALL_DEBUG_ENABLED
1094 MPI_Barrier(cart_comm);
1096 std::cout <<
"Providing estimated new LB efficiency" << std::endl;
1099 std::cout <<
"Estimated LB efficency: "
1103#ifdef ALL_DEBUG_ENABLED
1104 MPI_Barrier(cart_comm);
1106 std::cout <<
"Updating vertices..." << std::endl;
1109 old_vertices = vertices;
1110 vertices = new_vertices;
1112#ifdef ALL_VORONOI_ACTIVE
1114#ifdef ALL_DEBUG_ENABLED
1115 MPI_Barrier(cart_comm);
1117 std::cout <<
"Updating vertices (Voronoi)..." << std::endl;
1120 vertices.at(1) = vertices.at(0);
1123#ifdef ALL_DEBUG_ENABLED
1124 MPI_Barrier(cart_comm);
1126 std::cout <<
"Updating upper / lower vertices..." << std::endl;
1128 lp = vertices.at(0);
1129 up = vertices.at(vertices.size() - 1);
1131#ifdef ALL_DEBUG_ENABLED
1132 MPI_Barrier(cart_comm);
1134 std::cout <<
"beginning particles transfer..." << std::endl;
1136 switch (chosen_method) {
1141 for (
int i = 0; i < sys_dim; ++i) {
1142 for (
int j = 0; j < 2; ++j) {
1146 for (
auto P = points.begin(); P != points.end(); ++P) {
1148 if (p[i] < vertices.at(0)[i]) {
1151 for (
int j = 0; j < sys_dim; j++) {
1153 p[j] = p[j] + box_size[j];
1154 transfer[n_transfer[0] * (sys_dim + 1) + j] = p[j];
1156 transfer[n_transfer[0] * (sys_dim + 1) + sys_dim] = p.
getWeight();
1158 }
else if (p[i] >= vertices.at(1)[i]) {
1160 for (
int j = 0; j < sys_dim; j++) {
1161 if (p[j] > box_size[j])
1162 p[j] = p[j] - box_size[j];
1163 transfer[(sys_dim + 1) * max_particles +
1164 n_transfer[1] * (sys_dim + 1) + j] = p[j];
1166 transfer[(sys_dim + 1) * max_particles +
1167 n_transfer[1] * (sys_dim + 1) + sys_dim] = p.
getWeight();
1171 for (
auto P = points.begin(); P != points.end(); ++P) {
1173 if (p[i] < vertices.at(0)[i] || p[i] >= vertices.at(1)[i]) {
1181 MPI_Request sreq_r, rreq_r;
1182 MPI_Request sreq_l, rreq_l;
1183 MPI_Status sstatus, rstatus;
1185 MPI_Irecv(&n_recv[1], 1, MPI_INT, r_neig[i], 10, cart_comm, &rreq_r);
1186 MPI_Irecv(&n_recv[0], 1, MPI_INT, l_neig[i], 20, cart_comm, &rreq_l);
1188 MPI_Isend(&n_transfer[0], 1, MPI_INT, l_neig[i], 10, cart_comm,
1190 MPI_Isend(&n_transfer[1], 1, MPI_INT, r_neig[i], 20, cart_comm,
1193 MPI_Wait(&sreq_l, &sstatus);
1194 MPI_Wait(&sreq_r, &rstatus);
1196 MPI_Wait(&rreq_l, &sstatus);
1197 MPI_Wait(&rreq_r, &rstatus);
1200 MPI_Irecv(&recv[max_particles * (sys_dim + 1)],
1201 (sys_dim + 1) * n_recv[1], MPI_DOUBLE, r_neig[i], 30,
1202 cart_comm, &rreq_r);
1203 MPI_Irecv(&recv[0], (sys_dim + 1) * n_recv[0], MPI_DOUBLE, l_neig[i],
1204 40, cart_comm, &rreq_l);
1206 MPI_Isend(&transfer[0], (sys_dim + 1) * n_transfer[0], MPI_DOUBLE,
1207 l_neig[i], 30, cart_comm, &sreq_l);
1208 MPI_Isend(&transfer[max_particles * (sys_dim + 1)],
1209 (sys_dim + 1) * n_transfer[1], MPI_DOUBLE, r_neig[i], 40,
1210 cart_comm, &sreq_r);
1212 MPI_Wait(&sreq_l, &sstatus);
1213 MPI_Wait(&sreq_r, &rstatus);
1215 MPI_Wait(&rreq_l, &sstatus);
1216 MPI_Wait(&rreq_r, &rstatus);
1218 for (
int j = 0; j < n_recv[0]; ++j) {
1220 recv[j * (sys_dim + 1) + sys_dim]);
1221 points.push_back(p);
1224 for (
int j = 0; j < n_recv[1]; ++j) {
1226 sys_dim, &recv[(max_particles + j) * (sys_dim + 1)],
1227 recv[(max_particles + j) * (sys_dim + 1) + sys_dim]);
1228 points.push_back(p);
1239 nNeighbors = neighbors.data();
1242 offset_neig[1] = nNeighbors[0];
1245 for (
int n = 0; n < 6; ++n)
1246 loc_neighbors += nNeighbors[n];
1248 MPI_Allreduce(&loc_neighbors, &max_neighbors, 1, MPI_INT, MPI_MAX,
1251 for (
int i = 0; i < sys_dim; ++i) {
1253 if (nNeighbors[i] + nNeighbors[i + 1] > max_neig) {
1254 std::cout <<
">>> resizing transfer buffers from " << max_neig
1256 max_neig = (int)(1.5 * (
double)(nNeighbors[i] + nNeighbors[i + 1]));
1257 std::cout <<
"to " << max_neig <<
" neighbors on process "
1258 << localRank << std::endl;
1261 recv =
new double[max_neig * (sys_dim + 1) * max_particles];
1262 transfer =
new double[max_neig * (sys_dim + 1) * max_particles];
1269 for (
int j = 0; j < 2; ++j) {
1273 for (
auto P = points.begin(); P != points.end(); ++P) {
1275 if (p[i] < vertices.at(0)[i]) {
1278 p[i] += box_size[i];
1281 for (
int d = 0; d < sys_dim; ++d)
1282 transfer[n_transfer[0] * (sys_dim + 1) + d] = p[d];
1283 transfer[n_transfer[0] * (sys_dim + 1) + sys_dim] = p.
getWeight();
1285 if (n_transfer[0] > max_particles) {
1286 std::stringstream ss;
1287 ss <<
"Trying to send more particles than buffer size allows! "
1288 <<
" n_transfer: " << n_transfer[0]
1289 <<
" max_particles: " << max_particles;
1293 }
else if (p[i] >= vertices.at(1)[i]) {
1295 if (p[i] >= box_size[i])
1296 p[i] -= box_size[i];
1299 for (
int d = 0; d < sys_dim; ++d) {
1300 transfer[(sys_dim + 1) * max_particles +
1301 n_transfer[1] * (sys_dim + 1) + d] = p[d];
1303 transfer[(sys_dim + 1) * max_particles +
1304 n_transfer[1] * (sys_dim + 1) + sys_dim] = p.
getWeight();
1306 if (n_transfer[1] > max_particles) {
1307 std::stringstream ss;
1308 ss <<
"Trying to send more particles than buffer size allows! "
1309 <<
" n_transfer: " << n_transfer[0]
1310 <<
" max_particles: " << max_particles;
1321 auto rem_it = std::remove_if(
1323 return p[i] < vertices.at(0)[i] || p[i] >= vertices.at(1)[i];
1325 points.erase(rem_it, points.end());
1326 n_points = points.size();
1339 for (
int d = 0; d < nNeighbors[2 * i]; ++d) {
1340 MPI_Irecv(&n_recv[d], 1, MPI_INT, neighbors.at(offset_neig[0] + d),
1341 20, cart_comm, &rreq_l[d]);
1342 MPI_Isend(&n_transfer[0], 1, MPI_INT,
1343 neighbors.at(offset_neig[0] + d), 10, cart_comm,
1346 for (
int d = 0; d < nNeighbors[2 * i + 1]; ++d) {
1347 MPI_Irecv(&n_recv[
MAX_NEIG + d], 1, MPI_INT,
1348 neighbors.at(offset_neig[1] + d), 10, cart_comm,
1350 MPI_Isend(&n_transfer[1], 1, MPI_INT,
1351 neighbors.at(offset_neig[1] + d), 20, cart_comm,
1355 if (nNeighbors[2 * i] >
MAX_NEIG) {
1357 __FILE__, __func__, __LINE__,
1358 "Number of neighbors higher than expected \
1359 maximum number of neighbors.");
1362 if (nNeighbors[2 * i + 1] >
MAX_NEIG) {
1364 __FILE__, __func__, __LINE__,
1365 "Number of neighbors higher than expected \
1366 maximum number of neighbors.");
1369 MPI_Waitall(nNeighbors[2 * i], sreq_l, lsstatus);
1370 MPI_Waitall(nNeighbors[2 * i + 1], sreq_r, lrstatus);
1372 MPI_Waitall(nNeighbors[2 * i], rreq_l, rsstatus);
1373 MPI_Waitall(nNeighbors[2 * i + 1], rreq_r, rrstatus);
1381 for (
int d = 0; d < nNeighbors[2 * i]; ++d) {
1382 MPI_Irecv(&recv[offset_l * (sys_dim + 1)],
1383 (sys_dim + 1) * n_recv[d], MPI_DOUBLE,
1384 neighbors.at(offset_neig[0] + d), 40, cart_comm,
1386 MPI_Isend(&transfer[0], (sys_dim + 1) * n_transfer[0], MPI_DOUBLE,
1387 neighbors.at(offset_neig[0] + d), 30, cart_comm,
1389 offset_l += n_recv[d];
1393 for (
int d = 0; d < nNeighbors[2 * i + 1]; ++d) {
1395 &recv[max_particles * (sys_dim + 1) + offset_r * (sys_dim + 1)],
1396 (sys_dim + 1) * n_recv[
MAX_NEIG + d], MPI_DOUBLE,
1397 neighbors.at(offset_neig[1] + d), 30, cart_comm, &rreq_r[d]);
1398 MPI_Isend(&transfer[max_particles * (sys_dim + 1)],
1399 (sys_dim + 1) * n_transfer[1], MPI_DOUBLE,
1400 neighbors.at(offset_neig[1] + d), 40, cart_comm,
1405 MPI_Waitall(nNeighbors[2 * i], sreq_l, lsstatus);
1406 MPI_Waitall(nNeighbors[2 * i + 1], sreq_r, lrstatus);
1408 MPI_Waitall(nNeighbors[2 * i], rreq_l, rsstatus);
1409 MPI_Waitall(nNeighbors[2 * i + 1], rreq_r, rrstatus);
1415 for (
int j = 0; j < offset_l; ++j) {
1417 recv[j * (sys_dim + 1) + sys_dim]);
1419 bool outside =
false;
1420 for (
int d = 0; d < i; ++d)
1421 outside = outside ||
1422 (p[d] < vertices.at(0)[d] || p[d] >= vertices.at(1)[d]);
1425 points.push_back(p);
1428 for (
int j = 0; j < offset_r; ++j) {
1430 sys_dim, &recv[(max_particles + j) * (sys_dim + 1)],
1431 recv[(max_particles + j) * (sys_dim + 1) + sys_dim]);
1433 bool outside =
false;
1434 for (
int d = 0; d < i; ++d)
1435 outside = outside ||
1436 (p[d] < vertices.at(0)[d] || p[d] >= vertices.at(1)[d]);
1439 points.push_back(p);
1443 offset_neig[0] = offset_neig[1] + nNeighbors[2 * i + 1];
1444 if (i < sys_dim - 1)
1445 offset_neig[1] = offset_neig[0] + nNeighbors[2 * (i + 1)];
1451 double comm_vertices[27 * 8 * 3];
1453 std::vector<int> neighbors(27);
1459 for (
int z = -1; z <= 1; ++z) {
1460 coords[2] = local_coords[2] + z;
1461 for (
int y = -1; y <= 1; ++y) {
1462 coords[1] = local_coords[1] + y;
1463 for (
int x = -1; x <= 1; ++x) {
1464 coords[0] = local_coords[0] + x;
1465 if (coords[0] >= 0 && coords[0] < global_dim[0] &&
1466 coords[1] >= 0 && coords[1] < global_dim[1] &&
1467 coords[2] >= 0 && coords[2] < global_dim[2]) {
1468 MPI_Cart_rank(cart_comm, coords, &neighbors.at(neig));
1469 exists[neig] =
true;
1471 neighbors.at(neig) = MPI_PROC_NULL;
1472 exists[neig] =
false;
1479 neighbors.at(13) = MPI_PROC_NULL;
1481 for (
int i = 0; i < 27 * 8 * 3; ++i)
1482 comm_vertices[i] = -1.0;
1485 for (
int i = 0; i < 8; ++i) {
1486 for (
int d = 0; d < 3; ++d) {
1487 comm_vertices[13 * 8 * 3 + 3 * i + d] = vertices.at(i)[d];
1493 MPI_Request request[54];
1494 MPI_Status status[54];
1496 for (
int i = 0; i < 54; ++i)
1497 request[i] = MPI_REQUEST_NULL;
1500 for (
int d = 0; d < 3; ++d) {
1502 comm_size = (int)std::pow(3, d);
1505 coords[0] = local_coords[0];
1506 coords[1] = local_coords[1];
1507 coords[2] = local_coords[2];
1514 MPI_Cart_rank(cart_comm, coords, &low_neig);
1516 low_neig = MPI_PROC_NULL;
1518 if (coords[d] < global_dim[d])
1519 MPI_Cart_rank(cart_comm, coords, &up_neig);
1521 up_neig = MPI_PROC_NULL;
1523 MPI_Isend(&comm_vertices[(13 - offset) * 24], comm_size * 24,
1524 MPI_DOUBLE, low_neig, 1010, cart_comm, &request[0]);
1525 MPI_Isend(&comm_vertices[(13 - offset) * 24], comm_size * 24,
1526 MPI_DOUBLE, up_neig, 2010, cart_comm, &request[1]);
1532 offset += comm_size;
1534 MPI_Irecv(&comm_vertices[(13 - offset) * 24], comm_size * 24,
1535 MPI_DOUBLE, low_neig, 2010, cart_comm, &request[28]);
1536 MPI_Irecv(&comm_vertices[(14 + offset - comm_size) * 24],
1537 comm_size * 24, MPI_DOUBLE, up_neig, 1010, cart_comm,
1540 MPI_Waitall(54, request, status);
1543#ifdef ALL_VTK_OUTPUT
1544#ifdef ALL_VTK_FORCE_SORT
1546 auto vtkpoints = vtkSmartPointer<vtkPoints>::New();
1547 auto unstructuredGrid = vtkSmartPointer<vtkForceBasedGrid>::New();
1548 for (
int i = 0; i < 27; ++i) {
1549 for (
int v = 0; v < 8; ++v) {
1550 vtkpoints->InsertNextPoint(comm_vertices[i * 24 + v * 3],
1551 comm_vertices[i * 24 + v * 3 + 1],
1552 comm_vertices[i * 24 + v * 3 + 2]);
1555 unstructuredGrid->SetPoints(vtkpoints);
1557 auto work = vtkSmartPointer<vtkFloatArray>::New();
1558 work->SetNumberOfComponents(1);
1559 work->SetNumberOfTuples(27);
1560 work->SetName(
"Cell");
1562 for (
int n = 0; n < 27; ++n) {
1564 vtkIdType pointIds[8] = {8 * n + 0, 8 * n + 1, 8 * n + 2, 8 * n + 3,
1565 8 * n + 4, 8 * n + 5, 8 * n + 6, 8 * n + 7};
1567 auto faces = vtkSmartPointer<vtkCellArray>::New();
1569 vtkIdType f0[3] = {8 * n + 0, 8 * n + 2, 8 * n + 1};
1570 vtkIdType f1[3] = {8 * n + 1, 8 * n + 2, 8 * n + 3};
1572 vtkIdType f2[3] = {8 * n + 0, 8 * n + 4, 8 * n + 2};
1573 vtkIdType f3[3] = {8 * n + 2, 8 * n + 4, 8 * n + 6};
1575 vtkIdType f4[3] = {8 * n + 2, 8 * n + 6, 8 * n + 3};
1576 vtkIdType f5[3] = {8 * n + 3, 8 * n + 6, 8 * n + 7};
1578 vtkIdType f6[3] = {8 * n + 1, 8 * n + 5, 8 * n + 3};
1579 vtkIdType f7[3] = {8 * n + 3, 8 * n + 5, 8 * n + 7};
1581 vtkIdType f8[3] = {8 * n + 0, 8 * n + 4, 8 * n + 1};
1582 vtkIdType f9[3] = {8 * n + 1, 8 * n + 4, 8 * n + 5};
1584 vtkIdType fa[3] = {8 * n + 4, 8 * n + 6, 8 * n + 5};
1585 vtkIdType fb[3] = {8 * n + 5, 8 * n + 6, 8 * n + 7};
1587 faces->InsertNextCell(3, f0);
1588 faces->InsertNextCell(3, f1);
1589 faces->InsertNextCell(3, f2);
1590 faces->InsertNextCell(3, f3);
1591 faces->InsertNextCell(3, f4);
1592 faces->InsertNextCell(3, f5);
1593 faces->InsertNextCell(3, f6);
1594 faces->InsertNextCell(3, f7);
1595 faces->InsertNextCell(3, f8);
1596 faces->InsertNextCell(3, f9);
1597 faces->InsertNextCell(3, fa);
1598 faces->InsertNextCell(3, fb);
1600 unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds, 12,
1601 faces->GetPointer());
1602 work->SetValue(n, (
double)n);
1604 unstructuredGrid->GetCellData()->AddArray(work);
1619 MPI_Allreduce(&n_points, &check_np, 1, MPI_INT, MPI_SUM, cart_comm);
1621 auto locator = vtkSmartPointer<vtkCellLocator>::New();
1622 locator->SetDataSet(unstructuredGrid);
1623 locator->BuildLocator();
1625 auto in_triangle = [=](
double nv[27 * 8 * 3],
int n,
int A,
int B,
1626 int C,
double x,
double y,
double z) {
1642 auto same_side_plane = [=](
double nv[27 * 8 * 3],
int n,
int A,
int B,
1643 int C,
int D,
double x,
double y,
double z) {
1646 double CBx = nv[n * 24 + C * 8 + 0] - nv[n * 24 + B * 8 + 0];
1647 double CBy = nv[n * 24 + C * 8 + 1] - nv[n * 24 + B * 8 + 1];
1648 double CBz = nv[n * 24 + C * 8 + 2] - nv[n * 24 + B * 8 + 2];
1650 double DBx = nv[n * 24 + D * 8 + 0] - nv[n * 24 + B * 8 + 0];
1651 double DBy = nv[n * 24 + D * 8 + 1] - nv[n * 24 + B * 8 + 1];
1652 double DBz = nv[n * 24 + D * 8 + 2] - nv[n * 24 + B * 8 + 2];
1654 double ABx = nv[n * 24 + A * 8 + 0] - nv[n * 24 + B * 8 + 0];
1655 double ABy = nv[n * 24 + A * 8 + 1] - nv[n * 24 + B * 8 + 1];
1656 double ABz = nv[n * 24 + A * 8 + 2] - nv[n * 24 + B * 8 + 2];
1658 double PBx = x - nv[n * 24 + B * 8 + 0];
1659 double PBy = y - nv[n * 24 + B * 8 + 1];
1660 double PBz = z - nv[n * 24 + B * 8 + 2];
1664 double nx = CBy * DBz - CBz * DBy;
1665 double ny = CBz * DBx - CBx * DBz;
1666 double nz = CBx * DBy - CBy * DBx;
1669 double ABn = ABx * nx + ABy * ny + ABz * nz;
1672 double PBn = PBx * nx + PBy * ny + PBz * nz;
1698 if (std::abs(PBn) < 1e-12) {
1699 return in_triangle(nv, n, B, C, D, x, y, z);
1702 return ((ABn * PBn) > 0);
1707 auto in_tetraeder = [=](
double nv[27 * 8 * 3],
int n,
int t[4],
1708 double x,
double y,
double z) {
1709 return same_side_plane(nv, n, t[0], t[1], t[2], t[3], x, y, z) &&
1710 same_side_plane(nv, n, t[3], t[0], t[1], t[2], x, y, z) &&
1711 same_side_plane(nv, n, t[2], t[3], t[0], t[1], x, y, z) &&
1712 same_side_plane(nv, n, t[1], t[2], t[3], t[0], x, y, z);
1715 auto containing_neighbor = [=](
double nv[27 * 8 * 3],
bool e[27],
1716 double x,
double y,
double z) {
1731 int tetrahedra[5][4] = {{1, 2, 4, 7},
1741 for (
int t = 0; t < 5; ++t) {
1742 if (in_tetraeder(nv, 13, tetrahedra[t], x, y, z))
1747 for (
int n = 0; n < 27; ++n) {
1748 if (!e[n] || n == 13)
1751 for (
int t = 0; t < 5; ++t) {
1752 if (in_tetraeder(nv, n, tetrahedra[t], x, y, z))
1764 for (
int i = 0; i < 27; ++i) {
1770 int check_np_new = 0;
1772 for (
auto P = points.begin(); P != points.end(); ++P) {
1778 for (
int d = 0; d < 3; ++d)
1780#ifdef ALL_VTK_FORCE_SORT
1781 vtkIdType cellId = locator->FindCell(pcoords);
1783 int cellId = containing_neighbor(comm_vertices, exists, pcoords[0],
1784 pcoords[1], pcoords[2]);
1793 if (cellId >= 0 && cellId != 13 && cellId <= 26) {
1794 if (n_send[cellId] == max_particles) {
1796 __FILE__, __func__, __LINE__,
1797 "Trying to send more particles than \
1798 buffer size allows!");
1800 for (
int d = 0; d < 3; ++d) {
1801 transfer[cellId * (sys_dim + 1) * max_particles +
1802 n_send[cellId] * (sys_dim + 1) + d] = p[d];
1804 transfer[cellId * (sys_dim + 1) * max_particles +
1805 n_send[cellId] * (sys_dim + 1) + sys_dim] = p.
getWeight();
1813 for (
int n = 0; n < 27; ++n) {
1814 MPI_Isend(&n_send[n], 1, MPI_INT, neighbors.at(n), 1020, cart_comm,
1816 MPI_Irecv(&n_recv[n], 1, MPI_INT, neighbors.at(n), 1020, cart_comm,
1819 MPI_Waitall(54, request, status);
1821 for (
int n = 0; n < 27; ++n) {
1822 MPI_Isend(&transfer[n * (sys_dim + 1) * max_particles],
1823 (sys_dim + 1) * n_send[n], MPI_DOUBLE, neighbors.at(n),
1824 1030, cart_comm, &request[n]);
1825 MPI_Irecv(&recv[n * (sys_dim + 1) * max_particles],
1826 (sys_dim + 1) * n_recv[n], MPI_DOUBLE, neighbors.at(n),
1827 1030, cart_comm, &request[27 + n]);
1829 MPI_Waitall(54, request, status);
1831 for (
int n = 0; n < 27; ++n) {
1833 for (
int i = 0; i < n_recv[n]; ++i) {
1836 &recv[n * (sys_dim + 1) * max_particles + i * (sys_dim + 1)],
1837 recv[n * (sys_dim + 1) * max_particles + i * (sys_dim + 1) +
1839 points.push_back(p);
1847 std::cout <<
"Currently no FORCEBASED test without VTK!"
1849 MPI_Abort(MPI_COMM_WORLD, -1);
1852#ifdef ALL_VORONOI_ACTIVE
1857 int nNeighbors = neighbors.size();
1861 voro::container con_copy(0.0, box_size[0], 0.0, box_size[1], 0.0,
1864 false,
false,
false, 10);
1867 for (
auto i = 0; i < nNeighbors; ++i) {
1868 con_copy.put(i, neighbor_vertices.at(3 * i),
1869 neighbor_vertices.at(3 * i + 1),
1870 neighbor_vertices.at(3 * i + 2));
1874 con_copy.put(nNeighbors, vertices.at(0)[0], vertices.at(0)[1],
1896 std::vector<std::vector<double>> remote_particles(nNeighbors);
1898 for (
auto P = points.begin(); P != points.end(); ++P) {
1903 con_copy.find_voronoi_cell(p[0], p[1], p[2], x, y, z, pos);
1904 if (pos < nNeighbors) {
1905 remote_particles.at(pos).push_back(p[0]);
1906 remote_particles.at(pos).push_back(p[1]);
1907 remote_particles.at(pos).push_back(p[2]);
1908 remote_particles.at(pos).push_back(p.
getWeight());
1913 for (
auto P = points.begin(); P != points.end(); ++P) {
1915 if (p[0] < -500.0) {
1924 int remote_s[nNeighbors];
1925 int remote_r[nNeighbors];
1926 MPI_Request request_s[nNeighbors];
1927 MPI_Request request_r[nNeighbors];
1928 MPI_Status status_s[nNeighbors];
1929 MPI_Status status_r[nNeighbors];
1931 for (
auto i = 0; i < nNeighbors; ++i) {
1933 remote_s[i] = remote_particles.at(i).size();
1934 MPI_Isend(&remote_s[i], 1, MPI_INT, neighbors.at(i), 3000, cart_comm,
1936 MPI_Irecv(&remote_r[i], 1, MPI_INT, neighbors.at(i), 3000, cart_comm,
1940 MPI_Waitall(nNeighbors, request_s, status_s);
1941 MPI_Waitall(nNeighbors, request_r, status_r);
1970 std::vector<std::vector<double>> received_particles(nNeighbors);
1971 for (
auto i = 0; i < nNeighbors; ++i) {
1972 if (remote_r[i] > 0) {
1973 received_particles.at(i).resize(remote_r[i]);
1974 MPI_Irecv(received_particles.at(i).data(), remote_r[i], MPI_DOUBLE,
1975 neighbors.at(i), 4000, cart_comm, &request_r[i]);
1977 request_r[i] = MPI_REQUEST_NULL;
1978 if (remote_s[i] > 0)
1979 MPI_Isend(remote_particles.at(i).data(), remote_s[i], MPI_DOUBLE,
1980 neighbors.at(i), 4000, cart_comm, &request_s[i]);
1982 request_s[i] = MPI_REQUEST_NULL;
1985 MPI_Waitall(nNeighbors, request_s, status_s);
1986 MPI_Waitall(nNeighbors, request_r, status_r);
1988 for (
auto i = 0; i < nNeighbors; ++i) {
1989 for (
auto j = 0; j < remote_r[i]; j += 4) {
1991 received_particles.at(i).at(j + 3));
1992 points.push_back(tmp_point);
2002 int curr_dim = 2 - (i_loop % 3);
2009 MPI_Comm_split(cart_comm,
2010 local_coords[1] + local_coords[2] * global_dim[1],
2011 local_coords[0], &comm_col);
2016 MPI_Comm_split(cart_comm,
2017 local_coords[0] + local_coords[2] * global_dim[0],
2018 local_coords[1], &comm_col);
2023 MPI_Comm_split(cart_comm,
2024 local_coords[0] + local_coords[1] * global_dim[0],
2025 local_coords[2], &comm_col);
2032 std::vector<double> borders(2 * global_dim[curr_dim]);
2033 std::vector<double> old_borders(2 * global_dim[curr_dim]);
2034 std::vector<double> local_borders(2);
2035 std::vector<double> old_border(2);
2037 local_borders.at(0) = vertices.at(0)[curr_dim];
2038 local_borders.at(1) = vertices.at(1)[curr_dim];
2039 old_border.at(0) = old_vertices.at(0)[curr_dim];
2040 old_border.at(1) = old_vertices.at(1)[curr_dim];
2043 MPI_Comm_rank(comm_col, &rank);
2044 MPI_Comm_size(comm_col, &size);
2047 MPI_Allgather(local_borders.data(), 2, MPI_DOUBLE, borders.data(), 2,
2048 MPI_DOUBLE, comm_col);
2051 MPI_Allgather(old_border.data(), 2, MPI_DOUBLE, old_borders.data(), 2,
2052 MPI_DOUBLE, comm_col);
2055 std::vector<int> send_neig;
2056 std::vector<int> recv_neig;
2058 for (
int n = 0; n < global_dim[curr_dim]; ++n) {
2071 if ((borders.at(2 * n) <= old_border.at(0) &&
2072 borders.at(2 * n + 1) >= old_border.at(0)) ||
2073 (borders.at(2 * n) <= old_border.at(1) &&
2074 borders.at(2 * n + 1) >= old_border.at(1)) ||
2075 (borders.at(2 * n) > old_border.at(0) &&
2076 borders.at(2 * n + 1) < old_border.at(1)))
2077 send_neig.push_back(n);
2078 if ((old_borders.at(2 * n) <= local_borders.at(0) &&
2079 old_borders.at(2 * n + 1) >= local_borders.at(0)) ||
2080 (old_borders.at(2 * n) <= local_borders.at(1) &&
2081 old_borders.at(2 * n + 1) >= local_borders.at(1)) ||
2082 (old_borders.at(2 * n) > local_borders.at(0) &&
2083 old_borders.at(2 * n + 1) < local_borders.at(1)))
2084 recv_neig.push_back(n);
2088 std::vector<std::vector<double>> send_vec(send_neig.size());
2091 for (
auto p : points) {
2092 for (
int n = 0; n < (int)send_neig.size(); ++n) {
2093 int neig_id = send_neig.at(n);
2102 if ((borders.at(2 * neig_id) <= p[curr_dim] &&
2103 borders.at(2 * neig_id + 1) > p[curr_dim])) {
2104 send_vec.at(n).push_back(p[0]);
2105 send_vec.at(n).push_back(p[1]);
2106 send_vec.at(n).push_back(p[2]);
2107 send_vec.at(n).push_back(p.getWeight());
2108 send_vec.at(n).push_back((
double)(p.get_id()));
2116 std::vector<int> n_send(send_neig.size());
2117 for (
int n = 0; n < (int)send_neig.size(); ++n)
2118 n_send.at(n) = send_vec.at(n).size();
2121 std::vector<int> n_recv(recv_neig.size());
2123 std::vector<MPI_Request> sreq(send_neig.size());
2124 std::vector<MPI_Request> rreq(recv_neig.size());
2125 std::vector<MPI_Status> ssta(send_neig.size());
2126 std::vector<MPI_Status> rsta(recv_neig.size());
2128 for (
int n = 0; n < (int)send_neig.size(); ++n) {
2129 MPI_Isend(n_send.data() + n, 1, MPI_INT, send_neig.at(n), 2000,
2130 comm_col, sreq.data() + n);
2132 for (
int n = 0; n < (int)recv_neig.size(); ++n) {
2133 MPI_Irecv(n_recv.data() + n, 1, MPI_INT, recv_neig.at(n), 2000,
2134 comm_col, rreq.data() + n);
2136 MPI_Waitall(send_neig.size(), sreq.data(), ssta.data());
2160 std::vector<std::vector<double>> recv_vec(recv_neig.size());
2161 while (recvs < (
int)recv_neig.size()) {
2163 MPI_Waitany(recv_neig.size(), rreq.data(), &idx, rsta.data());
2164 recv_vec.at(idx).resize(n_recv.at(idx));
2169 for (
int n = 0; n < (int)send_neig.size(); ++n) {
2170 MPI_Isend(send_vec.at(n).data(), send_vec.at(n).size(), MPI_DOUBLE,
2171 send_neig.at(n), 3000, comm_col, sreq.data() + n);
2173 for (
int n = 0; n < (int)recv_neig.size(); ++n) {
2174 MPI_Irecv(recv_vec.at(n).data(), recv_vec.at(n).size(), MPI_DOUBLE,
2175 recv_neig.at(n), 3000, comm_col, rreq.data() + n);
2177 MPI_Waitall(send_neig.size(), sreq.data(), ssta.data());
2180 while (recvs < (
int)recv_neig.size()) {
2182 MPI_Waitany(recv_neig.size(), rreq.data(), &idx, rsta.data());
2183 for (
int p = 0; p < (int)recv_vec.at(idx).size(); p += 5) {
2185 recv_vec.at(idx).at(p + 3),
2186 (
long)(recv_vec.at(idx).at(p + 4)));
2187 points.push_back(tmp);
2191 n_points = points.size();
2193 MPI_Comm_free(&comm_col);
2201#ifdef ALL_VORONOI_ACTIVE
2204#ifdef ALL_VTK_OUTPUT
2219#ifdef ALL_VORONOI_ACTIVE
2221#ifdef ALL_VTK_OUTPUT
2229 if (!weighted_points) {
2230 n_local = (double)n_points;
2233 for (
auto p = points.begin(); p != points.end(); ++p)
2234 n_local += p->getWeight();
2237 double n_total_points;
2238 MPI_Allreduce(&n_local, &n_total_points, 1, MPI_DOUBLE, MPI_SUM,
2241 MPI_Allreduce(&n_local, &n_total, 1, MPI_DOUBLE, MPI_SUM, cart_comm);
2242 avg_work = n_total / (double)n_ranks;
2243 MPI_Allreduce(&n_local, &n_min, 1, MPI_DOUBLE, MPI_MIN, cart_comm);
2244 MPI_Allreduce(&n_local, &n_max, 1, MPI_DOUBLE, MPI_MAX, cart_comm);
2245 d_min = n_min / avg_work;
2246 d_max = n_max / avg_work;
2247 d_ratio = (d_max - d_min) / (d_max + d_min);
2249 double loc_diff, tot_diff;
2250 loc_diff = (n_local - avg_work) * (n_local - avg_work);
2251 MPI_Reduce(&loc_diff, &tot_diff, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm);
2253 d_min = n_min / avg_work;
2254 d_max = n_max / avg_work;
2255 d_ratio = (d_max - d_min) / (d_max + d_min);
2257 if (localRank == 0) {
2258 tot_diff = sqrt(tot_diff);
2260 if (!weighted_points)
2261 of.open(
"stddev.dat", std::ios::out | std::ios::app);
2263 of.open(
"stddev_w.dat", std::ios::out | std::ios::app);
2264 of << i_loop + 1 <<
" " << tot_diff << std::endl;
2268 if (!weighted_points)
2269 of.open(
"minmax.dat", std::ios::out | std::ios::app);
2271 of.open(
"minmax_w.dat", std::ios::out | std::ios::app);
2272 of << i_loop + 1 <<
" " << d_min <<
" " << d_max <<
" " << d_ratio
2273 <<
" " << max_neighbors << std::endl;
2277 std::cout <<
"d_min: " << d_min << std::endl;
2278 std::cout <<
"d_max: " << d_max << std::endl;
2279 std::cout <<
"d_ratio: " << d_ratio << std::endl;
2280 std::cout << std::flush;
2282 if (d_ratio < min_ratio) {
2283 min_ratio = d_ratio;
2289 for (
int i = 0; i < n_ranks; ++i) {
2290 if (localRank == i) {
2292 if (!weighted_points)
2293 of.open(
"domain_data.dat", std::ios::out | std::ios::app);
2295 of.open(
"domain_data_w.dat", std::ios::out | std::ios::app);
2296 of << (i_loop + 1) <<
" " << localRank <<
" ";
2301 nNeighbors = neighbors.data();
2303 for (
int j = 0; j < 3; ++j) {
2304 n_neig += nNeighbors[j];
2306 of << n_neig <<
" ";
2316 of << n_local <<
" ";
2319 if (i == n_ranks - 1)
2322 MPI_Barrier(cart_comm);
2324 MPI_Barrier(cart_comm);
2327 if (std::abs(n_total_points - total_points) > 1e-6) {
2328 std::cout << std::setprecision(14) << n_total_points
2329 <<
" != " << total_points << std::endl;
2333 if (i_loop == max_loop - 1) {
2336 double vis_work = 0.0;
2337 for (
int p = 0; p < n_points; ++p) {
2338 vis_work += points.at(p).getWeight();
2341#ifdef ALL_VORONOI_ACTIVE
2344#ifdef ALL_VTK_OUTPUT
2348 double volume = 1.0;
2350 for (
int i = 0; i < 3; ++i) {
2351 width[i] = vertices.at(1)[i] - vertices.at(0)[i];
2355 double surface = 2.0 * width[0] * width[1] +
2356 2.0 * width[1] * width[2] +
2357 2.0 * width[0] * width[2];
2359 for (
int r = 0; r < n_ranks; ++r) {
2360 if (r == localRank) {
2362 of.open(
"domain_data.dat", std::ios::out | std::ios::app);
2363 of << localRank <<
" " << width[0] <<
" " << width[1] <<
" "
2364 << width[2] <<
" " << vis_work <<
" " << volume <<
" "
2365 << surface <<
" " << (surface / volume) <<
" " << std::endl;
2368 MPI_Barrier(cart_comm);
2375#ifdef ALL_VORONOI_ACTIVE
2377#ifdef ALL_VTK_OUTPUT
2395 MPI_File_open(MPI_COMM_WORLD,
"particles_output.bin",
2396 MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &outfile);
2399 MPI_File_write_at(outfile, (MPI_Offset)(localRank *
sizeof(
int)), &n_points,
2400 1, MPI_INT, MPI_STATUS_IGNORE);
2403 long n_p = (long)n_points;
2406 MPI_Scan(&n_p, &offset, 1, MPI_LONG, MPI_SUM, cart_comm);
2409 long block_size =
sizeof(long);
2411 offset *= block_size;
2412 offset += n_ranks * (
sizeof(int) +
sizeof(long));
2415 outfile, (MPI_Offset)(n_ranks *
sizeof(
int) + localRank *
sizeof(
long)),
2416 &offset, 1, MPI_LONG, MPI_STATUS_IGNORE);
2418 for (
int n = 0; n < n_points; ++n) {
2419 long id = points.at(n).get_id();
2421 loc[0] = points.at(n)[0];
2422 loc[1] = points.at(n)[1];
2423 loc[2] = points.at(n)[2];
2424 MPI_File_write_at(outfile, (MPI_Offset)((offset + n * block_size)), &
id,
2425 1, MPI_LONG, MPI_STATUS_IGNORE);
2436 MPI_File_close(&outfile);
2475 if (localRank == 0) {
2476 std::cout << std::endl;
2477 std::cout <<
"min_ratio: " << min_ratio << std::endl;
2478 std::cout <<
"min_step: " << min_step << std::endl;
2479 std::cout << std::endl;
2481 MPI_Barrier(cart_comm);
2483 return EXIT_SUCCESS;
2485 std::cout << e.
what() << std::endl;
2486 return EXIT_FAILURE;