// =================================================================================== //
// PABLO 0.1 - PArallel Balanced Linear Octree                                         //
// Class_Para_Tree_3D.tpp - 3D specialization of Class_Para_Tree                       //
// =================================================================================== //
23 // =================================================================================== //
24 // CLASS SPECIALIZATION //
25 // =================================================================================== //
26 
/*! 3D specialization of Class_Para_Tree: a parallel linear octree whose
 * octants are stored in Morton order and may be distributed over MPI ranks. */
template<>
class Class_Para_Tree<3>{
 // ------------------------------------------------------------------------------- //
 // TYPEDEFS ----------------------------------------------------------------------- //
public:
 typedef vector<Class_Octant<3> >	OctantsType;   // container of local octants
 typedef vector<uint32_t>		u32vector;     // local (per-rank) indices/coords
 typedef vector<double>			dvector;
 typedef vector<vector<uint32_t> >	u32vector2D;
 typedef vector<vector<uint64_t> >	u64vector2D;
 typedef vector<double>			dvector1D;     // same as dvector (kept for API compatibility)
 typedef vector<vector<double> >	dvector2D;
 typedef vector<int>			ivector;
 typedef vector<vector<int> >		ivector2D;
41 
42  // ------------------------------------------------------------------------------- //
43  // MEMBERS ----------------------------------------------------------------------- //
44 public:
 //undistributed members
 uint64_t* partition_last_desc;            // Morton key of the last descendant owned by each rank (size nproc)
 uint64_t global_num_octants;              // total number of octants over all ranks
 map<int,vector<uint32_t> > bordersPerProc;// per neighbouring rank: local indices of border octants (presumably; confirm against setPboundGhosts)
 int nproc;                                // number of MPI processes
 uint8_t max_depth;                        // deepest refinement level reached (maintained by the adapt routines)
 //distributed members
 int rank;                                 // rank of this process (0 in serial builds)
 //distributed adapting members
 u32vector mapidx;                         // NOTE(review): presumably maps post-adapt octants to pre-adapt indices - confirm
 //auxiliary members
 int error_flag;                           // return code of the last MPI call
 bool serial;                              // true while the tree is not distributed
 //map member
 //info member
 uint64_t status;                          // modification counter exposed by getStatus()
 //log member
#if NOMPI==0
 MPI_Comm comm;                            // communicator used by all collective operations
#endif
82 
83  // ------------------------------------------------------------------------------- //
84  // CONSTRUCTORS ------------------------------------------------------------------ //
85 public:
91 #if NOMPI==0
92  Class_Para_Tree(string logfile="PABLO.log",MPI_Comm comm_ = MPI_COMM_WORLD) : log(logfile,comm_),comm(comm_){
93 #else
94  Class_Para_Tree(string logfile="PABLO.log") : log(logfile){
95 #endif
96  serial = true;
97  error_flag = 0;
98  max_depth = 0;
99  global_num_octants = octree.getNumOctants();
100 #if NOMPI==0
101  error_flag = MPI_Comm_size(comm,&nproc);
102  error_flag = MPI_Comm_rank(comm,&rank);
103 #else
104  rank = 0;
105  nproc = 1;
106 #endif
107  partition_first_desc = new uint64_t[nproc];
108  partition_last_desc = new uint64_t[nproc];
109  partition_range_globalidx = new uint64_t[nproc];
110  uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
111  uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
112  for(int p = 0; p < nproc; ++p){
113  partition_range_globalidx[p] = 0;
114  partition_last_desc[p] = lastDescMorton;
115  partition_last_desc[p] = firstDescMorton;
116  }
117  // Write info log
118  log.writeLog("---------------------------------------------");
119  log.writeLog("- PABLO PArallel Balanced Linear Octree -");
120  log.writeLog("---------------------------------------------");
121  log.writeLog(" ");
122  log.writeLog("---------------------------------------------");
123  log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
124  log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(3)));
125  log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_3D)));
126  log.writeLog("---------------------------------------------");
127  log.writeLog(" ");
128 #if NOMPI==0
129  MPI_Barrier(comm);
130 #endif
131  };
132 
133  //=================================================================================//
134 
143 #if NOMPI==0
144  Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
145 #else
146  Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
147 #endif
148  serial = true;
149  error_flag = 0;
150  max_depth = 0;
151  global_num_octants = octree.getNumOctants();
152 #if NOMPI==0
153  error_flag = MPI_Comm_size(comm,&nproc);
154  error_flag = MPI_Comm_rank(comm,&rank);
155 #else
156  rank = 0;
157  nproc = 1;
158 #endif
159  partition_first_desc = new uint64_t[nproc];
160  partition_last_desc = new uint64_t[nproc];
161  partition_range_globalidx = new uint64_t[nproc];
162  uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
163  uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
164  for(int p = 0; p < nproc; ++p){
165  partition_range_globalidx[p] = 0;
166  partition_last_desc[p] = lastDescMorton;
167  partition_last_desc[p] = firstDescMorton;
168  }
169  // Write info log
170  log.writeLog("---------------------------------------------");
171  log.writeLog("- PABLO PArallel Balanced Linear Octree -");
172  log.writeLog("---------------------------------------------");
173  log.writeLog(" ");
174  log.writeLog("---------------------------------------------");
175  log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
176  log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(3)));
177  log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_3D)));
178  log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
179  log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
180  log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
181  log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
182  log.writeLog("---------------------------------------------");
183  log.writeLog(" ");
184 #if NOMPI==0
185  MPI_Barrier(comm);
186 #endif
187  };
188 
189  //=================================================================================//
190 
#if NOMPI==0
 /*! Restart constructor: rebuilds a 3D tree from a saved list of octants.
  * \param[in] X,Y,Z Physical coordinates of the domain origin.
  * \param[in] L Physical length of the domain side.
  * \param[in] XYZ Logical (x0,y0,z0) anchor of each octant to restore.
  * \param[in] levels Refinement level of each octant (parallel to XYZ).
  * \param[in] logfile Name of the log file.
  * \param[in] comm_ MPI communicator used by the tree. */
 Class_Para_Tree(double & X, double & Y, double & Z, double & L, ivector2D & XYZ, ivector & levels, string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
#else
 Class_Para_Tree(double & X, double & Y, double & Z, double & L, ivector2D & XYZ, ivector & levels, string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
#endif
  uint8_t lev, iface;
  uint32_t x0, y0, z0;
  uint32_t NumOctants = XYZ.size();
  octree.octants.resize(NumOctants);
  // Rebuild every octant and flag the faces lying on the domain boundary.
  for (uint32_t i=0; i<NumOctants; i++){
   lev = uint8_t(levels[i]);
   x0 = uint32_t(XYZ[i][0]);
   y0 = uint32_t(XYZ[i][1]);
   z0 = uint32_t(XYZ[i][2]);
   Class_Octant<3> oct(lev, x0, y0, z0,false);
   oct.setBalance(false);
   // X faces: 0 = negative side, 1 = positive side of the logical domain.
   if (x0 == 0){
    iface = 0;
    oct.setBound(iface);
   }
   else if (x0 == global3D.max_length - oct.getSize()){
    iface = 1;
    oct.setBound(iface);
   }
   // Y faces: 2 = negative side, 3 = positive side.
   if (y0 == 0){
    iface = 2;
    oct.setBound(iface);
   }
   else if (y0 == global3D.max_length - oct.getSize()){
    iface = 3;
    oct.setBound(iface);
   }
   // Z faces: 4 = negative side, 5 = positive side.
   if (z0 == 0){
    iface = 4;
    oct.setBound(iface);
   }
   else if (z0 == global3D.max_length - oct.getSize()){
    iface = 5;
    oct.setBound(iface);
   }
   octree.octants[i] = oct;

  }

#if NOMPI==0
  error_flag = MPI_Comm_size(comm,&nproc);
  error_flag = MPI_Comm_rank(comm,&rank);
  serial = true;
  if (nproc > 1 ) serial = false;
#else
  serial = true;
  rank = 0;
  nproc = 1;
#endif
  // NOTE(review): the partition tables are allocated but not filled here -
  // presumably updateAdapt() initializes them; confirm.
  partition_first_desc = new uint64_t[nproc];
  partition_last_desc = new uint64_t[nproc];
  partition_range_globalidx = new uint64_t[nproc];

  setFirstDesc();
  setLastDesc();
  octree.updateLocalMaxDepth();
  updateAdapt();
#if NOMPI==0
  setPboundGhosts();
#endif
  // Write info log
  log.writeLog("---------------------------------------------");
  log.writeLog("- PABLO PArallel Balanced Linear Octree -");
  log.writeLog("---------------------------------------------");
  log.writeLog(" ");
  log.writeLog("---------------------------------------------");
  log.writeLog("- PABLO restart -");
  log.writeLog("---------------------------------------------");
  log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
  log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(3)));
  log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_3D)));
  log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
  log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
  log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
  log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
  log.writeLog(" Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
  log.writeLog("---------------------------------------------");
  log.writeLog(" ");
#if NOMPI==0
  MPI_Barrier(comm);
#endif
 };
288 
289  //=================================================================================//
290 
#if NOMPI==0
 /*! Restart constructor (by-value domain parameters): rebuilds a 3D tree
  * from a saved list of octants. Same behavior as the by-reference overload.
  * \param[in] X,Y,Z Physical coordinates of the domain origin.
  * \param[in] L Physical length of the domain side.
  * \param[in] XYZ Logical (x0,y0,z0) anchor of each octant to restore.
  * \param[in] levels Refinement level of each octant (parallel to XYZ).
  * \param[in] logfile Name of the log file.
  * \param[in] comm_ MPI communicator used by the tree. */
 Class_Para_Tree(double X, double Y, double Z, double L, ivector2D & XYZ, ivector & levels, string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
#else
 Class_Para_Tree(double X, double Y, double Z, double L, ivector2D & XYZ, ivector & levels, string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
#endif
  uint8_t lev, iface;
  uint32_t x0, y0, z0;
  uint32_t NumOctants = XYZ.size();
  octree.octants.resize(NumOctants);
  // Rebuild every octant and flag the faces lying on the domain boundary.
  for (uint32_t i=0; i<NumOctants; i++){
   lev = uint8_t(levels[i]);
   x0 = uint32_t(XYZ[i][0]);
   y0 = uint32_t(XYZ[i][1]);
   z0 = uint32_t(XYZ[i][2]);
   Class_Octant<3> oct(lev, x0, y0, z0,false);
   oct.setBalance(false);
   // X faces: 0 = negative side, 1 = positive side of the logical domain.
   if (x0 == 0){
    iface = 0;
    oct.setBound(iface);
   }
   else if (x0 == global3D.max_length - oct.getSize()){
    iface = 1;
    oct.setBound(iface);
   }
   // Y faces: 2 = negative side, 3 = positive side.
   if (y0 == 0){
    iface = 2;
    oct.setBound(iface);
   }
   else if (y0 == global3D.max_length - oct.getSize()){
    iface = 3;
    oct.setBound(iface);
   }
   // Z faces: 4 = negative side, 5 = positive side.
   if (z0 == 0){
    iface = 4;
    oct.setBound(iface);
   }
   else if (z0 == global3D.max_length - oct.getSize()){
    iface = 5;
    oct.setBound(iface);
   }
   octree.octants[i] = oct;

  }

#if NOMPI==0
  error_flag = MPI_Comm_size(comm,&nproc);
  error_flag = MPI_Comm_rank(comm,&rank);
  serial = true;
  if (nproc > 1 ) serial = false;
#else
  serial = true;
  rank = 0;
  nproc = 1;
#endif
  // NOTE(review): the partition tables are allocated but not filled here -
  // presumably updateAdapt() initializes them; confirm.
  partition_first_desc = new uint64_t[nproc];
  partition_last_desc = new uint64_t[nproc];
  partition_range_globalidx = new uint64_t[nproc];

  setFirstDesc();
  setLastDesc();
  octree.updateLocalMaxDepth();
  updateAdapt();
#if NOMPI==0
  setPboundGhosts();
#endif
  // Write info log
  log.writeLog("---------------------------------------------");
  log.writeLog("- PABLO PArallel Balanced Linear Octree -");
  log.writeLog("---------------------------------------------");
  log.writeLog(" ");
  log.writeLog("---------------------------------------------");
  log.writeLog("- PABLO restart -");
  log.writeLog("---------------------------------------------");
  log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
  log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(3)));
  log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_3D)));
  log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
  log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
  log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
  log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
  log.writeLog(" Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
  log.writeLog("---------------------------------------------");
  log.writeLog(" ");
#if NOMPI==0
  MPI_Barrier(comm);
#endif
 };
388 
389  //=================================================================================//
390 
391  ~Class_Para_Tree(){
392  log.writeLog("---------------------------------------------");
393  log.writeLog("--------------- R.I.P. PABLO ----------------");
394  log.writeLog("---------------------------------------------");
395  log.writeLog("---------------------------------------------");
396  };
397 
398  // =============================================================================== //
399  // GET/SET METHODS ----------------------------------------------------------------------- //
400 
401 public:
402  // Octant get/set Methods
407  return global3D;
408  }
413  double getX(Class_Octant<3>* const oct){
414  return trans.mapX(oct->getX());
415  };
416 
417  // Octant get/set Methods
422  double getY(Class_Octant<3>* const oct){
423  return trans.mapY(oct->getY());
424  };
425 
426  // Octant get/set Methods
431  double getZ(Class_Octant<3>* const oct){
432  return trans.mapZ(oct->getZ());
433  };
434 
439  double getSize(Class_Octant<3>* const oct){ // Get the size of octant if mapped in hypercube
440  return trans.mapSize(oct->getSize());
441  };
442 
447  double getArea(Class_Octant<3>* const oct){ // Get the face area of octant
448  return trans.mapArea(oct->getArea());
449  };
450 
 /*! Get the volume of an octant, mapped to the physical domain.
  * \param[in] oct Pointer to the target octant.
  * \return Physical volume of the octant. */
 double getVolume(Class_Octant<3>* const oct){ // Get the volume of octant
  return trans.mapVolume(oct->getVolume());
 };
458 
463  void getCenter(Class_Octant<3>* oct, // Get a vector of DIM with the coordinates of the center of octant
464  dvector & center){
465  dvector center_ = oct->getCenter();
466  trans.mapCenter(center_, center);
467  };
468 
473  vector<double> getCenter(Class_Octant<3>* oct) {
474  vector<double> center;
475  dvector center_ = oct->getCenter();
476  trans.mapCenter(center_, center);
477  return center;
478  }
479 
485  vector<double> getFaceCenter(Class_Octant<3>* oct, uint8_t iface) {
486  vector<double> center;
487  vector<double> center_ = oct->getFaceCenter(iface);
488  trans.mapCenter(center_, center);
489  return center;
490  }
491 
497  void getFaceCenter(Class_Octant<3>* oct, uint8_t iface, vector<double>& center) {
498  vector<double> center_ = oct->getFaceCenter(iface);
499  trans.mapCenter(center_, center);
500  }
501 
507  vector<double> getEdgeCenter(Class_Octant<3>* oct, uint8_t iedge) {
508  vector<double> center;
509  vector<double> center_ = oct->getEdgeCenter(iedge);
510  trans.mapCenter(center_, center);
511  return center;
512  }
513 
519  void getEdgeCenter(Class_Octant<3>* oct, uint8_t iedge, vector<double>& center) {
520  vector<double> center_ = oct->getEdgeCenter(iedge);
521  trans.mapCenter(center_, center);
522  }
523 
529  vector<double> getNode(uint32_t idx, uint8_t inode) {
530  vector<double> node;
531  u32vector node_ = octree.octants[idx].getNode(inode);
532  trans.mapNode(node_, node);
533  return node;
534  }
535 
541  void getNode(uint32_t idx, uint8_t inode, vector<double>& node) {
542  u32vector node_ = octree.octants[idx].getNode(inode);
543  trans.mapNode(node_, node);
544  }
545 
550  void getNodes(Class_Octant<3>* oct, // Get a vector of vector (size [nnodes][DIM]) with the nodes of octant
551  dvector2D & nodes){
552  u32vector2D nodes_;
553  oct->getNodes(nodes_);
554  trans.mapNodes(nodes_, nodes);
555  };
556 
557 
562  dvector2D getNodes(Class_Octant<3>* oct){
563  dvector2D nodes;
564  u32vector2D nodes_;
565  oct->getNodes(nodes_);
566  return nodes;
567  };
568 
574  void getNormal(Class_Octant<3>* oct, // Get a vector of vector (size [DIM]) with the normal of the iface
575  uint8_t & iface,
576  dvector & normal){
577  vector<int8_t> normal_;
578  oct->getNormal(iface, normal_);
579  trans.mapNormals(normal_, normal);
580 
581  };
582 
589  uint8_t & iface){
590  dvector normal;
591  vector<int8_t> normal_;
592  oct->getNormal(iface, normal_);
593  trans.mapNormals(normal_, normal);
594  return normal;
595  };
596 
601  int8_t getMarker(Class_Octant<3>* oct){ // Get refinement/coarsening marker for idx-th octant
602  return oct->getMarker();
603  };
604 
609  uint8_t getLevel(Class_Octant<3>* oct){ // Get refinement/coarsening marker for idx-th octant
610  return oct->getLevel();
611  };
612 
618  bool getBound(Class_Octant<3>* oct, uint8_t iface){ // Get refinement/coarsening marker for idx-th octant
619  return oct->getBound(iface);
620  };
621 
627  bool getPbound(Class_Octant<3>* oct, uint8_t iface){ // Get refinement/coarsening marker for idx-th octant
628  return oct->getPbound(iface);
629  };
630 
636  int temp = 0;
637  for(int i = 0; i < global3D.nfaces; ++i)
638  temp += oct->getBound(i);
639  return temp != 0;
640  };
641 
646  bool getPbound(Class_Octant<3>* oct){ // Get refinement/coarsening marker for idx-th octant
647  int temp = 0;
648  for(int i = 0; i < global3D.nfaces; ++i)
649  temp += oct->getPbound(i);
650  return temp != 0;
651  };
652 
657  bool getBalance(Class_Octant<3>* oct){ // Get if balancing-blocked idx-th octant
658  return !oct->getNotBalance();
659  };
660 
661 #if NOMPI==0
662 
667  if (serial)
668  return false;
669  return (findOwner(oct->computeMorton()) != rank);
670  };
671 #endif
672 
678  return oct->getIsNewR();
679  };
680 
686  return oct->getIsNewC();
687  };
688 
694 #if NOMPI==0
695  if (getIsGhost(oct)){
696  uint32_t idx = octree.findGhostMorton(oct->computeMorton());
697  return octree.globalidx_ghosts[idx];
698  }
699 #endif
700  uint32_t idx = octree.findMorton(oct->computeMorton());
701  if (rank){
702  return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
703  }
704  return uint64_t(idx);
705  };
706 
711  uint32_t getIdx(Class_Octant<3>* oct){
712 #if NOMPI==0
713  if (getIsGhost(oct)){
714  return octree.findGhostMorton(oct->computeMorton());
715  }
716 #endif
717  return octree.findMorton(oct->computeMorton());
718  };
719 
 /*! Set the refinement(+)/coarsening(-) marker of an octant.
  * \param[in] oct Pointer to the target octant.
  * \param[in] marker Marker value to assign. */
 void setMarker(Class_Octant<3>* oct, int8_t marker){ // Set refinement/coarsening marker for idx-th octant
  oct->setMarker(marker);
 };
727 
 /*! Enable/disable 2:1 balancing for an octant.
  * \param[in] oct Pointer to the target octant.
  * \param[in] balance True to balance the octant.
  * Note: the stored flag has inverted meaning, hence the negation. */
 void setBalance(Class_Octant<3>* oct, bool balance){ // Set if balancing-blocked idx-th octant
  oct->setBalance(!balance);
 };
735 
736 private:
737  // ------------------------------------------------------------------------------- //
738  //No pointer Octants get/set Methods
739 
744  double getX(Class_Octant<3> oct) {
745  return trans.mapX(oct.getX());
746  }
747 
752  double getY(Class_Octant<3> oct) {
753  return trans.mapY(oct.getY());
754  }
755 
760  double getZ(Class_Octant<3> oct) {
761  return trans.mapZ(oct.getZ());
762  }
763 
768  double getSize(Class_Octant<3> oct) {
769  return trans.mapSize(oct.getSize());
770  }
771 
776  double getArea(Class_Octant<3> oct) {
777  return trans.mapArea(oct.getArea());
778  }
779 
 /*! Get the physical volume of an octant (by-value overload).
  * \param[in] oct Target octant.
  * \return Physical volume. */
 double getVolume(Class_Octant<3> oct) {
  return trans.mapVolume(oct.getVolume());
 }
787 
792  void getCenter(Class_Octant<3> oct,
793  vector<double>& center) {
794  dvector center_ = oct.getCenter();
795  trans.mapCenter(center_, center);
796  }
797 
802  vector<double> getCenter(Class_Octant<3> oct) {
803  vector<double> center;
804  dvector center_ = oct.getCenter();
805  trans.mapCenter(center_, center);
806  return center;
807  }
808 
813  void getNodes(Class_Octant<3> oct,
814  dvector2D & nodes) {
815  u32vector2D nodes_;
816  oct.getNodes(nodes_);
817  trans.mapNodes(nodes_, nodes);
818  }
819 
824  dvector2D getNodes(Class_Octant<3> oct){
825  dvector2D nodes;
826  u32vector2D nodes_;
827  oct.getNodes(nodes_);
828  trans.mapNodes(nodes_, nodes);
829  return nodes;
830  }
831 
837  void getNormal(Class_Octant<3> oct,
838  uint8_t & iface,
839  dvector & normal) {
840  vector<int8_t> normal_;
841  oct.getNormal(iface, normal_);
842  trans.mapNormals(normal_, normal);
843  }
844 
850  dvector getNormal(Class_Octant<3> oct,
851  uint8_t & iface){
852  dvector normal;
853  vector<int8_t> normal_;
854  oct.getNormal(iface, normal_);
855  trans.mapNormals(normal_, normal);
856  return normal;
857  }
858 
863  int8_t getMarker(Class_Octant<3> oct){ // Get refinement/coarsening marker for idx-th octant
864  return oct.getMarker();
865  };
866 
871  uint8_t getLevel(Class_Octant<3> oct){ // Get refinement/coarsening marker for idx-th octant
872  return oct.getLevel();
873  };
874 
879  bool getBalance(Class_Octant<3> oct){ // Get if balancing-blocked idx-th octant
880  return !oct.getNotBalance();
881  };
882 
883 #if NOMPI==0
884 
888  bool getIsGhost(Class_Octant<3> oct){
889  return (findOwner(oct.computeMorton()) != rank);
890  };
891 #endif
892 
 /*! Get the global index of an octant (by-value overload).
  * \param[in] oct Target octant.
  * \return Global index of the octant across all ranks. */
 uint64_t getGlobalIdx(Class_Octant<3> oct){
#if NOMPI==0
  // Ghost octants carry their global index in a dedicated table.
  if (getIsGhost(oct)){
   uint32_t idx = octree.findGhostMorton(oct.computeMorton());
   return octree.globalidx_ghosts[idx];
  }
  else{
#endif
  // Internal octant: offset the local index by the cumulated count of
  // the previous ranks (rank 0 needs no offset).
  uint32_t idx = octree.findMorton(oct.computeMorton());
  if (rank){
   return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
  }
  else{
   return uint64_t(idx);
  };
#if NOMPI==0
  };
#endif
  // Fallback (unreachable: every branch above returns).
  return global_num_octants;
 };
917 
 /*! Get the local index of an octant (by-value overload).
  * \param[in] oct Target octant.
  * \return Local index in the octant (or ghost) container. */
 uint32_t getIdx(Class_Octant<3> oct){
#if NOMPI==0
  // Ghosts are searched in the ghost container.
  if (getIsGhost(oct)){
   return octree.findGhostMorton(oct.computeMorton());
  }
  else{
#endif
  return octree.findMorton(oct.computeMorton());
#if NOMPI==0
  };
#endif
  // Fallback (unreachable: both branches above return).
  return octree.getNumOctants();
 };
935 
936 
 /*! Set the refinement/coarsening marker (by-value overload).
  * NOTE(review): oct is passed by value, so the marker is set on a local
  * copy and discarded - this overload has no observable effect; confirm
  * whether a reference was intended. */
 void setMarker(Class_Octant<3> oct, int8_t marker){ // Set refinement/coarsening marker for idx-th octant
  oct.setMarker(marker);
 };
944 
 /*! Enable/disable 2:1 balancing (by-value overload).
  * NOTE(review): oct is passed by value, so the flag is set on a local
  * copy and discarded - this overload has no observable effect; confirm
  * whether a reference was intended. */
 void setBalance(Class_Octant<3> oct, bool balance){ // Set if balancing-blocked idx-th octant
  oct.setBalance(!balance);
 };
952 
953  // ------------------------------------------------------------------------------- //
954  // Index get/set Methods
955 
956 public:
961  double getX(uint32_t idx) {
962  return trans.mapX(octree.octants[idx].getX());
963  }
964 
969  double getY(uint32_t idx) {
970  return trans.mapY(octree.octants[idx].getY());
971  }
972 
977  double getZ(uint32_t idx) {
978  return trans.mapZ(octree.octants[idx].getZ());
979  }
980 
985  double getSize(uint32_t idx) {
986  return trans.mapSize(octree.octants[idx].getSize());
987  }
988 
993  double getArea(uint32_t idx) {
994  return trans.mapArea(octree.octants[idx].getArea());
995  }
996 
 /*! Get the physical volume of the idx-th local octant.
  * \param[in] idx Local index of the octant.
  * \return Physical volume. */
 double getVolume(uint32_t idx) {
  return trans.mapVolume(octree.octants[idx].getVolume());
 }
1004 
1009  void getCenter(uint32_t idx,
1010  vector<double>& center) {
1011  dvector center_ = octree.octants[idx].getCenter();
1012  trans.mapCenter(center_, center);
1013  }
1014 
1019  vector<double> getCenter(uint32_t idx) {
1020  vector<double> center;
1021  dvector center_ = octree.octants[idx].getCenter();
1022  trans.mapCenter(center_, center);
1023  return center;
1024  }
1025 
1031  vector<double> getFaceCenter(uint32_t idx, uint8_t iface) {
1032  vector<double> center;
1033  vector<double> center_ = octree.octants[idx].getFaceCenter(iface);
1034  trans.mapCenter(center_, center);
1035  return center;
1036  }
1037 
1043  void getFaceCenter(uint32_t idx, uint8_t iface, vector<double>& center) {
1044  vector<double> center_ = octree.octants[idx].getFaceCenter(iface);
1045  trans.mapCenter(center_, center);
1046  }
1047 
1053  vector<double> getEdgeCenter(uint32_t idx, uint8_t iedge) {
1054  vector<double> center;
1055  vector<double> center_ = octree.octants[idx].getEdgeCenter(iedge);
1056  trans.mapCenter(center_, center);
1057  return center;
1058  }
1059 
1065  void getEdgeCenter(uint32_t idx, uint8_t iedge, vector<double>& center) {
1066  vector<double> center_ = octree.octants[idx].getEdgeCenter(iedge);
1067  trans.mapCenter(center_, center);
1068  }
1069 
1074  void getNodes(uint32_t idx,
1075  dvector2D & nodes) {
1076  u32vector2D nodes_;
1077  octree.octants[idx].getNodes(nodes_);
1078  trans.mapNodes(nodes_, nodes);
1079  }
1080 
1085  dvector2D getNodes(uint32_t idx){
1086  dvector2D nodes;
1087  u32vector2D nodes_;
1088  octree.octants[idx].getNodes(nodes_);
1089  trans.mapNodes(nodes_, nodes);
1090  return nodes;
1091  }
1092 
1098  void getNormal(uint32_t idx,
1099  uint8_t & iface,
1100  dvector & normal) {
1101  vector<int8_t> normal_;
1102  octree.octants[idx].getNormal(iface, normal_);
1103  trans.mapNormals(normal_, normal);
1104  }
1105 
1111  dvector getNormal(uint32_t idx,
1112  uint8_t & iface){
1113  dvector normal;
1114  vector<int8_t> normal_;
1115  octree.octants[idx].getNormal(iface, normal_);
1116  trans.mapNormals(normal_, normal);
1117  return normal;
1118  }
1119 
1124  int8_t getMarker(uint32_t idx){ // Get refinement/coarsening marker for idx-th octant
1125  return octree.getMarker(idx);
1126  };
1127 
1132  uint8_t getLevel(uint32_t idx){ // Get refinement/coarsening marker for idx-th octant
1133  return octree.getLevel(idx);
1134  };
1135 
1140  bool getBalance(uint32_t idx){ // Get if balancing-blocked idx-th octant
1141  return !octree.getBalance(idx);
1142  };
1143 
1144 #if NOMPI==0
1145 
1149  bool getIsGhost(uint32_t idx){
1150  return (findOwner(octree.octants[idx].computeMorton()) != rank);
1151  };
1152 #endif
1153 
1158  bool getIsNewR(uint32_t idx){
1159  return octree.octants[idx].getIsNewR();
1160  };
1161 
1166  bool getIsNewC(uint32_t idx){
1167  return octree.octants[idx].getIsNewC();
1168  };
1169 
1174  uint64_t getGlobalIdx(uint32_t idx){
1175  if (rank){
1176  return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
1177  }
1178  else{
1179  return uint64_t(idx);
1180  };
1181  return global_num_octants;
1182  };
1183 
1188  uint64_t getGhostGlobalIdx(uint32_t idx){
1189  if (idx<octree.size_ghosts){
1190  return octree.globalidx_ghosts[idx];
1191  };
1192  return uint64_t(octree.size_ghosts);
1193  };
1194 
1199  void setMarker(uint32_t idx, int8_t marker){ // Set refinement/coarsening marker for idx-th octant
1200  octree.setMarker(idx, marker);
1201  };
1202 
1207  void setBalance(uint32_t idx, bool balance){ // Set if balancing-blocked idx-th octant
1208  octree.setBalance(idx, !balance);
1209  };
1210 
1211  // ------------------------------------------------------------------------------- //
1212  // Local Tree get/set Methods
1213 
1217  uint64_t getStatus(){
1218  return status;
1219  }
1220 
1224  uint32_t getNumOctants() const{
1225  return octree.getNumOctants();
1226  };
1227 
1231  uint32_t getNumGhosts() const{
1232  return octree.getSizeGhost();
1233  };
1234 
1238  uint8_t getLocalMaxDepth() const{ // Get max depth reached in local tree
1239  return octree.getLocalMaxDepth();
1240  };
1241 
1245  uint8_t getBalanceCodimension() const{
1246  return octree.getBalanceCodim();
1247  };
1248 
1254  void setBalanceCodimension(uint8_t b21codim){
1255  octree.setBalanceCodim(b21codim);
1256  };
1257 
1258  // ------------------------------------------------------------------------------- //
1259 
1260  const Class_Octant<3> & getFirstDesc() const{
1261  return octree.getFirstDesc();
1262  };
1263 
1264  const Class_Octant<3> & getLastDesc() const{
1265  return octree.getLastDesc();
1266  };
1267 
1268  uint64_t getLastDescMorton(uint32_t idx) {
1269  return octree.octants[idx].buildLastDesc().computeMorton();
1270  };
1271 
1272 private:
1273 
1274  void setFirstDesc(){
1275  octree.setFirstDesc();
1276  };
1277 
1278  void setLastDesc(){
1279  octree.setLastDesc();
1280  };
1281 
1282 public:
1283 
1284  Class_Octant<3>& extractOctant(uint32_t idx) {
1285  return octree.extractOctant(idx) ;
1286  };
1287 
1288  // --------------------------------
1289 
1294  Class_Octant<3>* getOctant(uint32_t idx) {
1295  if (idx < octree.getNumOctants()){
1296  return &octree.octants[idx] ;
1297  }
1298  return NULL;
1299  };
1300 
1306  if (idx < octree.getSizeGhost()){
1307  return &octree.ghosts[idx] ;
1308  }
1309  return NULL;
1310  };
1311 
1312  //=================================================================================//
1313 
 /*! Find the MPI rank owning the octant with the given Morton key, by
  * binary search over partition_last_desc (the last-descendant Morton of
  * each rank, sorted by rank).
  * \param[in] morton Morton key of the target octant.
  * \return Rank owning that key.
  * NOTE(review): partition_last_desc[seed-1] is read when seed may be 0 and
  * [seed+1] when seed may be nproc-1; confirm the loop invariants keep seed
  * strictly inside the table for all nproc. */
 int findOwner(const uint64_t & morton){ // given the morton of an octant it finds the process owning that octant
  int p = -1;
  int length = nproc;
  int beg = 0;
  int end = nproc -1;
  int seed = nproc/2;
  while(beg != end){
   if(morton <= partition_last_desc[seed]){
    // morton is not after seed's range: owner is at or before seed...
    end = seed;
    // ...but if it is after seed-1's last descendant, seed itself owns it.
    if(morton > partition_last_desc[seed-1])
     beg = seed;
   }
   else{
    // morton is after seed's range: owner is after seed.
    beg = seed;
    if(morton <= partition_last_desc[seed+1])
     beg = seed + 1;
   }
   length = end - beg;
   seed = beg + length/2;
  }
  p = beg;
  return p;
 };
1344 
1354  void findNeighbours(uint32_t idx,
1355  uint8_t iface,
1356  uint8_t codim,
1357  u32vector & neighbours,
1358  vector<bool> & isghost){
1359 
1360  if (codim == 1){
1361  octree.findNeighbours(idx, iface, neighbours, isghost);
1362  }
1363  else if (codim == 2){
1364  octree.findEdgeNeighbours(idx, iface, neighbours, isghost);
1365  }
1366  else if (codim == 3){
1367  octree.findNodeNeighbours(idx, iface, neighbours, isghost);
1368  }
1369  else {
1370  neighbours.clear();
1371  isghost.clear();
1372  }
1373  };
1374 
1385  uint8_t iface,
1386  uint8_t codim,
1387  u32vector & neighbours,
1388  vector<bool> & isghost){
1389 
1390  if (codim == 1){
1391  octree.findNeighbours(oct, iface, neighbours, isghost);
1392  }
1393  else if (codim == 2){
1394  octree.findEdgeNeighbours(oct, iface, neighbours, isghost);
1395  }
1396  else if (codim == 3){
1397  octree.findNodeNeighbours(oct, iface, neighbours, isghost);
1398  }
1399  else {
1400  neighbours.clear();
1401  isghost.clear();
1402  }
1403  };
1404 
1413  void findGhostNeighbours(uint32_t idx,
1414  uint8_t iface,
1415  uint8_t codim,
1416  u32vector & neighbours){
1417 
1418  if (codim == 1){
1419  octree.findGhostNeighbours(idx, iface, neighbours);
1420  }
1421  else if (codim == 2){
1422  octree.findGhostEdgeNeighbours(idx, iface, neighbours);
1423  }
1424  else if (codim == 3){
1425  octree.findGhostNodeNeighbours(idx, iface, neighbours);
1426  }
1427  else {
1428  neighbours.clear();
1429  }
1430  };
1431 
1432 private:
1442  void findNeighbours(Class_Octant<3> oct,
1443  uint8_t iface,
1444  uint8_t codim,
1445  u32vector & neighbours,
1446  vector<bool> & isghost){
1447 
1448  if (codim == 1){
1449  octree.findNeighbours(&oct, iface, neighbours, isghost);
1450  }
1451  else if (codim == 2){
1452  octree.findEdgeNeighbours(&oct, iface, neighbours, isghost);
1453  }
1454  else if (codim == 3){
1455  octree.findNodeNeighbours(&oct, iface, neighbours, isghost);
1456  }
1457  else {
1458  neighbours.clear();
1459  isghost.clear();
1460  }
1461  };
1462 
1463 public:
1464  //-------------------------------------------------------------------------------- //
1465  // Intersections get Methods
1466 
1470  uint32_t getNumIntersections() {
1471  return octree.intersections.size();
1472  }
1473 
1479  if (idx < octree.intersections.size()){
1480  return &octree.intersections[idx];
1481  }
1482  return NULL;
1483  }
1484 
1490  if(inter->finer && inter->isghost)
1491  return octree.extractGhostOctant(inter->owners[inter->finer]).getLevel();
1492  else
1493  return octree.extractOctant(inter->owners[inter->finer]).getLevel();
1494  }
1495 
1501  return inter->finer;
1502  }
1503 
1509  return inter->getBound();
1510  }
1511 
1517  return inter->getIsGhost();
1518  }
1519 
1520 
1526  return inter->getPbound();
1527  }
1532  uint8_t getFace(Class_Intersection<3>* inter) {
1533  return inter->iface;
1534  }
1535 
1540  u32vector getOwners(Class_Intersection<3>* inter) {
1541  u32vector owners(2);
1542  owners[0] = inter->owners[0];
1543  owners[1] = inter->owners[1];
1544  return owners;
1545  }
1546 
1552  uint32_t Size;
1553  if(inter->finer && inter->isghost)
1554  Size = octree.extractGhostOctant(inter->owners[inter->finer]).getSize();
1555  else
1556  Size = octree.extractOctant(inter->owners[inter->finer]).getSize();
1557  return trans.mapSize(Size);
1558  }
1559 
1565  uint64_t Area;
1566  if(inter->finer && inter->isghost)
1567  Area = octree.extractGhostOctant(inter->owners[1]).getArea();
1568  else
1569  Area = octree.extractOctant(inter->owners[inter->finer]).getArea();
1570  return trans.mapArea(Area);
1571  }
1572 
1577  vector<double> getCenter(Class_Intersection<3>* inter){
1578  vector<double> center;
1579  Class_Octant<3> oct;
1580  if(inter->finer && inter->isghost)
1581  oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1582  else
1583  oct = octree.extractOctant(inter->owners[inter->finer]);
1584  dvector center_ = oct.getCenter();
1585  int sign = ( int(2*((inter->iface)%2)) - 1);
1586  double deplace = double (sign * int(oct.getSize())) / 2;
1587  center_[inter->iface/2] = uint32_t(int(center_[inter->iface/2]) + deplace);
1588  trans.mapCenter(center_, center);
1589  return center;
1590  }
1591 
1596  dvector2D getNodes(Class_Intersection<3>* inter){
1597  dvector2D nodes;
1598  Class_Octant<3> oct;
1599  if(inter->finer && inter->isghost)
1600  oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1601  else
1602  oct = octree.extractOctant(inter->owners[inter->finer]);
1603  uint8_t iface = inter->iface;
1604  u32vector2D nodes_all;
1605  oct.getNodes(nodes_all);
1606  u32vector2D nodes_(global3D.nnodesperface, u32vector(3));
1607  for (int i=0; i<global3D.nnodesperface; i++){
1608  for (int j=0; j<3; j++){
1609  nodes_[i][j] = nodes_all[global3D.facenode[iface][i]][j];
1610  }
1611  }
1612  trans.mapNodesIntersection(nodes_, nodes);
1613  return nodes;
1614  }
1615 
1621  dvector normal;
1622  Class_Octant<3> oct;
1623  if(inter->finer && inter->isghost)
1624  oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1625  else
1626  oct = octree.extractOctant(inter->owners[inter->finer]);
1627  uint8_t iface = inter->iface;
1628  vector<int8_t> normal_;
1629  oct.getNormal(iface, normal_);
1630  trans.mapNormals(normal_, normal);
1631  return normal;
1632  }
1633 
1634  //-------------------------------------------------------------------------------- //
1635  // No Pointer Intersections get Methods
1636 
1637 private:
1638  double getSize(Class_Intersection<3> inter) {
1639  uint32_t Size;
1640  if(inter.finer && inter.isghost)
1641  Size = octree.extractGhostOctant(inter.owners[inter.finer]).getSize();
1642  else
1643  Size = octree.extractOctant(inter.owners[inter.finer]).getSize();
1644  return trans.mapSize(Size);
1645  }
1646 
1647  double getArea(Class_Intersection<3> inter) {
1648  uint64_t Area;
1649  if(inter.finer && inter.isghost)
1650  Area = octree.extractGhostOctant(inter.owners[inter.finer]).getArea();
1651  else
1652  Area = octree.extractOctant(inter.owners[inter.finer]).getArea();
1653  return trans.mapArea(Area);
1654  }
1655 
1656  void getCenter(Class_Intersection<3> inter,
1657  vector<double> center){
1658  Class_Octant<3> oct;
1659  if(inter.finer && inter.isghost)
1660  oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1661  else
1662  oct = octree.extractOctant(inter.owners[inter.finer]);
1663  dvector center_ = oct.getCenter();
1664  int sign = ( int(2*((inter.iface)%2)) - 1);
1665  double deplace = double (sign * int(oct.getSize())) / 2;
1666  center_[inter.iface/2] = uint32_t(int(center_[inter.iface/2]) + deplace);
1667  trans.mapCenter(center_, center);
1668  }
1669 
1670  void getNodes(Class_Intersection<3> inter,
1671  dvector2D & nodes) {
1672  Class_Octant<3> oct;
1673  if(inter.finer && inter.isghost)
1674  oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1675  else
1676  oct = octree.extractOctant(inter.owners[inter.finer]);
1677  uint8_t iface = inter.iface;
1678  u32vector2D nodes_all;
1679  oct.getNodes(nodes_all);
1680  u32vector2D nodes_(global3D.nnodesperface, u32vector(3));
1681  for (int i=0; i<global3D.nnodesperface; i++){
1682  for (int j=0; j<3; j++){
1683  nodes_[i][j] = nodes_all[global3D.facenode[iface][i]][j];
1684  }
1685  }
1686  trans.mapNodesIntersection(nodes_, nodes);
1687  }
1688 
1689  void getNormal(Class_Intersection<3> inter,
1690  dvector & normal) {
1691  Class_Octant<3> oct;
1692  if(inter.finer && inter.isghost)
1693  oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1694  else
1695  oct = octree.extractOctant(inter.owners[inter.finer]);
1696  uint8_t iface = inter.iface;
1697  vector<int8_t> normal_;
1698  oct.getNormal(iface, normal_);
1699  trans.mapNormals(normal_, normal);
1700  }
1701 
1702  // =============================================================================== //
1703 
1704 public:
1708  octree.computeIntersections();
1709  }
1710 
1711  // =============================================================================== //
1712 
1717  Class_Octant<3>* getPointOwner(dvector & point){
1718  uint32_t noctants = octree.octants.size();
1719  uint32_t idxtry = noctants/2;
1720  uint32_t x, y, z;
1721  uint64_t morton, mortontry;
1722  int powner = 0;
1723 
1724  x = trans.mapX(point[0]);
1725  y = trans.mapX(point[1]);
1726  z = trans.mapX(point[2]);
1727  if ((x > global3D.max_length) || (y > global3D.max_length) || (z > global3D.max_length))
1728  return NULL;
1729 
1730  if (x == global3D.max_length) x = x - 1;
1731  if (y == global3D.max_length) y = y - 1;
1732  if (z == global3D.max_length) z = z - 1;
1733  morton = mortonEncode_magicbits(x,y,z);
1734 
1735 #if NOMPI==0
1736  if (!serial) powner = findOwner(morton);
1737 #else
1738  powner = 0;
1739 #endif
1740  if ((powner!=rank) && (!serial))
1741  return NULL;
1742 
1743 
1744  int32_t jump = idxtry;
1745  while(abs(jump) > 0){
1746  mortontry = octree.octants[idxtry].computeMorton();
1747  jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1748  idxtry += jump;
1749  if (idxtry > noctants-1){
1750  if (jump > 0){
1751  idxtry = noctants - 1;
1752  jump = 0;
1753  }
1754  else if (jump < 0){
1755  idxtry = 0;
1756  jump = 0;
1757  }
1758  }
1759  }
1760  if(octree.octants[idxtry].computeMorton() == morton){
1761  return &octree.octants[idxtry];
1762  }
1763  else{
1764  // Step until the mortontry lower than morton (one idx of distance)
1765  {
1766  while(octree.octants[idxtry].computeMorton() < morton){
1767  idxtry++;
1768  if(idxtry > noctants-1){
1769  idxtry = noctants-1;
1770  break;
1771  }
1772  }
1773  while(octree.octants[idxtry].computeMorton() > morton){
1774  idxtry--;
1775  if(idxtry > noctants-1){
1776  idxtry = 0;
1777  break;
1778  }
1779  }
1780  }
1781  return &octree.octants[idxtry];
1782  }
1783 
1784  };
1785 
1786  // =============================================================================== //
1787 
    /** Get the local index of the octant owning a point given in physical
     * coordinates.
     * \param[in] point Physical coordinates of the target point.
     * \return Local index of the owner octant; -1 (wrapping to the maximum
     * uint32_t value) if the point is outside the domain or owned by
     * another process.
     */
    uint32_t getPointOwnerIdx(dvector & point){
        uint32_t noctants = octree.octants.size();
        uint32_t idxtry = noctants/2;
        uint32_t x, y, z;
        uint64_t morton, mortontry;
        int powner = 0;

        // Map the physical point to logical (integer) coordinates.
        x = trans.mapX(point[0]);
        y = trans.mapY(point[1]);
        z = trans.mapZ(point[2]);

        // Reject points outside the logical range or below the domain origin.
        if ((x > global3D.max_length) || (y > global3D.max_length) || (z > global3D.max_length)
                || (point[0] < trans.X0) || (point[1] < trans.Y0) || (point[2] < trans.Z0)){
            return -1;
        }

        // Points on the upper boundary belong to the last octant on that axis.
        if (x == global3D.max_length) x = x - 1;
        if (y == global3D.max_length) y = y - 1;
        if (z == global3D.max_length) z = z - 1;
        morton = mortonEncode_magicbits(x,y,z);


#if NOMPI==0
        if(!serial) powner = findOwner(morton);
#else
        powner = 0;
#endif
        // In the distributed case only the owner process answers.
        if ((powner!=rank) && (!serial))
            return -1;


        // Binary jump search over the Morton-ordered local octants.
        int32_t jump = idxtry;
        while(abs(jump) > 0){

            mortontry = octree.octants[idxtry].computeMorton();
            jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
            idxtry += jump;
            // idxtry is unsigned: a negative overshoot wraps above noctants-1.
            if (idxtry > noctants-1){
                if (jump > 0){
                    idxtry = noctants - 1;
                    jump = 0;
                }
                else if (jump < 0){
                    idxtry = 0;
                    jump = 0;
                }
            }
        }
        if(octree.octants[idxtry].computeMorton() == morton){
            return idxtry;
        }
        else{
            // Step until the mortontry lower than morton (one idx of distance)
            {
                while(octree.octants[idxtry].computeMorton() < morton){
                    idxtry++;
                    if(idxtry > noctants-1){
                        idxtry = noctants-1;
                        break;
                    }
                }
                while(octree.octants[idxtry].computeMorton() > morton){
                    idxtry--;
                    if(idxtry > noctants-1){
                        idxtry = 0;
                        break;
                    }
                }
            }
            // The point lies in the octant with the greatest Morton key
            // not larger than the point key.
            return idxtry;
        }
    };
1864 
1865  // =============================================================================== //
1866 
    /** Get the owner octant of a point given in logical coordinates.
     * \param[in] point Logical (integer) coordinates of the target point.
     * \return Pointer to the local owner octant; NULL if the point is
     * outside the domain or owned by another process.
     */
    Class_Octant<3>* getPointOwner(u32vector & point){
        uint32_t noctants = octree.octants.size();
        uint32_t idxtry = noctants/2;
        uint32_t x, y, z;
        uint64_t morton, mortontry;
        int powner = 0;

        x = point[0];
        y = point[1];
        z = point[2];
        // NOTE(review): point holds logical coordinates, yet the lower-bound
        // test compares them against the physical origin trans.X0/Y0/Z0; the
        // logical-index overload getPointOwnerIdx(u32vector&) has no such
        // test — confirm this comparison is intended.
        if ((x > global3D.max_length) || (y > global3D.max_length) || (z > global3D.max_length)
                || (point[0] < trans.X0) || (point[1] < trans.Y0) || (point[2] < trans.Z0))
            return NULL;

        // Points on the upper boundary belong to the last octant on that axis.
        if (x == global3D.max_length) x = x - 1;
        if (y == global3D.max_length) y = y - 1;
        if (z == global3D.max_length) z = z - 1;
        morton = mortonEncode_magicbits(x,y,z);

#if NOMPI==0
        if(!serial) powner = findOwner(morton);
#else
        powner = 0;
#endif
        // In the distributed case only the owner process answers.
        if ((powner!=rank) && (!serial))
            return NULL;


        // Binary jump search over the Morton-ordered local octants.
        int32_t jump = idxtry;
        while(abs(jump) > 0){
            mortontry = octree.octants[idxtry].computeMorton();
            jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
            idxtry += jump;
            // idxtry is unsigned: a negative overshoot wraps above noctants-1.
            if (idxtry > noctants-1){
                if (jump > 0){
                    idxtry = noctants - 1;
                    jump = 0;
                }
                else if (jump < 0){
                    idxtry = 0;
                    jump = 0;
                }
            }
        }
        if(octree.octants[idxtry].computeMorton() == morton){
            return &octree.octants[idxtry];
        }
        else{
            // Step until the mortontry lower than morton (one idx of distance)
            {
                while(octree.octants[idxtry].computeMorton() < morton){
                    idxtry++;
                    if(idxtry > noctants-1){
                        idxtry = noctants-1;
                        break;
                    }
                }
                while(octree.octants[idxtry].computeMorton() > morton){
                    idxtry--;
                    if(idxtry > noctants-1){
                        idxtry = 0;
                        break;
                    }
                }
            }
            // The point lies in the octant with the greatest Morton key
            // not larger than the point key.
            return &octree.octants[idxtry];
        }

    };
1940 
1941  // =============================================================================== //
1942 
    /** Get the local index of the octant owning a point given in logical
     * coordinates.
     * \param[in] point Logical (integer) coordinates of the target point.
     * \return Local index of the owner octant; -1 (wrapping to the maximum
     * uint32_t value) if the point is outside the domain or owned by
     * another process.
     */
    uint32_t getPointOwnerIdx(u32vector & point){
        uint32_t noctants = octree.octants.size();
        uint32_t idxtry = noctants/2;
        uint32_t x, y, z;
        uint64_t morton, mortontry;
        int powner = 0;

        x = point[0];
        y = point[1];
        z = point[2];

        // Reject points outside the logical domain (coordinates are unsigned,
        // so no lower-bound test is needed).
        if ((x > global3D.max_length) || (y > global3D.max_length) || (z > global3D.max_length))
            return -1;

        // Points on the upper boundary belong to the last octant on that axis.
        if (x == global3D.max_length) x = x - 1;
        if (y == global3D.max_length) y = y - 1;
        if (z == global3D.max_length) z = z - 1;
        morton = mortonEncode_magicbits(x,y,z);

#if NOMPI==0
        if (!serial) powner = findOwner(morton);
#else
        powner = 0;
#endif
        // In the distributed case only the owner process answers.
        if ((powner!=rank) && (!serial))
            return -1;

        // Binary jump search over the Morton-ordered local octants.
        int32_t jump = idxtry;
        while(abs(jump) > 0){
            mortontry = octree.octants[idxtry].computeMorton();
            jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
            idxtry += jump;
            // idxtry is unsigned: a negative overshoot wraps above noctants-1.
            if (idxtry > noctants-1){
                if (jump > 0){
                    idxtry = noctants - 1;
                    jump = 0;
                }
                else if (jump < 0){
                    idxtry = 0;
                    jump = 0;
                }
            }
        }
        if(octree.octants[idxtry].computeMorton() == morton){
            return idxtry;
        }
        else{
            // Step until the mortontry lower than morton (one idx of distance)
            {
                while(octree.octants[idxtry].computeMorton() < morton){
                    idxtry++;
                    if(idxtry > noctants-1){
                        idxtry = noctants-1;
                        break;
                    }
                }
                while(octree.octants[idxtry].computeMorton() > morton){
                    idxtry--;
                    if(idxtry > noctants-1){
                        idxtry = 0;
                        break;
                    }
                }
            }
            // The point lies in the octant with the greatest Morton key
            // not larger than the point key.
            return idxtry;
        }

    };
2015 
2016  // =============================================================================== //
2017 
2023  uint32_t noctants = octree.octants.size();
2024  uint32_t idxtry = noctants/2;
2025  uint32_t x, y, z;
2026  uint64_t morton, mortontry;
2027  int powner = 0;
2028 
2029  x = uint32_t(point[0]);
2030  y = uint32_t(point[1]);
2031  z = uint32_t(point[2]);
2032  if ((point[0] < 0) || (point[0] > double(global3D.max_length)) || (point[1] < 0) || (point[1] > double(global3D.max_length)) || (point[2] < 0) || (point[2] > double(global3D.max_length)))
2033  return NULL;
2034 
2035  if (x == global3D.max_length) x = x - 1;
2036  if (y == global3D.max_length) y = y - 1;
2037  if (z == global3D.max_length) z = z - 1;
2038  morton = mortonEncode_magicbits(x,y,z);
2039 
2040 #if NOMPI==0
2041  if (!serial) powner = findOwner(morton);
2042 #else
2043  powner = 0;
2044 #endif
2045  if ((powner!=rank) && (!serial))
2046  return NULL;
2047 
2048 
2049  int32_t jump = idxtry;
2050  while(abs(jump) > 0){
2051  mortontry = octree.octants[idxtry].computeMorton();
2052  jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
2053  idxtry += jump;
2054  if (idxtry > noctants-1){
2055  if (jump > 0){
2056  idxtry = noctants - 1;
2057  jump = 0;
2058  }
2059  else if (jump < 0){
2060  idxtry = 0;
2061  jump = 0;
2062  }
2063  }
2064  }
2065  if(octree.octants[idxtry].computeMorton() == morton){
2066  return &octree.octants[idxtry];
2067  }
2068  else{
2069  // Step until the mortontry lower than morton (one idx of distance)
2070  {
2071  while(octree.octants[idxtry].computeMorton() < morton){
2072  idxtry++;
2073  if(idxtry > noctants-1){
2074  idxtry = noctants-1;
2075  break;
2076  }
2077  }
2078  while(octree.octants[idxtry].computeMorton() > morton){
2079  idxtry--;
2080  if(idxtry > noctants-1){
2081  idxtry = 0;
2082  break;
2083  }
2084  }
2085  }
2086  return &octree.octants[idxtry];
2087  }
2088 
2089  };
2090 
2091  // =============================================================================== //
2092 
    /** Get the local index of the octant owning a point given in logical
     * coordinates stored as doubles.
     * \param[in] point Logical coordinates of the target point.
     * \return Local index of the owner octant; -1 (wrapping to the maximum
     * uint32_t value) if the point is outside the domain or owned by
     * another process.
     */
    uint32_t getLogicalPointOwnerIdx(dvector & point){
        uint32_t noctants = octree.octants.size();
        uint32_t idxtry = noctants/2;
        uint32_t x, y, z;
        uint64_t morton, mortontry;
        int powner = 0;

        // Truncate the double-valued logical coordinates to integers.
        x = uint32_t(point[0]);
        y = uint32_t(point[1]);
        z = uint32_t(point[2]);
        // Bounds are checked on the original doubles to catch negative values
        // before the unsigned truncation above would hide them.
        if ((point[0] < 0) || (point[0] > double(global3D.max_length)) || (point[1] < 0) || (point[1] > double(global3D.max_length)) || (point[2] < 0) || (point[2] > double(global3D.max_length)))
            return -1;

        // Points on the upper boundary belong to the last octant on that axis.
        if (x == global3D.max_length) x = x - 1;
        if (y == global3D.max_length) y = y - 1;
        if (z == global3D.max_length) z = z - 1;
        morton = mortonEncode_magicbits(x,y,z);

#if NOMPI==0
        if (!serial) powner = findOwner(morton);
#else
        powner = 0;
#endif
        // In the distributed case only the owner process answers.
        if ((powner!=rank) && (!serial))
            return -1;


        // Binary jump search over the Morton-ordered local octants.
        int32_t jump = idxtry;
        while(abs(jump) > 0){
            mortontry = octree.octants[idxtry].computeMorton();
            jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
            idxtry += jump;
            // idxtry is unsigned: a negative overshoot wraps above noctants-1.
            if (idxtry > noctants-1){
                if (jump > 0){
                    idxtry = noctants - 1;
                    jump = 0;
                }
                else if (jump < 0){
                    idxtry = 0;
                    jump = 0;
                }
            }
        }
        if(octree.octants[idxtry].computeMorton() == morton){
            return idxtry;
        }
        else{
            // Step until the mortontry lower than morton (one idx of distance)
            {
                while(octree.octants[idxtry].computeMorton() < morton){
                    idxtry++;
                    if(idxtry > noctants-1){
                        idxtry = noctants-1;
                        break;
                    }
                }
                while(octree.octants[idxtry].computeMorton() > morton){
                    idxtry--;
                    if(idxtry > noctants-1){
                        idxtry = 0;
                        break;
                    }
                }
            }
            // The point lies in the octant with the greatest Morton key
            // not larger than the point key.
            return idxtry;
        }

    };
2165 
2166  // =============================================================================== //
2167  // PARATREE METHODS ----------------------------------------------------------------------- //
2168 
2169 private:
2170 #if NOMPI==0
2171  void computePartition(uint32_t* partition){ // compute octant partition giving the same number of octant to each process and redistributing the reminder
2172  uint32_t division_result = 0;
2173  uint32_t remind = 0;
2174  division_result = uint32_t(global_num_octants/(uint64_t)nproc);
2175  remind = (uint32_t)(global_num_octants%(uint64_t)nproc);
2176  for(uint32_t i = 0; i < (uint32_t)nproc; ++i)
2177  if(i<remind)
2178  partition[i] = division_result + 1;
2179  else
2180  partition[i] = division_result;
2181  };
2182 
2183  //=================================================================================//
2184 
    /** Compute the octant partition trying to give every process (almost)
     * the same total weight, exchanging octants from the head/tail of each
     * local range.
     * \param[out] partition Array of size nproc with the octant count per process.
     * \param[in] weight Per-octant weights of the local octants.
     */
    void computePartition(uint32_t* partition, dvector* weight){ // compute octant partition giving the same number of octant to each process and redistributing the reminder


        if(serial){

            // Serial case: every process stores all octants, so the ideal
            // per-process weight can be computed and assigned directly.
            double division_result = 0;
            double global_weight = 0.0;
            for (int i=0; i<weight->size(); i++){
                global_weight += (*weight)[i];
            }

            division_result = global_weight/(double)nproc;

            //Estimate resulting weight distribution starting from proc 0 (sending tail)

            //Estimate sending weight by each proc in initial conf (sending tail)
            uint32_t i = 0, tot = 0;
            int iproc = 0;
            // Greedily fill each process until it reaches the ideal weight;
            // the last process takes whatever is left.
            while (iproc < nproc-1){
                double partial_weight = 0.0;
                partition[iproc] = 0;
                while(partial_weight < division_result){
                    partial_weight += (*weight)[i];
                    tot++;
                    partition[iproc]++;
                    i++;
                }
                iproc++;
            }
            partition[nproc-1] = weight->size() - tot;

        }
        else{

            // Distributed case: gather per-process weights, simulate weight
            // exchanges towards the ideal share, then translate the simulated
            // exchanges into octant counts.
            double division_result = 0;
            double remind = 0;
            dvector local_weight(nproc,0.0);
            dvector temp_local_weight(nproc,0.0);
            // sending_weight[i][j] = weight process i plans to send to process j.
            dvector2D sending_weight(nproc, dvector(nproc,0.0));
            double* rbuff = new double[nproc];
            double global_weight = 0.0;
            for (int i=0; i<weight->size(); i++){
                local_weight[rank] += (*weight)[i];
            }
            error_flag = MPI_Allgather(&local_weight[rank],1,MPI_DOUBLE,rbuff,1,MPI_DOUBLE,comm);
            for (int i=0; i<nproc; i++){
                local_weight[i] = rbuff[i];
                global_weight += rbuff[i];
            }
            delete [] rbuff; rbuff = NULL;
            division_result = global_weight/(double)nproc;

            //Estimate resulting weight distribution starting from proc 0 (sending tail)

            temp_local_weight = local_weight;
            //Estimate sending weight by each proc in initial conf (sending tail)

            for (int iter = 0; iter < 1; iter++){

                vector<double> delta(nproc);
                for (int i=0; i<nproc; i++){
                    delta[i] = temp_local_weight[i] - division_result;
                }

                // Forward sweep: overweight processes push excess weight to
                // higher-ranked processes.
                for (int i=0; i<nproc-1; i++){

                    double post_weight = 0.0;
                    for (int j=i+1; j<nproc; j++){
                        post_weight += temp_local_weight[j];
                    }

                    if (temp_local_weight[i] > division_result){

                        delta[i] = temp_local_weight[i] - division_result;
                        if (post_weight < division_result*(nproc-i-1)){

                            double post_delta =  division_result*(nproc-i-1) - post_weight;
                            double delta_sending = min(local_weight[i], min(delta[i], post_delta));
                            int jproc = i+1;
                            double sending = 0;
                            while (delta_sending > 0 && jproc<nproc){
                                sending = min(division_result, delta_sending);
                                sending = min(sending, (division_result-temp_local_weight[jproc]));
                                sending = max(sending, 0.0);
                                sending_weight[i][jproc] += sending;
                                temp_local_weight[jproc] += sending;
                                temp_local_weight[i] -= sending;
                                delta_sending -= sending;
                                // NOTE(review): subtracting the residual
                                // delta_sending here (rather than the amount
                                // actually sent) looks suspicious — confirm.
                                delta[i] -= delta_sending;
                                jproc++;
                            }
                        } //post
                    }//weight>
                }//iproc

                // Backward sweep: overweight processes push excess weight to
                // lower-ranked processes.
                for (int i = nproc-1; i>0; i--){

                    double pre_weight = 0.0;
                    for (int j=i-1; j>=0; j--){
                        pre_weight += temp_local_weight[j];
                    }
                    if (temp_local_weight[i] > division_result){

                        delta[i] = temp_local_weight[i] - division_result;
                        if (pre_weight < division_result*(i)){

                            double pre_delta =  division_result*(i) - pre_weight;
                            double delta_sending = min(local_weight[i], min(temp_local_weight[i], min(delta[i], pre_delta)));
                            int jproc = i-1;
                            double sending = 0;
                            while (delta_sending > 0 && jproc >=0){
                                sending = min(division_result, delta_sending);
                                sending = min(sending, (division_result-temp_local_weight[jproc]));
                                sending = max(sending, 0.0);
                                sending_weight[i][jproc] += sending;
                                temp_local_weight[jproc] += sending;
                                temp_local_weight[i] -= sending;
                                delta_sending -= sending;
                                // NOTE(review): same residual subtraction as
                                // the forward sweep — confirm.
                                delta[i] -= delta_sending;
                                jproc--;
                            }
                        }//pre
                    }//weight>
                }//iproc
            }//iter

            //Update partition locally
            //to send
            // Turn the planned outgoing weight into a count of tail octants
            // to ship to higher ranks...
            u32vector sending_cell(nproc,0);
//            int i = (*weight).size();
            int i = getNumOctants();
            for (int jproc=nproc-1; jproc>rank; jproc--){
                double pack_weight = 0.0;
                while(pack_weight < sending_weight[rank][jproc] && i > 0){
                    i--;
                    pack_weight += (*weight)[i];
                    sending_cell[jproc]++;
                }
            }
            partition[rank] = i;
            i = 0;
            // ...and head octants to ship to lower ranks.
            for (int jproc=0; jproc<rank; jproc++){
                double pack_weight = 0.0;
                //while(pack_weight < sending_weight[rank][jproc] && i < (*weight).size()-1){
                while(pack_weight < sending_weight[rank][jproc] && i < getNumOctants()-1){
                    i++;
                    pack_weight += (*weight)[i];
                    sending_cell[jproc]++;
                }
            }
            partition[rank] -= i;

            //to receive
            // Exchange the send counts so each process learns how many
            // octants it will receive from every other process.
            u32vector rec_cell(nproc,0);
            MPI_Request* req = new MPI_Request[nproc*10];
            MPI_Status* stats = new MPI_Status[nproc*10];
            int nReq = 0;
            for (int iproc=0; iproc<nproc; iproc++){
                error_flag = MPI_Irecv(&rec_cell[iproc],1,MPI_UINT32_T,iproc,rank,comm,&req[nReq]);
                ++nReq;
            }
            for (int iproc=0; iproc<nproc; iproc++){
                error_flag = MPI_Isend(&sending_cell[iproc],1,MPI_UINT32_T,iproc,iproc,comm,&req[nReq]);
                ++nReq;
            }
            MPI_Waitall(nReq,req,stats);

            delete [] req; req = NULL;
            delete [] stats; stats = NULL;

            i = 0;
            for (int jproc=0; jproc<nproc; jproc++){
                i+= rec_cell[jproc];
            }
            partition[rank] += i;

            // Share the final local count with everybody.
            uint32_t part = partition[rank];
            error_flag = MPI_Allgather(&part,1,MPI_UINT32_T,partition,1,MPI_UINT32_T,comm);

        }
    };
2367 
2368  //=================================================================================//
2369 
    /** Compute the octant partition giving almost the same number of octants
     * to each process, while keeping complete families (octants sharing an
     * ancestor of the given level above the leaves) on the same process.
     * \param[out] partition Array of size nproc with the octant count per process.
     * \param[in] level_ Number of levels above the leaves whose families must
     * stay intact; clamped to [1, MAX_LEVEL_3D] relative to max_depth.
     */
    void computePartition(uint32_t* partition, // compute octant partition giving almost the same number of octant to each process
            uint8_t & level_) { // with complete families contained in octants of n "level" over the leaf in each process
        // Clamp the requested level against the current tree depth.
        uint8_t level = uint8_t(min(int(max(int(max_depth) - int(level_), int(1))) , MAX_LEVEL_3D));
        uint32_t* partition_temp = new uint32_t[nproc];
        // boundary_proc[i] = process currently owning the boundary octant of
        // the tentative range of process i.
        uint8_t* boundary_proc = new uint8_t[nproc-1];
        uint8_t dimcomm, indcomm;
        uint8_t* glbdimcomm = new uint8_t[nproc];
        uint8_t* glbindcomm = new uint8_t[nproc];

        uint32_t division_result = 0;
        uint32_t remind = 0;
        // Dh = logical size of an octant at the clamped level; a family
        // boundary is wherever all coordinates are multiples of Dh.
        uint32_t Dh = uint32_t(pow(double(2),double(MAX_LEVEL_3D-level)));
        uint32_t istart, nocts, rest, forw, backw;
        uint32_t i = 0, iproc, j;
        uint64_t sum;
        int32_t* pointercomm;
        // deplace[i] = signed shift applied to the i-th tentative boundary to
        // land it on the nearest family boundary.
        int32_t* deplace = new int32_t[nproc-1];
        division_result = uint32_t(global_num_octants/(uint64_t)nproc);
        remind = (uint32_t)(global_num_octants%(uint64_t)nproc);
        // Tentative even partition (remainder spread over the lowest ranks).
        for(uint32_t i = 0; i < uint32_t(nproc); ++i)
            if(i<remind)
                partition_temp[i] = division_result + 1;
            else
                partition_temp[i] = division_result;

        // Locate, for every tentative boundary, the process that currently
        // owns the boundary octant.
        j = 0;
        sum = 0;
        for (iproc=0; iproc<uint32_t(nproc-1); iproc++){
            sum += partition_temp[iproc];
            while(sum > partition_range_globalidx[j]){
                j++;
            }
            boundary_proc[iproc] = j;
        }
        nocts = octree.octants.size();
        sum = 0;
        dimcomm = 0;
        indcomm = 0;
        for (iproc=0; iproc<uint32_t(nproc-1); iproc++){
            deplace[iproc] = 1;
            sum += partition_temp[iproc];
            if (boundary_proc[iproc] == rank){
                if (dimcomm == 0){
                    indcomm = iproc;
                }
                dimcomm++;
                if (rank!=0)
                    istart = sum - partition_range_globalidx[rank-1] - 1;
                else
                    istart = sum;

                // Scan forward for the nearest family boundary.
                i = istart;
                rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh + octree.octants[i].getZ()%Dh;
                while(rest!=0){
                    // NOTE(review): i can reach nocts via the i++ below before
                    // this guard runs again, making octants[i] read one past
                    // the end — confirm.
                    if (i==nocts){
                        i = istart + nocts;
                        break;
                    }
                    i++;
                    rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh + octree.octants[i].getZ()%Dh;
                }
                forw = i - istart;
                // Scan backward for the nearest family boundary.
                i = istart;
                rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh + octree.octants[i].getZ()%Dh;
                while(rest!=0){
                    if (i==0){
                        i = istart - nocts;
                        break;
                    }
                    i--;
                    rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh + octree.octants[i].getZ()%Dh;
                }
                backw = istart - i;
                // Keep the smaller of the two shifts.
                if (forw<backw)
                    deplace[iproc] = forw;
                else
                    deplace[iproc] = -(int32_t)backw;
            }
        }

        // Broadcast every process's boundary shifts so all processes agree
        // on the corrected partition.
        error_flag = MPI_Allgather(&dimcomm,1,MPI_UINT8_T,glbdimcomm,1,MPI_UINT8_T,comm);
        error_flag = MPI_Allgather(&indcomm,1,MPI_UINT8_T,glbindcomm,1,MPI_UINT8_T,comm);
        for (iproc=0; iproc<uint32_t(nproc); iproc++){
            pointercomm = &deplace[glbindcomm[iproc]];
            error_flag = MPI_Bcast(pointercomm, glbdimcomm[iproc], MPI_INT32_T, iproc, comm);
        }

        // Apply the shifts: each boundary move changes the counts of the two
        // processes adjacent to it.
        for (iproc=0; iproc<uint32_t(nproc); iproc++){
            if (iproc < uint32_t(nproc-1))
                partition[iproc] = partition_temp[iproc] + deplace[iproc];
            else
                partition[iproc] = partition_temp[iproc];
            if (iproc !=0)
                partition[iproc] = partition[iproc] - deplace[iproc-1];
        }

        delete [] partition_temp; partition_temp = NULL;
        delete [] boundary_proc; boundary_proc = NULL;
        delete [] glbdimcomm; glbdimcomm = NULL;
        delete [] glbindcomm; glbindcomm = NULL;
        delete [] deplace; deplace = NULL;

    };
2474 
2475  //=================================================================================//
2476 
    /** Update the distributed members of Class_Para_Tree after a load
     * balance: per-process global index ranges and the Morton keys of the
     * first/last descendants of every process. Collective (calls
     * MPI_Allgather); clears the serial flag.
     */
    void updateLoadBalance(){ //update Class_Para_Tree members after a load balance
        octree.updateLocalMaxDepth();
        //update partition_range_globalidx
        uint64_t* rbuff = new uint64_t[nproc];
        uint64_t local_num_octants = octree.getNumOctants();
        error_flag = MPI_Allgather(&local_num_octants,1,MPI_UINT64_T,rbuff,1,MPI_UINT64_T,comm);
        // partition_range_globalidx[p] = global index of the last octant
        // owned by process p (prefix sum of the gathered counts, minus one).
        for(int p = 0; p < nproc; ++p){
            partition_range_globalidx[p] = 0;
            for(int pp = 0; pp <=p; ++pp)
                partition_range_globalidx[p] += rbuff[pp];
            --partition_range_globalidx[p];
        }
        //update first last descendant
        octree.setFirstDesc();
        octree.setLastDesc();
        //update partition_range_position
        uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
        error_flag = MPI_Allgather(&lastDescMorton,1,MPI_UINT64_T,partition_last_desc,1,MPI_UINT64_T,comm);
        uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
        error_flag = MPI_Allgather(&firstDescMorton,1,MPI_UINT64_T,partition_first_desc,1,MPI_UINT64_T,comm);
        // After a (re)distribution the tree is no longer serial.
        serial = false;
        delete [] rbuff; rbuff = NULL;
    };
2500 
2501  //=================================================================================//
2502 
2503  void setPboundGhosts(){
2504  //BUILD BORDER OCTANT INDECES VECTOR (map value) TO BE SENT TO THE RIGHT PROCESS (map key)
2505  //find local octants to be sent as ghost to the right processes
2506  //it visits the local octants building virtual neighbors on each octant face
2507  //find the owner of these virtual neighbor and build a map (process,border octants)
2508  //this map contains the local octants as ghosts for neighbor processes
2509 
2510  Class_Local_Tree<3>::OctantsType::iterator end = octree.octants.end();
2511  Class_Local_Tree<3>::OctantsType::iterator begin = octree.octants.begin();
2512  bordersPerProc.clear();
2513  for(Class_Local_Tree<3>::OctantsType::iterator it = begin; it != end; ++it){
2514  set<int> procs;
2515  //Virtual Face Neighbors
2516  for(uint8_t i = 0; i < global3D.nfaces; ++i){
2517  if(it->getBound(i) == false){
2518  uint32_t virtualNeighborsSize = 0;
2519  uint8_t nvirtualneigh=0;
2520  vector<uint64_t> virtualNeighbors = it->computeVirtualMorton(i,max_depth,virtualNeighborsSize);
2521  uint32_t maxDelta = virtualNeighborsSize/2;
2522  for(uint32_t j = 0; j <= maxDelta; ++j){
2523  int pBegin = findOwner(virtualNeighbors[j]);
2524  int pEnd = findOwner(virtualNeighbors[virtualNeighborsSize - 1 - j]);
2525  procs.insert(pBegin);
2526  procs.insert(pEnd);
2527  if(pBegin != rank || pEnd != rank){
2528  //it->setPbound(i,true);
2529  ++nvirtualneigh;
2530  }
2531  //else{
2532  // it->setPbound(i,false);
2533  //}
2534  }
2535  if (nvirtualneigh!=0){
2536  it->setPbound(i,true);
2537  }
2538  else{
2539  it->setPbound(i,false);
2540  }
2541  }
2542  }
2543  //Virtual Edge Neighbors
2544  for(uint8_t e = 0; e < global3D.nedges; ++e){
2545  uint32_t virtualEdgeNeighborSize = 0;
2546  vector<uint64_t> virtualEdgeNeighbors = it->computeEdgeVirtualMorton(e,max_depth,virtualEdgeNeighborSize,octree.balance_codim);
2547  uint32_t maxDelta = virtualEdgeNeighborSize/2;
2548  if(virtualEdgeNeighborSize){
2549  for(uint32_t ee = 0; ee <= maxDelta; ++ee){
2550  int pBegin = findOwner(virtualEdgeNeighbors[ee]);
2551  int pEnd = findOwner(virtualEdgeNeighbors[virtualEdgeNeighborSize - 1- ee]);
2552  procs.insert(pBegin);
2553  procs.insert(pEnd);
2554  }
2555  }
2556  }
2557  //Virtual Corner Neighbors
2558  for(uint8_t c = 0; c < global3D.nnodes; ++c){
2559  uint32_t virtualCornerNeighborSize = 0;
2560  uint64_t virtualCornerNeighbor = it ->computeNodeVirtualMorton(c,max_depth,virtualCornerNeighborSize);
2561  if(virtualCornerNeighborSize){
2562  int proc = findOwner(virtualCornerNeighbor);
2563  procs.insert(proc);
2564  }
2565  }
2566 
2567  set<int>::iterator pitend = procs.end();
2568  for(set<int>::iterator pit = procs.begin(); pit != pitend; ++pit){
2569  int p = *pit;
2570  if(p != rank){
2571  //TODO better reserve to avoid if
2572  bordersPerProc[p].push_back(distance(begin,it));
2573  vector<uint32_t> & bordersSingleProc = bordersPerProc[p];
2574  if(bordersSingleProc.capacity() - bordersSingleProc.size() < 2)
2575  bordersSingleProc.reserve(2*bordersSingleProc.size());
2576  }
2577  }
2578  }
2579 
2580  MPI_Barrier(comm);
2581 
2582  //PACK (mpi) BORDER OCTANTS IN CHAR BUFFERS WITH SIZE (map value) TO BE SENT TO THE RIGHT PROCESS (map key)
2583  //it visits every element in bordersPerProc (one for every neighbor proc)
2584  //for every element it visits the border octants it contains and pack them in a new structure, sendBuffers
2585  //this map has an entry Class_Comm_Buffer for every proc containing the size in bytes of the buffer and the octants
2586  //to be sent to that proc packed in a char* buffer
2587  uint64_t global_index;
2588  uint32_t x,y,z;
2589  uint8_t l;
2590  int8_t m;
2591  bool info[16];
2592  map<int,Class_Comm_Buffer> sendBuffers;
2593  map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
2594  uint32_t pbordersOversize = 0;
2595  for(map<int,vector<uint32_t> >::iterator bit = bordersPerProc.begin(); bit != bitend; ++bit){
2596  pbordersOversize += bit->second.size();
2597  int buffSize = bit->second.size() * (int)ceil((double)(global3D.octantBytes + global3D.globalIndexBytes)/ (double)(CHAR_BIT/8));// + (int)ceil((double)sizeof(int)/(double)(CHAR_BIT/8));
2598  int key = bit->first;
2599  const vector<uint32_t> & value = bit->second;
2600  sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
2601  int pos = 0;
2602  int nofBorders = value.size();
2603  for(int i = 0; i < nofBorders; ++i){
2604  //the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
2605  const Class_Octant<3> & octant = octree.octants[value[i]];
2606  x = octant.getX();
2607  y = octant.getY();
2608  z = octant.getZ();
2609  l = octant.getLevel();
2610  m = octant.getMarker();
2611  global_index = getGlobalIdx(value[i]);
2612  for(int i = 0; i < 16; ++i)
2613  info[i] = octant.info[i];
2614  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2615  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2616  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2617  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2618  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2619  for(int j = 0; j < 16; ++j){
2620  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2621  }
2622  error_flag = MPI_Pack(&global_index,1,MPI_INT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2623  }
2624  }
2625 
2626  //COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
2627  //the size of every borders buffer is communicated to the right process in order to build the receive buffer
2628  //and stored in the recvBufferSizePerProc structure
2629  MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
2630  MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
2631  int nReq = 0;
2632  map<int,int> recvBufferSizePerProc;
2633  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
2634  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2635  recvBufferSizePerProc[sit->first] = 0;
2636  error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
2637  ++nReq;
2638  }
2639  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
2640  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2641  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
2642  ++nReq;
2643  }
2644  MPI_Waitall(nReq,req,stats);
2645 
2646  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
2647  //recvBuffers structure is declared and each buffer is initialized to the right size
2648  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
2649  //at the same time every process compute the size in bytes of all the ghost octants
2650  uint32_t nofBytesOverProc = 0;
2651  map<int,Class_Comm_Buffer> recvBuffers;
2652  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
2653  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
2654  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
2655  }
2656  nReq = 0;
2657  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2658  nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
2659  error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
2660  ++nReq;
2661  }
2662  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2663  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
2664  ++nReq;
2665  }
2666  MPI_Waitall(nReq,req,stats);
2667 
2668  //COMPUTE GHOSTS SIZE IN BYTES
2669  //number of ghosts in every process is obtained through the size in bytes of the single octant
2670  //and ghost vector in local tree is resized
2671  uint32_t nofGhosts = nofBytesOverProc / (uint32_t)(global3D.octantBytes + global3D.globalIndexBytes);
2672  octree.size_ghosts = nofGhosts;
2673  octree.ghosts.clear();
2674  octree.ghosts.resize(nofGhosts);
2675  octree.globalidx_ghosts.resize(nofGhosts);
2676 
2677  //UNPACK BUFFERS AND BUILD GHOSTS CONTAINER OF CLASS_LOCAL_TREE
2678  //every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked octant by octant.
2679  //every ghost octant is built and put in the ghost vector
2680  uint32_t ghostCounter = 0;
2681  map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
2682  for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
2683  int pos = 0;
2684  int nofGhostsPerProc = int(rrit->second.commBufferSize / (uint32_t) (global3D.octantBytes + global3D.globalIndexBytes));
2685  for(int i = 0; i < nofGhostsPerProc; ++i){
2686  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
2687  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
2688  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&z,1,MPI_UINT32_T,comm);
2689  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
2690  octree.ghosts[ghostCounter] = Class_Octant<3>(l,x,y,z);
2691  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
2692  octree.ghosts[ghostCounter].setMarker(m);
2693  for(int j = 0; j < 16; ++j){
2694  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
2695  octree.ghosts[ghostCounter].info[j] = info[j];
2696  }
2697  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&global_index,1,MPI_INT64_T,comm);
2698  octree.globalidx_ghosts[ghostCounter] = global_index;
2699  ++ghostCounter;
2700  }
2701  }
2702  recvBuffers.clear();
2703  sendBuffers.clear();
2704  recvBufferSizePerProc.clear();
2705 
2706  delete [] req; req = NULL;
2707  delete [] stats; stats = NULL;
2708 
2709  }; // set pbound and build ghosts after static load balance
2710 
2711  //=================================================================================//
2712 
2713 public:
/*! Redistribute the local octants across all MPI processes following the
 *  uniform partition computed by computePartition(partition), where
 *  partition[p] is the number of octants process p must own afterwards.
 *  Serial case: every rank holds the full tree and simply keeps its slice.
 *  Parallel case: each rank ships its excess "head" octants to lower ranks
 *  and its excess "tail" octants to higher ranks via packed MPI buffers,
 *  then rebuilds octree.octants and refreshes ghosts.
 *  Side effects: rewrites octree.octants, clears/rebuilds ghosts via
 *  setPboundGhosts(), updates partition bookkeeping via updateLoadBalance(),
 *  and writes a summary to the log. Collective: every rank must call it. */
2718  void loadBalance(){ //assign the octants to the processes following a computed partition
2719 
2720  //Write info on log
2721  log.writeLog("---------------------------------------------");
2722  log.writeLog(" LOAD BALANCE ");
2723 
2724  uint32_t* partition = new uint32_t [nproc];
2725  computePartition(partition);
2726  if(serial)
2727  {
// Serial branch: the whole tree is replicated on every rank, so each rank
// just keeps the contiguous slice [stride, stride+partition[rank]) of the
// global octant vector; no communication of octants is needed.
2728  log.writeLog(" ");
2729  log.writeLog(" Initial Serial distribution : ");
2730  for(int ii=0; ii<nproc; ii++){
2731  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
2732  }
2733 
// stride = number of octants assigned to all ranks before this one.
2734  uint32_t stride = 0;
2735  for(int i = 0; i < rank; ++i)
2736  stride += partition[i];
2737  Class_Local_Tree<3>::OctantsType octantsCopy = octree.octants;
2738  Class_Local_Tree<3>::OctantsType::const_iterator first = octantsCopy.begin() + stride;
2739  Class_Local_Tree<3>::OctantsType::const_iterator last = first + partition[rank];
2740  octree.octants.assign(first, last);
// shrink_to_fit is skipped under the Intel compiler (presumably an old ICC
// lacked this C++11 member -- TODO confirm the original motivation).
2741 #if defined(__INTEL_COMPILER) || defined(__ICC)
2742 #else
2743  octree.octants.shrink_to_fit();
2744 #endif
2745  first = octantsCopy.end();
2746  last = octantsCopy.end();
2747 
2748  //Update and ghosts here
2749  updateLoadBalance();
2750  setPboundGhosts();
2751 
2752  }
2753  else
2754  {
// Parallel branch: the tree is already distributed; octants must migrate
// between neighboring ranks so that rank p ends up with partition[p] octants.
2755  log.writeLog(" ");
2756  log.writeLog(" Initial Parallel partition : ");
2757  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
2758  for(int ii=1; ii<nproc; ii++){
2759  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
2760  }
2761 
2762  //empty ghosts
2763  octree.ghosts.clear();
2764  octree.size_ghosts = 0;
2765  //compute new partition range globalidx
// newPartitionRangeGlobalidx[p] = global index of the LAST octant owned by
// rank p under the NEW partition (prefix sum of partition[] minus one).
2766  uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
2767  for(int p = 0; p < nproc; ++p){
2768  newPartitionRangeGlobalidx[p] = 0;
2769  for(int pp = 0; pp <= p; ++pp)
2770  newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
2771  --newPartitionRangeGlobalidx[p];
2772  }
2773 
2774  //find resident octants local offset lastHead(lh) and firstTail(ft)
// lh = local index of the last octant that must LEAVE toward lower ranks
//      (the "head"; -1 means the head is empty).
// ft = local index of the first octant that must LEAVE toward higher ranks
//      (the "tail"; == octants.size() means the tail is empty).
// Octants in (lh, ft) stay resident on this rank.
2775  int32_t lh,ft;
2776  if(rank == 0)
2777  lh = -1;
2778  else{
2779  lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
2780  }
2781  if(lh < 0)
2782  lh = - 1;
2783  else if(lh > (int32_t)(octree.octants.size() - 1))
2784  lh = octree.octants.size() - 1;
2785 
2786  if(rank == nproc - 1)
2787  ft = octree.octants.size();
2788  else if(rank == 0)
2789  ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
2790  else{
2791  ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
2792  }
2793  if(ft > (int32_t)(octree.octants.size() - 1))
2794  ft = octree.octants.size();
2795  else if(ft < 0)
2796  ft = 0;
2797 
2798  //compute size Head and size Tail
// headOffset/tailOffset freeze the ORIGINAL head/tail sizes: headSize and
// tailSize are decremented while the send buffers are built, but the offsets
// are still needed later to locate the resident range in octree.octants.
2799  uint32_t headSize = (uint32_t)(lh + 1);
2800  uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
2801  uint32_t headOffset = headSize;
2802  uint32_t tailOffset = tailSize;
2803 
2804  //build send buffers
2805  map<int,Class_Comm_Buffer> sendBuffers;
2806 
2807  //Compute first predecessor and first successor to send buffers to
// firstPredecessor = lowest-ranked process that must receive part of this
// rank's head; firstSuccessor = lowest-ranked process (above this rank)
// that must receive part of the tail -- both derived from where the global
// indices of lh/ft fall in the NEW partition ranges.
2808  int64_t firstOctantGlobalIdx = 0;// offset to compute global index of each octant in every process
2809  int64_t globalLastHead = (int64_t) lh;
2810  int64_t globalFirstTail = (int64_t) ft; //lastHead and firstTail in global ordering
2811  int firstPredecessor = -1;
2812  int firstSuccessor = nproc;
2813  if(rank != 0){
2814  firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
2815  globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
2816  globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
2817  for(int pre = rank - 1; pre >=0; --pre){
2818  if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
2819  firstPredecessor = pre;
2820  }
2821  for(int post = rank + 1; post < nproc; ++post){
2822  if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
2823  firstSuccessor = post;
2824  }
2825  }
2826  else if(rank == 0){
2827  firstSuccessor = 1;
2828  }
2829  MPI_Barrier(comm); //to be moved before the first communication
2830 
// Scratch variables reused while (un)packing one octant at a time.
// 'contatore' is Italian for "counter": it flags that at least one
// intermediate predecessor/successor has already been served, which forces
// the remaining head/tail to be flushed whole on the next iteration.
2831  uint32_t x,y,z;
2832  uint8_t l;
2833  int8_t m;
2834  bool info[16];
2835  int intBuffer = 0;
2836  int contatore = 0;
2837  //build send buffers from Head
// Walk predecessors from firstPredecessor down to rank 0. Two cases per
// target p: the remaining head is SMALLER than p's quota (send a computed
// portion, then keep walking) or it is not (send exactly the octants that
// complete p's range, then stop when the head is exhausted).
2838  uint32_t nofElementsFromSuccessiveToPrevious = 0;
2839  if(headSize != 0){
2840  for(int p = firstPredecessor; p >= 0; --p){
2841  if(headSize < partition[p]){
2842  intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
2843  intBuffer = abs(intBuffer);
2844  nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
2845  if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
2846  nofElementsFromSuccessiveToPrevious = headSize;
2847 
// Buffer size: octants only (no global index), fixed bytes per octant.
2848  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
2849  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
2850  int pos = 0;
2851  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
2852  //PACK octants from 0 to lh in sendBuffer[p]
2853  const Class_Octant<3> & octant = octree.octants[i];
2854  x = octant.getX();
2855  y = octant.getY();
2856  z = octant.getZ();
2857  l = octant.getLevel();
2858  m = octant.getMarker();
2859  for(uint32_t ii = 0; ii < 16; ++ii)
2860  info[ii] = octant.info[ii];
2861  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2862  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2863  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2864  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2865  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2866  for(int j = 0; j < 16; ++j){
2867  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2868  }
2869  }
2870  if(nofElementsFromSuccessiveToPrevious == headSize)
2871  break;
2872 
// Shrink the head window by what was just packed and continue downward.
2873  lh -= nofElementsFromSuccessiveToPrevious;
2874  globalLastHead -= nofElementsFromSuccessiveToPrevious;
2875  headSize = lh + 1;
2876  ++contatore;
2877  }
2878  else{
2879  nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
2880  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
2881  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
2882  int pos = 0;
2883  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
2884  //pack octants from lh - partition[p] to lh
2885  const Class_Octant<3> & octant = octree.octants[i];
2886  x = octant.getX();
2887  y = octant.getY();
2888  z = octant.getZ();
2889  l = octant.getLevel();
2890  m = octant.getMarker();
2891  for(int ii = 0; ii < 16; ++ii)
2892  info[ii] = octant.info[ii];
2893  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2894  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2895  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2896  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2897  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2898  for(int j = 0; j < 16; ++j){
2899  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2900  }
2901  }
2902  lh -= nofElementsFromSuccessiveToPrevious;
2903  globalLastHead -= nofElementsFromSuccessiveToPrevious;
2904  headSize = lh + 1;
2905  if(headSize == 0)
2906  break;
2907  }
2908  }
2909 
2910  }
// Same walk as above, mirrored toward higher ranks for the tail window.
2911  uint32_t nofElementsFromPreviousToSuccessive = 0;
2912  contatore = 0;
2913  //build send buffers from Tail
2914  if(tailSize != 0){
2915  for(int p = firstSuccessor; p < nproc; ++p){
2916  if(tailSize < partition[p]){
2917  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
2918  if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
2919  nofElementsFromPreviousToSuccessive = tailSize;
2920 
2921  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
2922  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
2923  int pos = 0;
2924  uint32_t octantsSize = (uint32_t)octree.octants.size();
2925  for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
2926  //PACK octants from ft to octantsSize-1
2927  const Class_Octant<3> & octant = octree.octants[i];
2928  x = octant.getX();
2929  y = octant.getY();
2930  z = octant.getZ();
2931  l = octant.getLevel();
2932  m = octant.getMarker();
2933  for(int ii = 0; ii < 16; ++ii)
2934  info[ii] = octant.info[ii];
2935  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2936  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2937  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2938  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2939  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2940  for(int j = 0; j < 16; ++j){
2941  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2942  }
2943  }
2944  if(nofElementsFromPreviousToSuccessive == tailSize)
2945  break;
2946  ft += nofElementsFromPreviousToSuccessive;
2947  globalFirstTail += nofElementsFromPreviousToSuccessive;
2948  tailSize -= nofElementsFromPreviousToSuccessive;
2949  ++contatore;
2950  }
2951  else{
2952  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
2953  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
2954  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
2955  uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
2956  int pos = 0;
2957  for(uint32_t i = ft; i <= endOctants; ++i ){
2958  //PACK octants from ft to ft + partition[p] -1
2959  const Class_Octant<3> & octant = octree.octants[i];
2960  x = octant.getX();
2961  y = octant.getY();
2962  z = octant.getZ();
2963  l = octant.getLevel();
2964  m = octant.getMarker();
2965  for(int ii = 0; ii < 16; ++ii)
2966  info[ii] = octant.info[ii];
2967  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2968  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2969  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2970  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2971  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2972  for(int j = 0; j < 16; ++j){
2973  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2974  }
2975  }
2976  ft += nofElementsFromPreviousToSuccessive;
2977  globalFirstTail += nofElementsFromPreviousToSuccessive;
2978  tailSize -= nofElementsFromPreviousToSuccessive;
2979  if(tailSize == 0)
2980  break;
2981  }
2982  }
2983  }
2984 
2985  //Build receiver sources
// Every rank advertises (via Allgatherv) the list [itself, targets it sends
// to]; inverting that global list yields sendersPerProc: who sends to whom.
2986  vector<Class_Array> recvs(nproc);
2987  recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
2988  recvs[rank].array[0] = rank;
2989  int counter = 1;
2990  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
2991  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2992  recvs[rank].array[counter] = sit->first;
2993  ++counter;
2994  }
2995  int* nofRecvsPerProc = new int[nproc];
2996  error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
2997  int globalRecvsBuffSize = 0;
2998  int* displays = new int[nproc];
2999  for(int pp = 0; pp < nproc; ++pp){
3000  displays[pp] = 0;
3001  globalRecvsBuffSize += nofRecvsPerProc[pp];
3002  for(int ppp = 0; ppp < pp; ++ppp){
3003  displays[pp] += nofRecvsPerProc[ppp];
3004  }
3005  }
3006  int* globalRecvsBuff = new int[globalRecvsBuffSize];
3007  error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3008 
3009  vector<set<int> > sendersPerProc(nproc);
3010  for(int pin = 0; pin < nproc; ++pin){
3011  for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3012  sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3013  }
3014  }
3015 
3016  //Communicate Octants (size)
// First exchange: buffer sizes. Receives are posted toward every known
// sender; sends post commBufferSize toward every target. Tag convention:
// messages are tagged with the RECEIVER's rank.
3017  MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3018  MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3019  int nReq = 0;
3020  map<int,int> recvBufferSizePerProc;
3021  set<int>::iterator senditend = sendersPerProc[rank].end();
3022  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3023  recvBufferSizePerProc[*sendit] = 0;
// NOTE(review): receives with MPI_UINT32_T into an int slot -- the byte
// counts match on common ABIs, but the signed/unsigned mismatch is worth
// confirming against Class_Comm_Buffer::commBufferSize's declared type.
3024  error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3025  ++nReq;
3026  }
3027  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3028  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3029  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3030  ++nReq;
3031  }
3032  MPI_Waitall(nReq,req,stats);
3033 
3034  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
3035  //recvBuffers structure is declared and each buffer is initialized to the right size
3036  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
// While sizing the receive buffers, count how many incoming octants land
// BEFORE the resident block (from lower ranks) and AFTER it (from higher
// ranks) -- needed to lay out the rebuilt octant vector.
3037  uint32_t nofNewHead = 0;
3038  uint32_t nofNewTail = 0;
3039  map<int,Class_Comm_Buffer> recvBuffers;
3040  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3041  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3042  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
3043  uint32_t nofNewPerProc = (uint32_t)(rit->second / (uint32_t)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8)));
3044  if(rit->first < rank)
3045  nofNewHead += nofNewPerProc;
3046  else if(rit->first > rank)
3047  nofNewTail += nofNewPerProc;
3048  }
3049  nReq = 0;
3050  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3051  //nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
3052  error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3053  ++nReq;
3054  }
3055  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3056  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3057  ++nReq;
3058  }
3059  MPI_Waitall(nReq,req,stats);
3060 
3061  //MOVE RESIDENT TO BEGIN IN OCTANTS
// Compact residents to the front, resize to the final octant count, then
// slide residents right by nofNewHead so the incoming head fits before them.
// The backward copy at 3074 avoids overwriting not-yet-moved residents.
3062  uint32_t resEnd = octree.getNumOctants() - tailOffset;
3063  uint32_t nofResidents = resEnd - headOffset;
3064  int octCounter = 0;
3065  for(uint32_t i = headOffset; i < resEnd; ++i){
3066  octree.octants[octCounter] = octree.octants[i];
3067  ++octCounter;
3068  }
3069  uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3070  octree.octants.resize(newCounter);
3071  //MOVE RESIDENTS IN RIGHT POSITION
3072  uint32_t resCounter = nofNewHead + nofResidents - 1;
3073  for(uint32_t k = 0; k < nofResidents ; ++k){
3074  octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3075  }
3076 
3077  //UNPACK BUFFERS AND BUILD NEW OCTANTS
// recvBuffers is ordered by sender rank, so octants arrive in Morton order;
// jumpResident skips over the resident block exactly once, when the first
// buffer from a higher rank is reached.
3078  newCounter = 0;
3079  bool jumpResident = false;
3080  map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3081  for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3082  uint32_t nofNewPerProc = (uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8)));
3083  int pos = 0;
3084  if(rbit->first > rank && !jumpResident){
3085  newCounter += nofResidents ;
3086  jumpResident = true;
3087  }
// NOTE(review): the loop variable counts down but unpacking is sequential
// via 'pos' and writes advance via 'newCounter', so order is still
// buffer order; the descending 'i' is only an iteration count.
3088  for(int i = nofNewPerProc - 1; i >= 0; --i){
3089  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
3090  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
3091  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&z,1,MPI_UINT32_T,comm);
3092  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
3093  octree.octants[newCounter] = Class_Octant<3>(l,x,y,z);
3094  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
3095  octree.octants[newCounter].setMarker(m);
3096  for(int j = 0; j < 16; ++j){
3097  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
3098  octree.octants[newCounter].info[j] = info[j];
3099  }
3100  ++newCounter;
3101  }
3102  }
3103 #if defined(__INTEL_COMPILER) || defined(__ICC)
3104 #else
3105  octree.octants.shrink_to_fit();
3106 #endif
// Release all communication scratch storage before rebuilding ghosts.
3107  delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3108  delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3109  delete [] displays; displays = NULL;
3110  delete [] req; req = NULL;
3111  delete [] stats; stats = NULL;
3112  delete [] globalRecvsBuff; globalRecvsBuff = NULL;
3113  //Update and ghosts here
3114  updateLoadBalance();
3115  setPboundGhosts();
3116 
3117  }
3118  delete [] partition; partition = NULL;
3119 
3120  //Write info of final partition on log
3121  log.writeLog(" ");
3122  log.writeLog(" Final Parallel partition : ");
3123  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3124  for(int ii=1; ii<nproc; ii++){
3125  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3126  }
3127  log.writeLog(" ");
3128  log.writeLog("---------------------------------------------");
3129 
3130 
3131  };
3132 
3133  //=================================================================================//
3134 
3140  void loadBalance(uint8_t & level){ //assign the octants to the processes following a computed partition with complete families contained in octants of n "level" over the leaf in each process
3141  //Write info on log
3142  log.writeLog("---------------------------------------------");
3143  log.writeLog(" LOAD BALANCE ");
3144 
3145  uint32_t* partition = new uint32_t [nproc];
3146  computePartition(partition, level);
3147  if(serial)
3148  {
3149  log.writeLog(" ");
3150  log.writeLog(" Initial Serial distribution : ");
3151  for(int ii=0; ii<nproc; ii++){
3152  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3153  }
3154 
3155  uint32_t stride = 0;
3156  for(int i = 0; i < rank; ++i)
3157  stride += partition[i];
3158  Class_Local_Tree<3>::OctantsType octantsCopy = octree.octants;
3159  Class_Local_Tree<3>::OctantsType::const_iterator first = octantsCopy.begin() + stride;
3160  Class_Local_Tree<3>::OctantsType::const_iterator last = first + partition[rank];
3161  octree.octants.assign(first, last);
3162 #if defined(__INTEL_COMPILER) || defined(__ICC)
3163 #else
3164  octree.octants.shrink_to_fit();
3165 #endif
3166  first = octantsCopy.end();
3167  last = octantsCopy.end();
3168 
3169  //Update and ghosts here
3170  updateLoadBalance();
3171  setPboundGhosts();
3172 
3173  }
3174  else
3175  {
3176  log.writeLog(" ");
3177  log.writeLog(" Initial Parallel partition : ");
3178  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3179  for(int ii=1; ii<nproc; ii++){
3180  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3181  }
3182 
3183  //empty ghosts
3184  octree.ghosts.clear();
3185  octree.size_ghosts = 0;
3186  //compute new partition range globalidx
3187  uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
3188  for(int p = 0; p < nproc; ++p){
3189  newPartitionRangeGlobalidx[p] = 0;
3190  for(int pp = 0; pp <= p; ++pp)
3191  newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
3192  --newPartitionRangeGlobalidx[p];
3193  }
3194 
3195  //find resident octants local offset lastHead(lh) and firstTail(ft)
3196  int32_t lh,ft;
3197  if(rank == 0)
3198  lh = -1;
3199  else{
3200  lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
3201  }
3202  if(lh < 0)
3203  lh = - 1;
3204  else if(lh > (int32_t)(octree.octants.size() - 1))
3205  lh = octree.octants.size() - 1;
3206 
3207  if(rank == nproc - 1)
3208  ft = octree.octants.size();
3209  else if(rank == 0)
3210  ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
3211  else{
3212  ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
3213  }
3214  if(ft > (int32_t)(octree.octants.size() - 1))
3215  ft = octree.octants.size();
3216  else if(ft < 0)
3217  ft = 0;
3218 
3219  //compute size Head and size Tail
3220  uint32_t headSize = (uint32_t)(lh + 1);
3221  uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
3222  uint32_t headOffset = headSize;
3223  uint32_t tailOffset = tailSize;
3224 
3225  //build send buffers
3226  map<int,Class_Comm_Buffer> sendBuffers;
3227 
3228  //Compute first predecessor and first successor to send buffers to
3229  int64_t firstOctantGlobalIdx = 0;// offset to compute global index of each octant in every process
3230  int64_t globalLastHead = (int64_t) lh;
3231  int64_t globalFirstTail = (int64_t) ft; //lastHead and firstTail in global ordering
3232  int firstPredecessor = -1;
3233  int firstSuccessor = nproc;
3234  if(rank != 0){
3235  firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
3236  globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
3237  globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
3238  for(int pre = rank - 1; pre >=0; --pre){
3239  if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
3240  firstPredecessor = pre;
3241  }
3242  for(int post = rank + 1; post < nproc; ++post){
3243  if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
3244  firstSuccessor = post;
3245  }
3246  }
3247  else if(rank == 0){
3248  firstSuccessor = 1;
3249  }
3250  MPI_Barrier(comm); //da spostare prima della prima comunicazione
3251 
3252  uint32_t x,y,z;
3253  uint8_t l;
3254  int8_t m;
3255  bool info[16];
3256  int intBuffer = 0;
3257  int contatore = 0;
3258  //build send buffers from Head
3259  uint32_t nofElementsFromSuccessiveToPrevious = 0;
3260  if(headSize != 0){
3261  for(int p = firstPredecessor; p >= 0; --p){
3262  if(headSize < partition[p]){
3263  intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
3264  intBuffer = abs(intBuffer);
3265  nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
3266  if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
3267  nofElementsFromSuccessiveToPrevious = headSize;
3268 
3269  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3270  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3271  int pos = 0;
3272  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3273  //PACK octants from 0 to lh in sendBuffer[p]
3274  const Class_Octant<3> & octant = octree.octants[i];
3275  x = octant.getX();
3276  y = octant.getY();
3277  z = octant.getZ();
3278  l = octant.getLevel();
3279  m = octant.getMarker();
3280  for(int ii = 0; ii < 16; ++ii)
3281  info[ii] = octant.info[ii];
3282  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3283  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3284  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3285  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3286  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3287  for(int j = 0; j < 16; ++j){
3288  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3289  }
3290  }
3291  if(nofElementsFromSuccessiveToPrevious == headSize)
3292  break;
3293 
3294  lh -= nofElementsFromSuccessiveToPrevious;
3295  globalLastHead -= nofElementsFromSuccessiveToPrevious;
3296  headSize = lh + 1;
3297  ++contatore;
3298  }
3299  else{
3300  nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
3301  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3302  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3303  int pos = 0;
3304  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3305  //pack octants from lh - partition[p] to lh
3306  const Class_Octant<3> & octant = octree.octants[i];
3307  x = octant.getX();
3308  y = octant.getY();
3309  z = octant.getZ();
3310  l = octant.getLevel();
3311  m = octant.getMarker();
3312  for(int ii = 0; ii < 16; ++ii)
3313  info[ii] = octant.info[ii];
3314  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3315  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3316  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3317  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3318  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3319  for(int j = 0; j < 16; ++j){
3320  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3321  }
3322  }
3323  lh -= nofElementsFromSuccessiveToPrevious;
3324  globalLastHead -= nofElementsFromSuccessiveToPrevious;
3325  headSize = lh + 1;
3326  if(headSize == 0)
3327  break;
3328  }
3329  }
3330 
3331  }
3332  uint32_t nofElementsFromPreviousToSuccessive = 0;
3333  contatore = 0;
3334  //build send buffers from Tail
3335  if(tailSize != 0){
3336  for(int p = firstSuccessor; p < nproc; ++p){
3337  if(tailSize < partition[p]){
3338  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3339  if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
3340  nofElementsFromPreviousToSuccessive = tailSize;
3341 
3342  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3343  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3344  int pos = 0;
3345  uint32_t octantsSize = (uint32_t)octree.octants.size();
3346  for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3347  //PACK octants from ft to octantsSize-1
3348  const Class_Octant<3> & octant = octree.octants[i];
3349  x = octant.getX();
3350  y = octant.getY();
3351  z = octant.getZ();
3352  l = octant.getLevel();
3353  m = octant.getMarker();
3354  for(int ii = 0; ii < 16; ++ii)
3355  info[ii] = octant.info[ii];
3356  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3357  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3358  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3359  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3360  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3361  for(int j = 0; j < 16; ++j){
3362  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3363  }
3364  }
3365  if(nofElementsFromPreviousToSuccessive == tailSize)
3366  break;
3367  ft += nofElementsFromPreviousToSuccessive;
3368  globalFirstTail += nofElementsFromPreviousToSuccessive;
3369  tailSize -= nofElementsFromPreviousToSuccessive;
3370  ++contatore;
3371  }
3372  else{
3373  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3374  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3375  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3376  uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
3377  int pos = 0;
3378  for(uint32_t i = ft; i <= endOctants; ++i ){
3379  //PACK octants from ft to ft + partition[p] -1
3380  const Class_Octant<3> & octant = octree.octants[i];
3381  x = octant.getX();
3382  y = octant.getY();
3383  z = octant.getZ();
3384  l = octant.getLevel();
3385  m = octant.getMarker();
3386  for(int ii = 0; ii < 16; ++ii)
3387  info[ii] = octant.info[ii];
3388  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3389  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3390  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3391  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3392  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3393  for(int j = 0; j < 16; ++j){
3394  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3395  }
3396  }
3397  ft += nofElementsFromPreviousToSuccessive;
3398  globalFirstTail += nofElementsFromPreviousToSuccessive;
3399  tailSize -= nofElementsFromPreviousToSuccessive;
3400  if(tailSize == 0)
3401  break;
3402  }
3403  }
3404  }
3405 
3406  //Build receiver sources
3407  vector<Class_Array> recvs(nproc);
3408  recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
3409  recvs[rank].array[0] = rank;
3410  int counter = 1;
3411  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
3412  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
3413  recvs[rank].array[counter] = sit->first;
3414  ++counter;
3415  }
3416  int* nofRecvsPerProc = new int[nproc];
3417  error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
3418  int globalRecvsBuffSize = 0;
3419  int* displays = new int[nproc];
3420  for(int pp = 0; pp < nproc; ++pp){
3421  displays[pp] = 0;
3422  globalRecvsBuffSize += nofRecvsPerProc[pp];
3423  for(int ppp = 0; ppp < pp; ++ppp){
3424  displays[pp] += nofRecvsPerProc[ppp];
3425  }
3426  }
3427  int* globalRecvsBuff = new int[globalRecvsBuffSize];
3428  error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3429 
3430  vector<set<int> > sendersPerProc(nproc);
3431  for(int pin = 0; pin < nproc; ++pin){
3432  for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3433  sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3434  }
3435  }
3436 
3437  //Communicate Octants (size)
3438  MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3439  MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3440  int nReq = 0;
3441  map<int,int> recvBufferSizePerProc;
3442  set<int>::iterator senditend = sendersPerProc[rank].end();
3443  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3444  recvBufferSizePerProc[*sendit] = 0;
3445  error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3446  ++nReq;
3447  }
3448  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3449  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3450  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3451  ++nReq;
3452  }
3453  MPI_Waitall(nReq,req,stats);
3454 
3455  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
3456  //recvBuffers structure is declared and each buffer is initialized to the right size
3457  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
3458  uint32_t nofNewHead = 0;
3459  uint32_t nofNewTail = 0;
3460  map<int,Class_Comm_Buffer> recvBuffers;
3461  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3462  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3463  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
3464  uint32_t nofNewPerProc = (uint32_t)(rit->second / (uint32_t)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8)));
3465  if(rit->first < rank)
3466  nofNewHead += nofNewPerProc;
3467  else if(rit->first > rank)
3468  nofNewTail += nofNewPerProc;
3469  }
3470  nReq = 0;
3471  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3472  //nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
3473  error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3474  ++nReq;
3475  }
3476  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3477  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3478  ++nReq;
3479  }
3480  MPI_Waitall(nReq,req,stats);
3481 
3482  //MOVE RESIDENT TO BEGIN IN OCTANTS
3483  uint32_t resEnd = octree.getNumOctants() - tailOffset;
3484  uint32_t nofResidents = resEnd - headOffset;
3485  int octCounter = 0;
3486  for(uint32_t i = headOffset; i < resEnd; ++i){
3487  octree.octants[octCounter] = octree.octants[i];
3488  ++octCounter;
3489  }
3490  uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3491  octree.octants.resize(newCounter);
3492  //MOVE RESIDENTS IN RIGHT POSITION
3493  uint32_t resCounter = nofNewHead + nofResidents - 1;
3494  for(uint32_t k = 0; k < nofResidents ; ++k){
3495  octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3496  }
3497 
3498  //UNPACK BUFFERS AND BUILD NEW OCTANTS
3499  newCounter = 0;
3500  bool jumpResident = false;
3501  map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3502  for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3503  uint32_t nofNewPerProc = (uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8)));
3504  int pos = 0;
3505  if(rbit->first > rank && !jumpResident){
3506  newCounter += nofResidents ;
3507  jumpResident = true;
3508  }
3509  for(int i = nofNewPerProc - 1; i >= 0; --i){
3510  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
3511  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
3512  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&z,1,MPI_UINT32_T,comm);
3513  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
3514  octree.octants[newCounter] = Class_Octant<3>(l,x,y,z);
3515  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
3516  octree.octants[newCounter].setMarker(m);
3517  for(int j = 0; j < 16; ++j){
3518  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
3519  octree.octants[newCounter].info[j] = info[j];
3520  }
3521  ++newCounter;
3522  }
3523  }
3524 #if defined(__INTEL_COMPILER) || defined(__ICC)
3525 #else
3526  octree.octants.shrink_to_fit();
3527 #endif
3528  delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3529  delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3530  delete [] displays; displays = NULL;
3531  delete [] req; req = NULL;
3532  delete [] stats; stats = NULL;
3533  delete [] globalRecvsBuff; globalRecvsBuff = NULL;
3534 
3535  //Update and ghosts here
3536  updateLoadBalance();
3537  setPboundGhosts();
3538 
3539  }
3540  delete [] partition;
3541  partition = NULL;
3542 
3543  //Write info of final partition on log
3544  log.writeLog(" ");
3545  log.writeLog(" Final Parallel partition : ");
3546  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3547  for(int ii=1; ii<nproc; ii++){
3548  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3549  }
3550  log.writeLog(" ");
3551  log.writeLog("---------------------------------------------");
3552 
3553 
3554 
3555  };
3556 
3557  //=================================================================================//
3558 
3563  template<class Impl>
3564  void loadBalance(Class_Data_LB_Interface<Impl> & userData, dvector* weight = NULL){
3565  //Write info on log
3566  log.writeLog("---------------------------------------------");
3567  log.writeLog(" LOAD BALANCE ");
3568 
3569  uint32_t* partition = new uint32_t [nproc];
3570  if (weight == NULL)
3571  computePartition(partition);
3572  else
3573  computePartition(partition, weight);
3574 
3575  weight = NULL;
3576 
3577  if(serial)
3578  {
3579  log.writeLog(" ");
3580  log.writeLog(" Initial Serial distribution : ");
3581  for(int ii=0; ii<nproc; ii++){
3582  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3583  }
3584 
3585  uint32_t stride = 0;
3586  for(int i = 0; i < rank; ++i)
3587  stride += partition[i];
3588  Class_Local_Tree<3>::OctantsType octantsCopy = octree.octants;
3589  Class_Local_Tree<3>::OctantsType::const_iterator first = octantsCopy.begin() + stride;
3590  Class_Local_Tree<3>::OctantsType::const_iterator last = first + partition[rank];
3591  octree.octants.assign(first, last);
3592 #if defined(__INTEL_COMPILER) || defined(__ICC)
3593 #else
3594  octree.octants.shrink_to_fit();
3595 #endif
3596  first = octantsCopy.end();
3597  last = octantsCopy.end();
3598 
3599 
3600  userData.assign(stride,partition[rank]);
3601 
3602 
3603  //Update and build ghosts here
3604  updateLoadBalance();
3605  setPboundGhosts();
3606  }
3607  else
3608  {
3609  log.writeLog(" ");
3610  log.writeLog(" Initial Parallel partition : ");
3611  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3612  for(int ii=1; ii<nproc; ii++){
3613  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3614  }
3615 
3616  //empty ghosts
3617  octree.ghosts.clear();
3618  octree.size_ghosts = 0;
3619  //compute new partition range globalidx
3620  uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
3621  for(int p = 0; p < nproc; ++p){
3622  newPartitionRangeGlobalidx[p] = 0;
3623  for(int pp = 0; pp <= p; ++pp)
3624  newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
3625  --newPartitionRangeGlobalidx[p];
3626  }
3627 
3628  //find resident octants local offset lastHead(lh) and firstTail(ft)
3629  int32_t lh,ft;
3630  if(rank == 0)
3631  lh = -1;
3632  else{
3633  lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
3634  }
3635  if(lh < 0)
3636  lh = - 1;
3637  else if(lh > octree.octants.size() - 1)
3638  lh = octree.octants.size() - 1;
3639 
3640  if(rank == nproc - 1)
3641  ft = octree.octants.size();
3642  else if(rank == 0)
3643  ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
3644  else{
3645  ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
3646  }
3647  if(ft > (int32_t)(octree.octants.size() - 1))
3648  ft = octree.octants.size();
3649  else if(ft < 0)
3650  ft = 0;
3651 
3652  //compute size Head and size Tail
3653  uint32_t headSize = (uint32_t)(lh + 1);
3654  uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
3655  uint32_t headOffset = headSize;
3656  uint32_t tailOffset = tailSize;
3657 
3658  //build send buffers
3659  map<int,Class_Comm_Buffer> sendBuffers;
3660 
3661  //Compute first predecessor and first successor to send buffers to
3662  int64_t firstOctantGlobalIdx = 0;// offset to compute global index of each octant in every process
3663  int64_t globalLastHead = (int64_t) lh;
3664  int64_t globalFirstTail = (int64_t) ft; //lastHead and firstTail in global ordering
3665  int firstPredecessor = -1;
3666  int firstSuccessor = nproc;
3667  if(rank != 0){
3668  firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
3669  globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
3670  globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
3671  for(int pre = rank - 1; pre >=0; --pre){
3672  if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
3673  firstPredecessor = pre;
3674  }
3675  for(int post = rank + 1; post < nproc; ++post){
3676  if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
3677  firstSuccessor = post;
3678  }
3679  }
3680  else if(rank == 0){
3681  firstSuccessor = 1;
3682  }
3683  MPI_Barrier(comm); //TODO: move this barrier before the first communication
3684 
3685  uint32_t x,y,z;
3686  uint8_t l;
3687  int8_t m;
3688  bool info[16];
3689  int intBuffer = 0;
3690  int contatore = 0;
3691  //build send buffers from Head
3692  uint32_t nofElementsFromSuccessiveToPrevious = 0;
3693  if(headSize != 0){
3694  for(int p = firstPredecessor; p >= 0; --p){
3695  if(headSize < partition[p]){
3696  intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
3697  intBuffer = abs(intBuffer);
3698  nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
3699  if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
3700  nofElementsFromSuccessiveToPrevious = headSize;
3701 
3702  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3703  //compute size of data in buffers
3704  if(userData.fixedSize()){
3705  buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
3706  }
3707  else{
3708  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3709  buffSize += userData.size(i);
3710  }
3711  }
3712  //add room for int, number of octants in this buffer
3713  buffSize += sizeof(int);
3714  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3715  //store the number of octants at the beginning of the buffer
3716  MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3717  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3718  //PACK octants from 0 to lh in sendBuffer[p]
3719  const Class_Octant<3> & octant = octree.octants[i];
3720  x = octant.getX();
3721  y = octant.getY();
3722  z = octant.getZ();
3723  l = octant.getLevel();
3724  m = octant.getMarker();
3725  for(int j = 0; j < 16; ++j)
3726  info[j] = octant.info[j];
3727  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3728  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3729  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3730  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3731  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3732  for(int j = 0; j < 16; ++j){
3733  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3734 
3735  }
3736  userData.gather(sendBuffers[p],i);
3737  }
3738  if(nofElementsFromSuccessiveToPrevious == headSize)
3739  break;
3740 
3741  lh -= nofElementsFromSuccessiveToPrevious;
3742  globalLastHead -= nofElementsFromSuccessiveToPrevious;
3743  headSize = lh + 1;
3744  ++contatore;
3745  }
3746  else{
3747  nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
3748  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3749  //compute size of data in buffers
3750  if(userData.fixedSize()){
3751  buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
3752  }
3753  else{
3754  for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
3755  buffSize += userData.size(i);
3756  }
3757  }
3758  //add room for int, number of octants in this buffer
3759  buffSize += sizeof(int);
3760  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3761  //store the number of octants at the beginning of the buffer
3762  MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3763  for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
3764  //pack octants from lh - partition[p] to lh
3765  const Class_Octant<3> & octant = octree.octants[i];
3766  x = octant.getX();
3767  y = octant.getY();
3768  z = octant.getZ();
3769  l = octant.getLevel();
3770  m = octant.getMarker();
3771  for(int j = 0; j < 16; ++j)
3772  info[j] = octant.info[j];
3773  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3774  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3775  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3776  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3777  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3778  for(int j = 0; j < 16; ++j){
3779  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3780  }
3781  userData.gather(sendBuffers[p],i);
3782  }
3783  lh -= nofElementsFromSuccessiveToPrevious;
3784  globalLastHead -= nofElementsFromSuccessiveToPrevious;
3785  headSize = lh + 1;
3786  if(headSize == 0)
3787  break;
3788  }
3789  }
3790  }
3791  uint32_t nofElementsFromPreviousToSuccessive = 0;
3792  contatore = 0;
3793  //build send buffers from Tail
3794  if(tailSize != 0){
3795  for(int p = firstSuccessor; p < nproc; ++p){
3796  if(tailSize < partition[p]){
3797  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3798  if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
3799  nofElementsFromPreviousToSuccessive = tailSize;
3800 
3801  uint32_t octantsSize = (uint32_t)octree.octants.size();
3802  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3803  //compute size of data in buffers
3804  if(userData.fixedSize()){
3805  buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
3806  }
3807  else{
3808  for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3809  buffSize += userData.size(i);
3810  }
3811  }
3812  //add room for int, number of octants in this buffer
3813  buffSize += sizeof(int);
3814  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3815  //store the number of octants at the beginning of the buffer
3816  MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3817  for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3818  //PACK octants from ft to octantsSize-1
3819  const Class_Octant<3> & octant = octree.octants[i];
3820  x = octant.getX();
3821  y = octant.getY();
3822  z = octant.getZ();
3823  l = octant.getLevel();
3824  m = octant.getMarker();
3825  for(int j = 0; j < 16; ++j)
3826  info[j] = octant.info[j];
3827  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3828  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3829  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3830  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3831  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3832  for(int j = 0; j < 16; ++j){
3833  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3834  }
3835  userData.gather(sendBuffers[p],i);
3836  }
3837  if(nofElementsFromPreviousToSuccessive == tailSize)
3838  break;
3839  ft += nofElementsFromPreviousToSuccessive;
3840  globalFirstTail += nofElementsFromPreviousToSuccessive;
3841  tailSize -= nofElementsFromPreviousToSuccessive;
3842  ++contatore;
3843  }
3844  else{
3845  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3846  uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
3847  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3848  //compute size of data in buffers
3849  if(userData.fixedSize()){
3850  buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
3851  }
3852  else{
3853  for(uint32_t i = ft; i <= endOctants; ++i){
3854  buffSize += userData.size(i);
3855  }
3856  }
3857  //add room for int, number of octants in this buffer
3858  buffSize += sizeof(int);
3859  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3860  //store the number of octants at the beginning of the buffer
3861  MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3862  for(uint32_t i = ft; i <= endOctants; ++i ){
3863  //PACK octants from ft to ft + partition[p] -1
3864  const Class_Octant<3> & octant = octree.octants[i];
3865  x = octant.getX();
3866  y = octant.getY();
3867  z = octant.getZ();
3868  l = octant.getLevel();
3869  m = octant.getMarker();
3870  for(int j = 0; j < 16; ++j)
3871  info[j] = octant.info[j];
3872  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3873  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3874  error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3875  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3876  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3877  for(int j = 0; j < 16; ++j){
3878  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3879  }
3880  userData.gather(sendBuffers[p],i);
3881  }
3882  ft += nofElementsFromPreviousToSuccessive;
3883  globalFirstTail += nofElementsFromPreviousToSuccessive;
3884  tailSize -= nofElementsFromPreviousToSuccessive;
3885  if(tailSize == 0)
3886  break;
3887  }
3888  }
3889  }
3890 
3891  //Build receiver sources
3892  vector<Class_Array> recvs(nproc);
3893  recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
3894  recvs[rank].array[0] = rank;
3895  int counter = 1;
3896  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
3897  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
3898  recvs[rank].array[counter] = sit->first;
3899  ++counter;
3900  }
3901  int* nofRecvsPerProc = new int[nproc];
3902  error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
3903  int globalRecvsBuffSize = 0;
3904  int* displays = new int[nproc];
3905  for(int pp = 0; pp < nproc; ++pp){
3906  displays[pp] = 0;
3907  globalRecvsBuffSize += nofRecvsPerProc[pp];
3908  for(int ppp = 0; ppp < pp; ++ppp){
3909  displays[pp] += nofRecvsPerProc[ppp];
3910  }
3911  }
3912  int globalRecvsBuff[globalRecvsBuffSize];
3913  error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3914 
3915  vector<set<int> > sendersPerProc(nproc);
3916  for(int pin = 0; pin < nproc; ++pin){
3917  for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3918  sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3919  }
3920  }
3921 
3922  //Communicate Octants (size)
3923  MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3924  MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3925  int nReq = 0;
3926  map<int,int> recvBufferSizePerProc;
3927  set<int>::iterator senditend = sendersPerProc[rank].end();
3928  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3929  recvBufferSizePerProc[*sendit] = 0;
3930  error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3931  ++nReq;
3932  }
3933  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3934  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3935  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3936  ++nReq;
3937  }
3938  MPI_Waitall(nReq,req,stats);
3939 
3940  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
3941  //recvBuffers structure is declared and each buffer is initialized to the right size
3942  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
3943  uint32_t nofNewHead = 0;
3944  uint32_t nofNewTail = 0;
3945  map<int,Class_Comm_Buffer> recvBuffers;
3946 
3947  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3948  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3949  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
3950  }
3951 
3952  nReq = 0;
3953  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3954  error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3955  ++nReq;
3956  }
3957  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3958  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3959  ++nReq;
3960  }
3961  MPI_Waitall(nReq,req,stats);
3962 
3963  //Unpack number of octants per sender
3964  map<int,uint32_t> nofNewOverProcs;
3965  map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3966  for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3967  uint32_t nofNewPerProc;
3968  MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofNewPerProc,1,MPI_UINT32_T,comm);
3969  nofNewOverProcs[rbit->first] = nofNewPerProc;
3970  if(rbit->first < rank)
3971  nofNewHead += nofNewPerProc;
3972  else if(rbit->first > rank)
3973  nofNewTail += nofNewPerProc;
3974  }
3975 
3976  //MOVE RESIDENT TO BEGIN IN OCTANTS
3977  uint32_t resEnd = octree.getNumOctants() - tailOffset;
3978  uint32_t nofResidents = resEnd - headOffset;
3979  uint32_t octCounter = 0;
3980  for(uint32_t i = headOffset; i < resEnd; ++i){
3981  octree.octants[octCounter] = octree.octants[i];
3982  userData.move(i,octCounter);
3983  ++octCounter;
3984  }
3985  uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3986  octree.octants.resize(newCounter);
3987  userData.resize(newCounter);
3988  //MOVE RESIDENTS IN RIGHT POSITION
3989  uint32_t resCounter = nofNewHead + nofResidents - 1;
3990  for(uint32_t k = 0; k < nofResidents ; ++k){
3991  octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3992  //TODO move data - DON
3993  userData.move(nofResidents - k - 1,resCounter - k);
3994  }
3995 
3996  //UNPACK BUFFERS AND BUILD NEW OCTANTS
3997  newCounter = 0;
3998  bool jumpResident = false;
3999 
4000  for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
4001  //TODO change new octants counting, probably you have to communicate the number of news per proc
4002  uint32_t nofNewPerProc = nofNewOverProcs[rbit->first];//(uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)octantBytes / (double)(CHAR_BIT/8)));
4003  if(rbit->first > rank && !jumpResident){
4004  newCounter += nofResidents ;
4005  jumpResident = true;
4006  }
4007  for(int i = nofNewPerProc - 1; i >= 0; --i){
4008  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&x,1,MPI_UINT32_T,comm);
4009  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&y,1,MPI_UINT32_T,comm);
4010  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&z,1,MPI_UINT32_T,comm);
4011  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&l,1,MPI_UINT8_T,comm);
4012  octree.octants[newCounter] = Class_Octant<3>(l,x,y,z);
4013  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&m,1,MPI_INT8_T,comm);
4014  octree.octants[newCounter].setMarker(m);
4015  for(int j = 0; j < 16; ++j){
4016  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&info[j],1,MPI::BOOL,comm);
4017  octree.octants[newCounter].info[j] = info[j];
4018  }
4019  //TODO Unpack data
4020  userData.scatter(rbit->second,newCounter);
4021  ++newCounter;
4022  }
4023  }
4024 #if defined(__INTEL_COMPILER) || defined(__ICC)
4025 #else
4026  octree.octants.shrink_to_fit();
4027 #endif
4028  userData.shrink();
4029 
4030  delete [] newPartitionRangeGlobalidx;
4031  newPartitionRangeGlobalidx = NULL;
4032  delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
4033  delete [] displays; displays = NULL;
4034  delete [] req; req = NULL;
4035  delete [] stats; stats = NULL;
4036 
4037  //Update and ghosts here
4038  updateLoadBalance();
4039  setPboundGhosts();
4040  uint32_t nofGhosts = getNumGhosts();
4041  userData.resizeGhost(nofGhosts);
4042  }
4043  delete [] partition;
4044  partition = NULL;
4045 
4046  //Write info of final partition on log
4047  log.writeLog(" ");
4048  log.writeLog(" Final Parallel partition : ");
4049  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
4050  for(int ii=1; ii<nproc; ii++){
4051  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
4052  }
4053  log.writeLog(" ");
4054  log.writeLog("---------------------------------------------");
4055 
4056  };
4057 
4058  //=================================================================================//
4059 
	/** Distribute the local octants among the processes according to a partition
	 *  computed with weights driven by the target refinement level, moving the
	 *  user data attached to each octant along with it.
	 *
	 *  Outline: (1) compute the target partition; (2) serial case: simply keep the
	 *  local slice of the global octant vector; (3) parallel case: determine which
	 *  head/tail octants must leave this rank, pack them (coordinates, level,
	 *  marker, info flags, user data) into per-destination MPI buffers, exchange
	 *  buffer sizes and then buffers, rebuild the local octant vector from the
	 *  residents plus the received octants, and finally rebuild ghosts.
	 *
	 *  @param userData user data handler implementing the Class_Data_LB_Interface
	 *                  gather/scatter/move/resize contract; resized and reordered
	 *                  in lockstep with octree.octants.
	 *  @param level    refinement level driving the weighted partition
	 *                  (forwarded to computePartition).
	 */
	template<class Impl>
	void loadBalance(Class_Data_LB_Interface<Impl> & userData, uint8_t & level){
		//Write info on log
		log.writeLog("---------------------------------------------");
		log.writeLog(" LOAD BALANCE ");

		// Target number of octants per process, driven by the requested level.
		uint32_t* partition = new uint32_t [nproc];
		computePartition(partition,level);
		if(serial)
		{
			log.writeLog(" ");
			log.writeLog(" Initial Serial distribution : ");
			for(int ii=0; ii<nproc; ii++){
				log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
			}

			// In the serial case every rank holds the whole tree: keep only the
			// contiguous slice [stride, stride+partition[rank]) assigned to this rank.
			uint32_t stride = 0;
			for(int i = 0; i < rank; ++i)
				stride += partition[i];
			Class_Local_Tree<3>::OctantsType octantsCopy = octree.octants;
			Class_Local_Tree<3>::OctantsType::const_iterator first = octantsCopy.begin() + stride;
			Class_Local_Tree<3>::OctantsType::const_iterator last = first + partition[rank];
			octree.octants.assign(first, last);
#if defined(__INTEL_COMPILER) || defined(__ICC)
#else
			octree.octants.shrink_to_fit();
#endif
			first = octantsCopy.end();
			last = octantsCopy.end();

			// Keep the user data aligned with the retained octant slice.
			userData.assign(stride,partition[rank]);

			//Update and build ghosts here
			updateLoadBalance();
			setPboundGhosts();
		}
		else
		{
			log.writeLog(" ");
			log.writeLog(" Initial Parallel partition : ");
			log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
			for(int ii=1; ii<nproc; ii++){
				log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
			}

			//empty ghosts
			octree.ghosts.clear();
			octree.size_ghosts = 0;
			// Compute the new partition range global indices: newPartitionRangeGlobalidx[p]
			// is the global index of the last octant owned by rank p under the new partition.
			uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
			for(int p = 0; p < nproc; ++p){
				newPartitionRangeGlobalidx[p] = 0;
				for(int pp = 0; pp <= p; ++pp)
					newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
				--newPartitionRangeGlobalidx[p];
			}

			// Find the resident octants' local offsets: lastHead (lh) is the local index
			// of the last octant that must be shipped to a predecessor; firstTail (ft)
			// is the local index of the first octant that must be shipped to a successor.
			// Octants in (lh, ft) stay on this rank.
			int32_t lh,ft;
			if(rank == 0)
				lh = -1;
			else{
				lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
			}
			if(lh < 0)
				lh = - 1;
			// NOTE(review): signed/unsigned comparison — if octree.octants is empty,
			// size()-1 wraps around; presumably octants is never empty here. Verify.
			else if(lh > octree.octants.size() - 1)
				lh = octree.octants.size() - 1;

			if(rank == nproc - 1)
				ft = octree.octants.size();
			else if(rank == 0)
				ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
			else{
				ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
			}
			if(ft > (int32_t)(octree.octants.size() - 1))
				ft = octree.octants.size();
			else if(ft < 0)
				ft = 0;

			//compute size Head and size Tail
			uint32_t headSize = (uint32_t)(lh + 1);
			uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
			uint32_t headOffset = headSize;
			uint32_t tailOffset = tailSize;

			//build send buffers
			map<int,Class_Comm_Buffer> sendBuffers;

			//Compute first predecessor and first successor to send buffers to
			int64_t firstOctantGlobalIdx = 0;// offset to compute global index of each octant in every process
			int64_t globalLastHead = (int64_t) lh;
			int64_t globalFirstTail = (int64_t) ft; //lastHead and firstTail in global ordering
			int firstPredecessor = -1;
			int firstSuccessor = nproc;
			if(rank != 0){
				firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
				globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
				globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
				// Lowest-ranked predecessor that will own part of this rank's head.
				for(int pre = rank - 1; pre >=0; --pre){
					if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
						firstPredecessor = pre;
				}
				// First successor whose new range contains this rank's first tail octant.
				for(int post = rank + 1; post < nproc; ++post){
					if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
						firstSuccessor = post;
				}
			}
			else if(rank == 0){
				firstSuccessor = 1;
			}
			MPI_Barrier(comm); //da spostare prima della prima comunicazione

			// Scratch variables for (un)packing a single octant: coordinates,
			// level, marker and the 16 info flags of a 3D octant.
			uint32_t x,y,z;
			uint8_t l;
			int8_t m;
			bool info[16];
			int intBuffer = 0;
			int contatore = 0;
			// Build send buffers from the Head: walk predecessors from firstPredecessor
			// down to rank 0, peeling octants off the head until it is exhausted.
			uint32_t nofElementsFromSuccessiveToPrevious = 0;
			if(headSize != 0){
				for(int p = firstPredecessor; p >= 0; --p){
					if(headSize < partition[p]){
						// Partial fill: predecessor p takes only part of its quota from us.
						intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
						intBuffer = abs(intBuffer);
						nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
						if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
							nofElementsFromSuccessiveToPrevious = headSize;

						int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
						//compute size of data in buffers
						if(userData.fixedSize()){
							buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
						}
						else{
							for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
								buffSize += userData.size(i);
							}
						}
						//add room for int, number of octants in this buffer
						buffSize += sizeof(int);
						sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
						//store the number of octants at the beginning of the buffer
						MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
						for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
							//PACK octants from 0 to lh in sendBuffer[p]
							const Class_Octant<3> & octant = octree.octants[i];
							x = octant.getX();
							y = octant.getY();
							z = octant.getZ();
							l = octant.getLevel();
							m = octant.getMarker();
							for(int j = 0; j < 16; ++j)
								info[j] = octant.info[j];
							error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							for(int j = 0; j < 16; ++j){
								MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);

							}
							// Append the octant's user data right after its geometry.
							userData.gather(sendBuffers[p],i);
						}
						if(nofElementsFromSuccessiveToPrevious == headSize)
							break;

						// Shrink the head by what was just shipped and continue with
						// the next (lower-ranked) predecessor.
						lh -= nofElementsFromSuccessiveToPrevious;
						globalLastHead -= nofElementsFromSuccessiveToPrevious;
						headSize = lh + 1;
						++contatore;
					}
					else{
						// Full fill: predecessor p takes its whole remaining quota from our head.
						nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
						int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
						//compute size of data in buffers
						if(userData.fixedSize()){
							buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
						}
						else{
							for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
								buffSize += userData.size(i);
							}
						}
						//add room for int, number of octants in this buffer
						buffSize += sizeof(int);
						sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
						//store the number of octants at the beginning of the buffer
						MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
						for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
							//pack octants from lh - partition[p] to lh
							const Class_Octant<3> & octant = octree.octants[i];
							x = octant.getX();
							y = octant.getY();
							z = octant.getZ();
							l = octant.getLevel();
							m = octant.getMarker();
							for(int j = 0; j < 16; ++j)
								info[j] = octant.info[j];
							error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							for(int j = 0; j < 16; ++j){
								MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							}
							userData.gather(sendBuffers[p],i);
						}
						lh -= nofElementsFromSuccessiveToPrevious;
						globalLastHead -= nofElementsFromSuccessiveToPrevious;
						headSize = lh + 1;
						if(headSize == 0)
							break;
					}
				}

			}
			// Build send buffers from the Tail: walk successors from firstSuccessor
			// upward, peeling octants off the tail until it is exhausted.
			uint32_t nofElementsFromPreviousToSuccessive = 0;
			contatore = 0;
			if(tailSize != 0){
				for(int p = firstSuccessor; p < nproc; ++p){
					if(tailSize < partition[p]){
						// Partial fill: successor p takes only part of its quota from us.
						nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
						if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
							nofElementsFromPreviousToSuccessive = tailSize;

						uint32_t octantsSize = (uint32_t)octree.octants.size();
						int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
						//compute size of data in buffers
						if(userData.fixedSize()){
							buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
						}
						else{
							for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
								buffSize += userData.size(i);
							}
						}
						//add room for int, number of octants in this buffer
						buffSize += sizeof(int);
						sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
						//store the number of octants at the beginning of the buffer
						MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
						for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
							//PACK octants from ft to octantsSize-1
							const Class_Octant<3> & octant = octree.octants[i];
							x = octant.getX();
							y = octant.getY();
							z = octant.getZ();
							l = octant.getLevel();
							m = octant.getMarker();
							for(int j = 0; j < 16; ++j)
								info[j] = octant.info[j];
							error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							for(int j = 0; j < 16; ++j){
								MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							}
							userData.gather(sendBuffers[p],i);
						}
						if(nofElementsFromPreviousToSuccessive == tailSize)
							break;
						ft += nofElementsFromPreviousToSuccessive;
						globalFirstTail += nofElementsFromPreviousToSuccessive;
						tailSize -= nofElementsFromPreviousToSuccessive;
						++contatore;
					}
					else{
						// Full fill: successor p takes its whole remaining quota from our tail.
						nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
						uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
						int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
						//compute size of data in buffers
						if(userData.fixedSize()){
							buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
						}
						else{
							for(uint32_t i = ft; i <= endOctants; ++i){
								buffSize += userData.size(i);
							}
						}
						//add room for int, number of octants in this buffer
						buffSize += sizeof(int);
						sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
						// NOTE(review): this packs partition[p] as the octant count while the
						// loop below actually packs nofElementsFromPreviousToSuccessive octants;
						// for the first successor these can differ. The head-side branches pack
						// nofElements* instead — confirm whether partition[p] here is intended.
						//store the number of octants at the beginning of the buffer
						MPI_Pack(&partition[p],1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
						for(uint32_t i = ft; i <= endOctants; ++i ){
							//PACK octants from ft to ft + partition[p] -1
							const Class_Octant<3> & octant = octree.octants[i];
							x = octant.getX();
							y = octant.getY();
							z = octant.getZ();
							l = octant.getLevel();
							m = octant.getMarker();
							for(int j = 0; j < 16; ++j)
								info[j] = octant.info[j];
							error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							for(int j = 0; j < 16; ++j){
								MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
							}
							userData.gather(sendBuffers[p],i);
						}
						ft += nofElementsFromPreviousToSuccessive;
						globalFirstTail += nofElementsFromPreviousToSuccessive;
						tailSize -= nofElementsFromPreviousToSuccessive;
						if(tailSize == 0)
							break;
					}
				}
			}

			// Build receiver sources: every rank advertises (via Allgather/Allgatherv)
			// the list of ranks it sends to, so each rank can derive who sends to it.
			vector<Class_Array> recvs(nproc);
			recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
			recvs[rank].array[0] = rank;
			int counter = 1;
			map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
			for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
				recvs[rank].array[counter] = sit->first;
				++counter;
			}
			int* nofRecvsPerProc = new int[nproc];
			error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
			int globalRecvsBuffSize = 0;
			int* displays = new int[nproc];
			for(int pp = 0; pp < nproc; ++pp){
				displays[pp] = 0;
				globalRecvsBuffSize += nofRecvsPerProc[pp];
				for(int ppp = 0; ppp < pp; ++ppp){
					displays[pp] += nofRecvsPerProc[ppp];
				}
			}
			int* globalRecvsBuff = new int[globalRecvsBuffSize];
			error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);

			// Invert the advertised send lists: sendersPerProc[q] = set of ranks sending to q.
			vector<set<int> > sendersPerProc(nproc);
			for(int pin = 0; pin < nproc; ++pin){
				for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
					sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
				}
			}

			//Communicate Octants (size)
			MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
			MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
			int nReq = 0;
			map<int,int> recvBufferSizePerProc;
			set<int>::iterator senditend = sendersPerProc[rank].end();
			for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
				recvBufferSizePerProc[*sendit] = 0;
				error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
				++nReq;
			}
			map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
			for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
				error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
				++nReq;
			}
			MPI_Waitall(nReq,req,stats);

			//COMMUNICATE THE BUFFERS TO THE RECEIVERS
			//recvBuffers structure is declared and each buffer is initialized to the right size
			//then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
			uint32_t nofNewHead = 0;
			uint32_t nofNewTail = 0;
			map<int,Class_Comm_Buffer> recvBuffers;

			map<int,int>::iterator ritend = recvBufferSizePerProc.end();
			for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
				recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
			}

			nReq = 0;
			for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
				//nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
				error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
				++nReq;
			}
			for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
				error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
				++nReq;
			}
			MPI_Waitall(nReq,req,stats);

			// Unpack the per-sender octant count (first uint32 of each buffer) and
			// split the incoming octants into head (from lower ranks) and tail
			// (from higher ranks) contributions.
			map<int,uint32_t> nofNewOverProcs;
			map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
			for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
				uint32_t nofNewPerProc;
				MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofNewPerProc,1,MPI_UINT32_T,comm);
				nofNewOverProcs[rbit->first] = nofNewPerProc;
				if(rbit->first < rank)
					nofNewHead += nofNewPerProc;
				else if(rbit->first > rank)
					nofNewTail += nofNewPerProc;
			}

			//MOVE RESIDENT TO BEGIN IN OCTANTS
			uint32_t resEnd = octree.getNumOctants() - tailOffset;
			uint32_t nofResidents = resEnd - headOffset;
			uint32_t octCounter = 0;
			for(uint32_t i = headOffset; i < resEnd; ++i){
				octree.octants[octCounter] = octree.octants[i];
				userData.move(i,octCounter);
				++octCounter;
			}
			uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
			octree.octants.resize(newCounter);
			userData.resize(newCounter);
			// MOVE RESIDENTS IN RIGHT POSITION: shift them (back-to-front, to avoid
			// overwriting) so the new head octants can be unpacked in front of them.
			uint32_t resCounter = nofNewHead + nofResidents - 1;
			for(uint32_t k = 0; k < nofResidents ; ++k){
				octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
				//TODO move data - DON
				userData.move(nofResidents - k - 1,resCounter - k);
			}

			// UNPACK BUFFERS AND BUILD NEW OCTANTS: buffers are visited in ascending
			// sender rank; once the first higher-ranked sender is reached, skip over
			// the resident block so tail octants land after it.
			newCounter = 0;
			bool jumpResident = false;

			for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
				//TODO change new octants counting, probably you have to communicate the number of news per proc
				uint32_t nofNewPerProc = nofNewOverProcs[rbit->first];//(uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)octantBytes / (double)(CHAR_BIT/8)));
				if(rbit->first > rank && !jumpResident){
					newCounter += nofResidents ;
					jumpResident = true;
				}
				for(int i = nofNewPerProc - 1; i >= 0; --i){
					error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&x,1,MPI_UINT32_T,comm);
					error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&y,1,MPI_UINT32_T,comm);
					error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&z,1,MPI_UINT32_T,comm);
					error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&l,1,MPI_UINT8_T,comm);
					octree.octants[newCounter] = Class_Octant<3>(l,x,y,z);
					error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&m,1,MPI_INT8_T,comm);
					octree.octants[newCounter].setMarker(m);
					for(int j = 0; j < 16; ++j){
						error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&info[j],1,MPI::BOOL,comm);
						octree.octants[newCounter].info[j] = info[j];
					}
					//TODO Unpack data
					userData.scatter(rbit->second,newCounter);
					++newCounter;
				}
			}
#if defined(__INTEL_COMPILER) || defined(__ICC)
#else
			octree.octants.shrink_to_fit();
#endif
			userData.shrink();

			// Release all communication scratch storage.
			delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
			delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
			delete [] displays; displays = NULL;
			delete [] req; req = NULL;
			delete [] stats; stats = NULL;
			delete [] globalRecvsBuff; globalRecvsBuff = NULL;

			//Update and ghosts here
			updateLoadBalance();
			setPboundGhosts();
			uint32_t nofGhosts = getNumGhosts();
			userData.resizeGhost(nofGhosts);

		}
		delete [] partition;
		partition = NULL;

		//Write info of final partition on log
		log.writeLog(" ");
		log.writeLog(" Final Parallel partition : ");
		log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
		for(int ii=1; ii<nproc; ii++){
			log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
		}
		log.writeLog(" ");
		log.writeLog("---------------------------------------------");

	};
4554 #endif /* NOMPI */
4555  //=================================================================================//
4556 
4557 private:
4558  void updateAdapt(){ //update Class_Para_Tree members after a refine and/or coarse
4559 #if NOMPI==0
4560  if(serial)
4561  {
4562 #endif
4563  max_depth = octree.local_max_depth;
4564  global_num_octants = octree.getNumOctants();
4565  for(int p = 0; p < nproc; ++p){
4566  partition_range_globalidx[p] = global_num_octants - 1;
4567  }
4568 #if NOMPI==0
4569  }
4570  else
4571  {
4572  //update max_depth
4573  error_flag = MPI_Allreduce(&octree.local_max_depth,&max_depth,1,MPI_UINT8_T,MPI_MAX,comm);
4574  //update global_num_octants
4575  uint64_t local_num_octants = (uint64_t) octree.getNumOctants();
4576  error_flag = MPI_Allreduce(&local_num_octants,&global_num_octants,1,MPI_UINT64_T,MPI_SUM,comm);
4577  //update partition_range_globalidx
4578  uint64_t* rbuff = new uint64_t[nproc];
4579  error_flag = MPI_Allgather(&local_num_octants,1,MPI_UINT64_T,rbuff,1,MPI_UINT64_T,comm);
4580  for(int p = 0; p < nproc; ++p){
4581  partition_range_globalidx[p] = 0;
4582  for(int pp = 0; pp <=p; ++pp)
4583  partition_range_globalidx[p] += rbuff[pp];
4584  --partition_range_globalidx[p];
4585  }
4586  //update partition_range_position
4587  uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
4588  error_flag = MPI_Allgather(&lastDescMorton,1,MPI_UINT64_T,partition_last_desc,1,MPI_UINT64_T,comm);
4589  uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
4590  error_flag = MPI_Allgather(&firstDescMorton,1,MPI_UINT64_T,partition_first_desc,1,MPI_UINT64_T,comm);
4591  delete [] rbuff; rbuff = NULL;
4592  }
4593 #endif
4594  };
4595 
4596  //=================================================================================//
4597 
4598  void updateAfterCoarse(){ //update Class_Para_Tree members and delete overlapping octants after a coarse
4599 #if NOMPI==0
4600  if(serial){
4601 #endif
4602  updateAdapt();
4603 #if NOMPI==0
4604  }
4605  else{
4606  //Only if parallel
4607  updateAdapt();
4608  uint64_t lastDescMortonPre, firstDescMortonPost;
4609  lastDescMortonPre = (rank!=0) * partition_last_desc[rank-1];
4610  firstDescMortonPost = (rank<nproc-1)*partition_first_desc[rank+1] + (rank==nproc-1)*partition_last_desc[rank];
4611  octree.checkCoarse(lastDescMortonPre, firstDescMortonPost);
4612  updateAdapt();
4613  }
4614 #endif
4615  };
4616 
4617  //=================================================================================//
4618 
4619  void updateAfterCoarse(u32vector & mapidx){ //update Class_Para_Tree members and delete overlapping octants after a coarse
4620 #if NOMPI==0
4621  if(serial){
4622 #endif
4623  updateAdapt();
4624 #if NOMPI==0
4625  }
4626  else{
4627  //Only if parallel
4628  updateAdapt();
4629  uint64_t lastDescMortonPre, firstDescMortonPost;
4630  lastDescMortonPre = (rank!=0) * partition_last_desc[rank-1];
4631  firstDescMortonPost = (rank<nproc-1)*partition_first_desc[rank+1] + (rank==nproc-1)*partition_last_desc[rank];
4632  octree.checkCoarse(lastDescMortonPre, firstDescMortonPost, mapidx);
4633  updateAdapt();
4634  }
4635 #endif
4636  };
4637 
4638  //=================================================================================//
4639 
4640 #if NOMPI==0
4641  void commMarker(){ // communicates marker of ghosts
4642  // borderPerProcs has to be built
4643 
4644  //PACK (mpi) LEVEL AND MARKER OF BORDER OCTANTS IN CHAR BUFFERS WITH SIZE (map value) TO BE SENT TO THE RIGHT PROCESS (map key)
4645  //it visits every element in bordersPerProc (one for every neighbor proc)
4646  //for every element it visits the border octants it contains and pack its marker in a new structure, sendBuffers
4647  //this map has an entry Class_Comm_Buffer for every proc containing the size in bytes of the buffer and the octants marker
4648  //to be sent to that proc packed in a char* buffer
4649  int8_t marker;
4650  bool mod;
4651  map<int,Class_Comm_Buffer> sendBuffers;
4652  map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
4653  uint32_t pbordersOversize = 0;
4654  for(map<int,vector<uint32_t> >::iterator bit = bordersPerProc.begin(); bit != bitend; ++bit){
4655  pbordersOversize += bit->second.size();
4656  int buffSize = bit->second.size() * (int)ceil((double)(global3D.markerBytes + global3D.boolBytes) / (double)(CHAR_BIT/8));
4657  int key = bit->first;
4658  const vector<uint32_t> & value = bit->second;
4659  sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
4660  int pos = 0;
4661  int nofBorders = value.size();
4662  for(int i = 0; i < nofBorders; ++i){
4663  //the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
4664  const Class_Octant<3> & octant = octree.octants[value[i]];
4665  marker = octant.getMarker();
4666  mod = octant.info[15];
4667  error_flag = MPI_Pack(&marker,1,MPI_INT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
4668  error_flag = MPI_Pack(&mod,1,MPI::BOOL,sendBuffers[key].commBuffer,buffSize,&pos,comm);
4669  }
4670  }
4671 
4672  //COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
4673  //the size of every borders buffer is communicated to the right process in order to build the receive buffer
4674  //and stored in the recvBufferSizePerProc structure
4675  MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
4676  MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
4677  int nReq = 0;
4678  map<int,int> recvBufferSizePerProc;
4679  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
4680  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4681  recvBufferSizePerProc[sit->first] = 0;
4682  error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
4683  ++nReq;
4684  }
4685  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
4686  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4687  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
4688  ++nReq;
4689  }
4690  MPI_Waitall(nReq,req,stats);
4691 
4692  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
4693  //recvBuffers structure is declared and each buffer is initialized to the right size
4694  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
4695  //at the same time every process compute the size in bytes of all the level and marker of ghost octants
4696  uint32_t nofBytesOverProc = 0;
4697  map<int,Class_Comm_Buffer> recvBuffers;
4698  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
4699  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
4700  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
4701  }
4702  nReq = 0;
4703  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4704  nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
4705  error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
4706  ++nReq;
4707  }
4708  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4709  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
4710  ++nReq;
4711  }
4712  MPI_Waitall(nReq,req,stats);
4713 
4714  //UNPACK BUFFERS AND BUILD GHOSTS CONTAINER OF CLASS_LOCAL_TREE
4715  //every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked octant by octant.
4716  //every ghost octant is built and put in the ghost vector
4717  uint32_t ghostCounter = 0;
4718  map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
4719  for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
4720  int pos = 0;
4721  int nofGhostsPerProc = int(rrit->second.commBufferSize / ((uint32_t) (global3D.markerBytes + global3D.boolBytes)));
4722  for(int i = 0; i < nofGhostsPerProc; ++i){
4723  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&marker,1,MPI_INT8_T,comm);
4724  octree.ghosts[ghostCounter].setMarker(marker);
4725  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&mod,1,MPI::BOOL,comm);
4726  octree.ghosts[ghostCounter].info[15] = mod;
4727  ++ghostCounter;
4728  }
4729  }
4730  recvBuffers.clear();
4731  sendBuffers.clear();
4732  recvBufferSizePerProc.clear();
4733  delete [] req; req = NULL;
4734  delete [] stats; stats = NULL;
4735 
4736  };
4737 #endif
4738  //=================================================================================//
4739 
 /** Enforce the 2:1 balance constraint on the refinement markers of the
  *  (possibly distributed) octree.
  *  \param[in] first true when called before the first refinement sweep of an
  *             adapt: uses localBalance() and writes a detailed log trail;
  *             false uses localBalanceAll() and logs nothing.
  *  MPI build: iterates (commMarker -> local balance -> preBalance21) until an
  *  MPI_Allreduce with logical OR reports that no rank modified a marker.
  *  Serial build (NOMPI): the same iteration without marker communication.
  */
4740  void balance21(bool const first){
4741 #if NOMPI==0
4742  bool globalDone = true, localDone = false;
4743  int iteration = 0;
4744 
 // Exchange border-octant markers with neighbouring ranks, then pre-process
 // the local octants for the 2:1 constraint.
4745  commMarker();
4746  octree.preBalance21(true);
4747 
4748  if (first){
4749  log.writeLog("---------------------------------------------");
4750  log.writeLog(" 2:1 BALANCE (balancing Marker before Adapt)");
4751  log.writeLog(" ");
4752  log.writeLog(" Iterative procedure ");
4753  log.writeLog(" ");
4754  log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4755 
4756  commMarker();
4757 
 // localBalance returns whether this rank changed at least one marker
 // (inferred from its use as the loop-termination flag below).
4758  localDone = octree.localBalance(true);
4759  commMarker();
4760  octree.preBalance21(false);
4761  MPI_Barrier(comm);
 // globalDone == true while ANY rank is still changing markers.
4762  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4763 
4764  while(globalDone){
4765  iteration++;
4766  log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4767  commMarker();
4768  localDone = octree.localBalance(false);
4769  commMarker();
4770  octree.preBalance21(false);
4771  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4772  }
4773 
 // Final marker exchange so every rank sees the balanced state.
4774  commMarker();
4775  log.writeLog(" Iteration : Finalizing ");
4776  log.writeLog(" ");
4777  //localDone = octree.localBalance(false);
4778  //commMarker();
4779  //octree.preBalance21(true);
4780  //commMarker();
4781 
4782  log.writeLog(" 2:1 Balancing reached ");
4783  log.writeLog(" ");
4784  log.writeLog("---------------------------------------------");
4785 
4786  }
4787  else{
4788 
 // Same iterative scheme, but balancing ALL octants (localBalanceAll)
 // and without writing to the log.
4789  commMarker();
4790  MPI_Barrier(comm);
4791  localDone = octree.localBalanceAll(true);
4792  commMarker();
4793  octree.preBalance21(false);
4794  MPI_Barrier(comm);
4795  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4796 
4797  while(globalDone){
4798  iteration++;
4799  commMarker();
4800  localDone = octree.localBalanceAll(false);
4801  commMarker();
4802  octree.preBalance21(false);
4803  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4804  }
4805 
4806  commMarker();
4807 // localDone = octree.localBalance(false);
4808 // commMarker();
4809 // octree.preBalance21(false);
4810 // commMarker();
4811 
4812  }
4813 #else
 // Serial build: iterate the local balance until it reports no change.
4814  bool localDone = false;
4815  int iteration = 0;
4816 
4817  octree.preBalance21(true);
4818 
4819  if (first){
4820  log.writeLog("---------------------------------------------");
4821  log.writeLog(" 2:1 BALANCE (balancing Marker before Adapt)");
4822  log.writeLog(" ");
4823  log.writeLog(" Iterative procedure ");
4824  log.writeLog(" ");
4825  log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4826 
4827 
4828  localDone = octree.localBalance(true);
4829  octree.preBalance21(false);
4830 
4831  while(localDone){
4832  iteration++;
4833  log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4834  localDone = octree.localBalance(false);
4835  octree.preBalance21(false);
4836  }
4837 
4838  log.writeLog(" Iteration : Finalizing ");
4839  log.writeLog(" ");
4840 // localDone = octree.localBalance(false);
4841 // octree.preBalance21(false);
4842 
4843  log.writeLog(" 2:1 Balancing reached ");
4844  log.writeLog(" ");
4845  log.writeLog("---------------------------------------------");
4846 
4847  }
4848  else{
4849 
4850  localDone = octree.localBalanceAll(true);
4851  octree.preBalance21(false);
4852 
4853  while(localDone){
4854  iteration++;
4855  localDone = octree.localBalanceAll(false);
4856  octree.preBalance21(false);
4857  }
4858 
4859 // localDone = octree.localBalance(false);
4860 // octree.preBalance21(false);
4861 
4862  }
4863 
4864 #endif /* NOMPI */
4865  }
4866 
4867  //=================================================================================//
4868 
4869 public:
 /** Adapt the octree: 2:1-balance the markers, then refine and coarsen the
  *  local tree according to them.
  *  \return true if at least one octant was refined or coarsened (in the MPI
  *          build: on ANY rank, via MPI_Allreduce with logical OR).
  *  Side effect: increments the status counter by the returned flag.
  *  NOTE: the if(serial)/else braces intentionally straddle the
  *  #if NOMPI==0 blocks — edit the preprocessor structure with care.
  */
4872  bool adapt(){
4873  bool globalDone = false, localDone = false;
4874  uint32_t nocts = octree.getNumOctants();
4875  vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
4876 
 // Reset the per-octant adaptation flags (info[12]=new-after-refine,
 // info[13]=new-after-coarse, info[15]=marker-modified — per their use in
 // adapt/commMarker below; confirm against Class_Octant docs).
4877  for (iter = octree.octants.begin(); iter != iterend; iter++){
4878  iter->info[12] = false;
4879  iter->info[13] = false;
4880  iter->info[15] = false;
4881  }
4882 #if NOMPI==0
4883  if(serial){
4884 #endif
4885  log.writeLog("---------------------------------------------");
4886  log.writeLog(" ADAPT (Refine/Coarse)");
4887  log.writeLog(" ");
4888 
4889  // 2:1 Balance
4890  balance21(true);
4891 
4892  log.writeLog(" ");
4893  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4894 
 // Refine repeatedly until a sweep produces no further refinement.
4895  // Refine
4896  while(octree.refine());
4897 
4898  if (octree.getNumOctants() > nocts)
4899  localDone = true;
4900  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4901  nocts = octree.getNumOctants();
4902  updateAdapt();
4903 
 // Coarsen repeatedly, then refresh the local/global bookkeeping.
4904  // Coarse
4905  while(octree.coarse());
4906  updateAfterCoarse();
4907 // balance21(false);
4908 // while(octree.refine());
4909 // updateAdapt();
4910  if (octree.getNumOctants() < nocts){
4911  localDone = true;
4912  }
4913  nocts = octree.getNumOctants();
4914 
4915  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
4916 #if NOMPI==0
 // Even in the serial-distribution case an Allreduce keeps ranks in sync.
4917  MPI_Barrier(comm);
4918  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4919 #endif
4920  log.writeLog(" ");
4921  log.writeLog("---------------------------------------------");
4922 #if NOMPI==0
4923  }
4924  else{
 // Distributed case: same sequence, plus ghost rebuild (setPboundGhosts)
 // after each structural change.
4925  log.writeLog("---------------------------------------------");
4926  log.writeLog(" ADAPT (Refine/Coarse)");
4927  log.writeLog(" ");
4928 
4929  // 2:1 Balance
4930  balance21(true);
4931 
4932  log.writeLog(" ");
4933  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4934 
4935  // Refine
4936  while(octree.refine());
4937  if (octree.getNumOctants() > nocts){
4938  localDone = true;
4939  }
4940  updateAdapt();
4941  setPboundGhosts();
4942  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4943  nocts = octree.getNumOctants();
4944 
4945  // Coarse
4946  while(octree.coarse());
4947  updateAfterCoarse();
4948  setPboundGhosts();
4949 // balance21(false);
4950 // while(octree.refine());
4951 // updateAdapt();
4952 // setPboundGhosts();
4953  if (octree.getNumOctants() < nocts){
4954  localDone = true;
4955  }
4956  nocts = octree.getNumOctants();
4957 
4958  MPI_Barrier(comm);
4959  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4960  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4961  log.writeLog(" ");
4962  log.writeLog("---------------------------------------------");
4963  }
 // status accumulates the number of adapts that changed the mesh
 // (bool promotes to 0/1).
4964  status += globalDone;
4965  return globalDone;
4966 #else
4967  status += localDone;
4968  return localDone;
4969 #endif
4970  };
4971 
4972  //=================================================================================//
4973 
4974 private:
 /** Adapt the octree like adapt(), additionally maintaining the member
  *  vector mapidx: mapidx[i] is the pre-adaptation local index of the octant
  *  that generated octant i (filled in by refine/coarse overloads).
  *  \return true if at least one octant was refined or coarsened (global
  *          result in the MPI build). Does NOT touch the status counter —
  *          callers (adapt(bool), adapt(u32vector&)) update it themselves.
  */
4978  bool adapt_mapidx(){ //call refine and coarse on the local tree
4979  //TODO recoding for adapting with abs(marker) > 1
4980  bool globalDone = false, localDone = false;
4981  uint32_t nocts = octree.getNumOctants();
4982  vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
4983 
 // Reset the per-octant adaptation flags before this adapt cycle.
4984  for (iter = octree.octants.begin(); iter != iterend; iter++){
4985  iter->info[12] = false;
4986  iter->info[13] = false;
4987  iter->info[15] = false;
4988  }
4989 
 // Start from the identity mapping; refine/coarse update it as they
 // create and destroy octants.
4990  // mapidx init
4991  mapidx.clear();
4992  mapidx.resize(nocts);
4993 #if defined(__INTEL_COMPILER) || defined(__ICC)
4994 #else
 // shrink_to_fit guarded out for Intel compilers that lacked it.
4995  mapidx.shrink_to_fit();
4996 #endif
4997  for (uint32_t i=0; i<nocts; i++){
4998  mapidx[i] = i;
4999  }
5000 #if NOMPI==0
5001  if(serial){
5002 #endif
5003  log.writeLog("---------------------------------------------");
5004  log.writeLog(" ADAPT (Refine/Coarse)");
5005  log.writeLog(" ");
5006 
5007  // 2:1 Balance
5008  balance21(true);
5009 
5010  log.writeLog(" ");
5011  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5012 
 // Refine until stable, tracking the index mapping.
5013  // Refine
5014  while(octree.refine(mapidx));
5015 
5016  if (octree.getNumOctants() > nocts)
5017  localDone = true;
5018  nocts = octree.getNumOctants();
5019  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(nocts)));
5020  updateAdapt();
5021 
5022  // Coarse
5023  while(octree.coarse(mapidx));
5024  updateAfterCoarse(mapidx);
5025 // balance21(false);
5026 // while(octree.refine(mapidx));
5027 // updateAdapt();
5028  if (octree.getNumOctants() < nocts){
5029  localDone = true;
5030  }
5031  nocts = octree.getNumOctants();
5032 
5033  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5034 #if NOMPI==0
5035  MPI_Barrier(comm);
5036  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5037 #endif
5038  log.writeLog(" ");
5039  log.writeLog("---------------------------------------------");
5040 #if NOMPI==0
5041  }
5042  else{
 // Distributed case: ghosts must be rebuilt after refine and coarse.
5043  log.writeLog("---------------------------------------------");
5044  log.writeLog(" ADAPT (Refine/Coarse)");
5045  log.writeLog(" ");
5046 
5047  // 2:1 Balance
5048  balance21(true);
5049 
5050  log.writeLog(" ");
5051  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5052 
5053  // Refine
5054  while(octree.refine(mapidx));
5055  if (octree.getNumOctants() > nocts)
5056  localDone = true;
5057  updateAdapt();
5058  setPboundGhosts();
5059  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5060  nocts = octree.getNumOctants();
5061 
5062  // Coarse
5063  while(octree.coarse(mapidx));
5064  updateAfterCoarse(mapidx);
5065  setPboundGhosts();
5066 // balance21(false);
5067 // while(octree.refine(mapidx));
5068 // updateAdapt();
5069 // setPboundGhosts();
5070  if (octree.getNumOctants() < nocts){
5071  localDone = true;
5072  }
5073  nocts = octree.getNumOctants();
5074 
5075  MPI_Barrier(comm);
5076  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5077  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5078  log.writeLog(" ");
5079  log.writeLog("---------------------------------------------");
5080  }
5081  return globalDone;
5082 #else
5083  return localDone;
5084 #endif
5085  };
5086 
5087  // =============================================================================== //
5088 
5089 public:
5094  bool adapt(bool mapper_flag){
5095 
5096  bool done = false;
5097 
5098  if (mapper_flag){
5099  done = adapt_mapidx();
5100  status += done;
5101  return done;
5102  }
5103  else{
5104  done = adapt();
5105  status += done;
5106  return done;
5107  }
5108 
5109  };
5110 
5111  // =============================================================================== //
5112 
5113  // TODO TEMPORARY!!!!
5120  bool adapt(u32vector & mapper){
5121 
5122  bool done = false;
5123  done = adapt_mapidx();
5124  status += done;
5125  mapper.clear();
5126  mapper = mapidx;
5127  return done;
5128 
5129  };
5130 
5131  // =============================================================================== //
5132 
5141  void getMapping(uint32_t & idx, u32vector & mapper, vector<bool> & isghost){
5142 
5143  uint32_t i, nocts = getNumOctants();
5144  uint32_t nghbro = octree.last_ghost_bros.size();;
5145 
5146  mapper.clear();
5147  isghost.clear();
5148 
5149  mapper.push_back(mapidx[idx]);
5150  isghost.push_back(false);
5151  if (getIsNewC(idx)){
5152  if (idx < nocts-1 || !nghbro){
5153  for (i=1; i<global3D.nchildren; i++){
5154  mapper.push_back(mapidx[idx]+i);
5155  isghost.push_back(false);
5156  }
5157  }
5158  else if (idx == nocts-1 && nghbro){
5159  for (i=1; i<global3D.nchildren-nghbro; i++){
5160  mapper.push_back(mapidx[idx]+i);
5161  isghost.push_back(false);
5162  }
5163  for (i=0; i<nghbro; i++){
5164  mapper.push_back(octree.last_ghost_bros[i]);
5165  isghost.push_back(true);
5166  }
5167  }
5168  }
5169 
5170  };
5171 
5172  // =============================================================================== //
5173 
 // Body of (presumably) bool adaptGlobalRefine() — the signature line sits
 // outside this listing. Refines EVERY local octant once (globalRefine),
 // regardless of markers, and returns whether anything changed (global
 // result via MPI_Allreduce in the MPI build).
 // NOTE(review): cDone is declared but never used in this body.
5177  bool globalDone = false, localDone = false, cDone = false;
5178  uint32_t nocts = octree.getNumOctants();
5179  vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
5180 
 // Reset the per-octant adaptation flags.
5181  for (iter = octree.octants.begin(); iter != iterend; iter++){
5182  iter->info[12] = false;
5183  iter->info[13] = false;
5184  iter->info[15] = false;
5185  }
5186 #if NOMPI==0
5187  if(serial){
5188 #endif
5189  log.writeLog("---------------------------------------------");
5190  log.writeLog(" ADAPT (GlobalRefine)");
5191  log.writeLog(" ");
5192 
5193  log.writeLog(" ");
5194  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5195 
 // Refine all octants until a sweep makes no change.
5196  // Refine
5197  while(octree.globalRefine());
5198 
5199  if (octree.getNumOctants() > nocts)
5200  localDone = true;
5201  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5202  nocts = octree.getNumOctants();
5203  updateAdapt();
5204 
5205 #if NOMPI==0
5206  MPI_Barrier(comm);
5207  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5208 #endif
5209  log.writeLog(" ");
5210  log.writeLog("---------------------------------------------");
5211 #if NOMPI==0
5212  }
5213  else{
 // Distributed case: rebuild ghosts after the refinement.
5214  log.writeLog("---------------------------------------------");
5215  log.writeLog(" ADAPT (Global Refine)");
5216  log.writeLog(" ");
5217 
5218  log.writeLog(" ");
5219  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5220 
5221  // Refine
5222  while(octree.globalRefine());
5223  if (octree.getNumOctants() > nocts)
5224  localDone = true;
5225  updateAdapt();
5226  setPboundGhosts();
5227  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5228  nocts = octree.getNumOctants();
5229 
5230  MPI_Barrier(comm);
5231  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5232  log.writeLog(" ");
5233  log.writeLog("---------------------------------------------");
5234  }
5235  return globalDone;
5236 #else
5237  return localDone;
5238 #endif
5239  }
5240 
5241  // =============================================================================== //
5242 
 /** Refine every local octant once (marker-independent), tracking the
  *  pre/post-adaptation index mapping in the caller-supplied vector.
  *  \param[out] mapidx mapidx[i] = pre-refinement index of the octant that
  *              generated octant i. NOTE: this parameter shadows the member
  *              of the same name — the member is NOT updated here.
  *  \return true if at least one octant was refined (global result in the
  *          MPI build).
  */
5250  bool adaptGlobalRefine(u32vector & mapidx) {
5251  //TODO recoding for adapting with abs(marker) > 1
5252  bool globalDone = false, localDone = false;
5253  uint32_t nocts = octree.getNumOctants();
5254  vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
5255 
 // Reset the per-octant adaptation flags.
5256  for (iter = octree.octants.begin(); iter != iterend; iter++){
5257  iter->info[12] = false;
5258  iter->info[13] = false;
5259  iter->info[15] = false;
5260  }
5261 
 // Start from the identity mapping; globalRefine updates it.
5262  // mapidx init
5263  mapidx.clear();
5264  mapidx.resize(nocts);
5265 #if defined(__INTEL_COMPILER) || defined(__ICC)
5266 #else
5267  mapidx.shrink_to_fit();
5268 #endif
5269  for (uint32_t i=0; i<nocts; i++){
5270  mapidx[i] = i;
5271  }
5272 #if NOMPI==0
5273  if(serial){
5274 #endif
5275  log.writeLog("---------------------------------------------");
5276  log.writeLog(" ADAPT (Global Refine)");
5277  log.writeLog(" ");
5278 
5279  log.writeLog(" ");
5280  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5281 
5282  // Refine
5283  while(octree.globalRefine(mapidx));
5284 
5285  if (octree.getNumOctants() > nocts)
5286  localDone = true;
5287  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5288  nocts = octree.getNumOctants();
5289  updateAdapt();
5290 
5291 #if NOMPI==0
5292  MPI_Barrier(comm);
5293  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5294 #endif
5295  log.writeLog(" ");
5296  log.writeLog("---------------------------------------------");
5297 #if NOMPI==0
5298  }
5299  else{
 // Distributed case: rebuild ghosts after the refinement.
5300  log.writeLog("---------------------------------------------");
5301  log.writeLog(" ADAPT (Global Refine)");
5302  log.writeLog(" ");
5303 
5304  log.writeLog(" ");
5305  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5306 
5307  // Refine
5308  while(octree.globalRefine(mapidx));
5309  if (octree.getNumOctants() > nocts)
5310  localDone = true;
5311  updateAdapt();
5312  setPboundGhosts();
5313  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5314  nocts = octree.getNumOctants();
5315 
5316  MPI_Barrier(comm);
5317  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5318  log.writeLog(" ");
5319  log.writeLog("---------------------------------------------");
5320  }
5321  return globalDone;
5322 #else
5323  return localDone;
5324 #endif
5325  }
5326 
5327  // =============================================================================== //
5328 
 // Body of (presumably) bool adaptGlobalCoarse() — the signature line sits
 // outside this listing. Coarsens every local octant family once
 // (globalCoarse), then re-establishes 2:1 balance and refines as needed.
 // Returns whether anything changed (global result in the MPI build).
 // NOTE(review): cDone is declared but never used in this body.
5332  bool globalDone = false, localDone = false, cDone = false;
5333  uint32_t nocts = octree.getNumOctants();
5334  vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
5335 
 // Reset the per-octant adaptation flags.
5336  for (iter = octree.octants.begin(); iter != iterend; iter++){
5337  iter->info[12] = false;
5338  iter->info[13] = false;
5339  iter->info[15] = false;
5340  }
5341 #if NOMPI==0
5342  if(serial){
5343 #endif
5344  log.writeLog("---------------------------------------------");
5345  log.writeLog(" ADAPT (Global Coarse)");
5346  log.writeLog(" ");
5347 
5348  // 2:1 Balance
5349  balance21(true);
5350 
5351  log.writeLog(" ");
5352  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5353 
 // Coarsen everywhere, then rebalance and refine to restore 2:1.
5354  // Coarse
5355  while(octree.globalCoarse());
5356  updateAfterCoarse();
5357  balance21(false);
5358  while(octree.refine());
5359  updateAdapt();
5360  if (octree.getNumOctants() < nocts){
5361  localDone = true;
5362  }
5363  nocts = octree.getNumOctants();
5364 
5365  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5366 #if NOMPI==0
5367  MPI_Barrier(comm);
5368  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5369 #endif
5370  log.writeLog(" ");
5371  log.writeLog("---------------------------------------------");
5372 #if NOMPI==0
5373  }
5374  else{
 // Distributed case: rebuild ghosts after every structural change.
5375  log.writeLog("---------------------------------------------");
5376  log.writeLog(" ADAPT (Global Coarse)");
5377  log.writeLog(" ");
5378 
5379  // 2:1 Balance
5380  balance21(true);
5381 
5382  log.writeLog(" ");
5383  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5384 
5385  // Coarse
5386  while(octree.globalCoarse());
5387  updateAfterCoarse();
5388  setPboundGhosts();
5389  balance21(false);
5390  while(octree.refine());
5391  updateAdapt();
5392  setPboundGhosts();
5393  if (octree.getNumOctants() < nocts){
5394  localDone = true;
5395  }
5396  nocts = octree.getNumOctants();
5397 
5398 
5399  MPI_Barrier(comm);
5400  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5401  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5402  log.writeLog(" ");
5403  log.writeLog("---------------------------------------------");
5404  }
5405  return globalDone;
5406 #else
5407  return localDone;
5408 #endif
5409  }
5410 
5411  // =============================================================================== //
5412 
 /** Coarsen every local octant family once (marker-independent), tracking
  *  the pre/post-adaptation index mapping; afterwards rebalances 2:1 and
  *  refines where the constraint requires it.
  *  \param[out] mapidx mapidx[i] = pre-adaptation index of the octant that
  *              generated octant i. NOTE: this parameter shadows the member
  *              of the same name — the member is NOT updated here.
  *  \return true if the octant count decreased on any rank (MPI build:
  *          via MPI_Allreduce with logical OR).
  */
5420  bool adaptGlobalCoarse(u32vector & mapidx) {
5421  //TODO recoding for adapting with abs(marker) > 1
5422  bool globalDone = false, localDone = false;
5423  uint32_t nocts = octree.getNumOctants();
5424  vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
5425 
 // Reset the per-octant adaptation flags.
5426  for (iter = octree.octants.begin(); iter != iterend; iter++){
5427  iter->info[12] = false;
5428  iter->info[13] = false;
5429  iter->info[15] = false;
5430  }
5431 
 // Start from the identity mapping; globalCoarse/refine update it.
5432  // mapidx init
5433  mapidx.clear();
5434  mapidx.resize(nocts);
5435 #if defined(__INTEL_COMPILER) || defined(__ICC)
5436 #else
5437  mapidx.shrink_to_fit();
5438 #endif
5439  for (uint32_t i=0; i<nocts; i++){
5440  mapidx[i] = i;
5441  }
5442 #if NOMPI==0
5443  if(serial){
5444 #endif
5445  log.writeLog("---------------------------------------------");
5446  log.writeLog(" ADAPT (Global Coarse)");
5447  log.writeLog(" ");
5448 
5449  // 2:1 Balance
5450  balance21(true);
5451 
5452  log.writeLog(" ");
5453  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5454 
 // Coarsen everywhere, then rebalance and refine to restore 2:1.
5455  // Coarse
5456  while(octree.globalCoarse(mapidx));
5457  updateAfterCoarse(mapidx);
5458  balance21(false);
5459  while(octree.refine(mapidx));
5460  updateAdapt();
5461  if (octree.getNumOctants() < nocts){
5462  localDone = true;
5463  }
5464  nocts = octree.getNumOctants();
5465 
5466  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5467 #if NOMPI==0
5468  MPI_Barrier(comm);
5469  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5470 #endif
5471  log.writeLog(" ");
5472  log.writeLog("---------------------------------------------");
5473 #if NOMPI==0
5474  }
5475  else{
 // Distributed case: rebuild ghosts after every structural change.
5476  log.writeLog("---------------------------------------------");
5477  log.writeLog(" ADAPT (Global Coarse)");
5478  log.writeLog(" ");
5479 
5480  // 2:1 Balance
5481  balance21(true);
5482 
5483  log.writeLog(" ");
5484  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5485 
5486  // Coarse
5487  while(octree.globalCoarse(mapidx));
5488  updateAfterCoarse(mapidx);
5489  setPboundGhosts();
5490  balance21(false);
5491  while(octree.refine(mapidx));
5492  updateAdapt();
5493  setPboundGhosts();
5494  if (octree.getNumOctants() < nocts){
5495  localDone = true;
5496  }
5497  nocts = octree.getNumOctants();
5498 
5499 
5500  MPI_Barrier(comm);
5501  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5502  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5503  log.writeLog(" ");
5504  log.writeLog("---------------------------------------------");
5505  }
5506  return globalDone;
5507 #else
5508  return localDone;
5509 #endif
5510  }
5511 
5512  //=================================================================================//
5513 #if NOMPI==0
5514 
 // Body of the user-data ghost communication (the signature line — taking a
 // Class_Data_Comm_Interface<Impl>& userData, per the calls below — sits
 // outside this listing). Gathers user data for every process-border octant,
 // exchanges it with neighbouring ranks, and scatters the received values
 // onto the local ghost octants in increasing-rank order.
5516  template<class Impl>
5518 
 // One send buffer per neighbour rank listed in bordersPerProc. If the user
 // data has a fixed per-octant size, the buffer size is computed directly;
 // otherwise userData.size() is queried per border octant.
5519  //BUILD SEND BUFFERS
5520  map<int,Class_Comm_Buffer> sendBuffers;
5521  size_t fixedDataSize = userData.fixedSize();
5522  map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
5523  map<int,vector<uint32_t> >::iterator bitbegin = bordersPerProc.begin();
5524  for(map<int,vector<uint32_t> >::iterator bit = bitbegin; bit != bitend; ++bit){
5525  const int & key = bit->first;
5526  const vector<uint32_t> & pborders = bit->second;
5527  size_t buffSize = 0;
5528  size_t nofPbordersPerProc = pborders.size();
5529  if(fixedDataSize != 0){
5530  buffSize = fixedDataSize*nofPbordersPerProc;
5531  }
5532  else{
5533  for(size_t i = 0; i < nofPbordersPerProc; ++i){
5534  buffSize += userData.size(pborders[i]);
5535  }
5536  }
5537  //enlarge buffer to store number of pborders from this proc
5538  buffSize += sizeof(int);
5539  //build buffer for this proc
5540  sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
 // NOTE(review): nofPbordersPerProc is a size_t but is packed as one
 // MPI_INT — only the first sizeof(int) bytes are read. Safe for counts
 // < 2^31 on little-endian platforms; verify before porting.
5541  //store number of pborders from this proc at the begining
5542  MPI_Pack(&nofPbordersPerProc,1,MPI_INT,sendBuffers[key].commBuffer,sendBuffers[key].commBufferSize,&sendBuffers[key].pos,comm);
5543 
5544  //WRITE SEND BUFFERS
5545  for(size_t j = 0; j < nofPbordersPerProc; ++j){
5546  userData.gather(sendBuffers[key],pborders[j]);
5547  }
5548  }
5549 
 // Exchange buffer sizes. Receives are posted only from ranks present in
 // sendBuffers, i.e. the communication graph is assumed symmetric.
5550  //Communicate Buffers Size
5551  MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5552  MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5553  int nReq = 0;
5554  map<int,int> recvBufferSizePerProc;
5555  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5556  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5557  recvBufferSizePerProc[sit->first] = 0;
5558  error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5559  ++nReq;
5560  }
 // Sends are issued in reverse rank order (deadlock-avoidance pattern used
 // throughout this file); tag == destination rank.
5561  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5562  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5563  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5564  ++nReq;
5565  }
5566  MPI_Waitall(nReq,req,stats);
5567 
 // Exchange the packed payloads themselves.
5568  //Communicate Buffers
5569  //uint32_t nofBytesOverProc = 0;
5570  map<int,Class_Comm_Buffer> recvBuffers;
5571  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5572  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5573  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
5574  }
5575  nReq = 0;
5576  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5577  //nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
5578  error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5579  ++nReq;
5580  }
5581  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5582  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5583  ++nReq;
5584  }
5585  MPI_Waitall(nReq,req,stats);
5586 
 // Unpack per-rank counts and scatter values onto ghosts. Iterating the
 // map in ascending rank order keeps ghostOffset aligned with the ghost
 // vector's rank-sorted layout.
5587  //READ RECEIVE BUFFERS
5588  int ghostOffset = 0;
5589  map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
5590  map<int,Class_Comm_Buffer>::iterator rbitbegin = recvBuffers.begin();
5591  for(map<int,Class_Comm_Buffer>::iterator rbit = rbitbegin; rbit != rbitend; ++rbit){
5592  int nofGhostFromThisProc = 0;
5593  MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofGhostFromThisProc,1,MPI_INT,comm);
5594  for(int k = 0; k < nofGhostFromThisProc; ++k){
5595  userData.scatter(rbit->second, k+ghostOffset);
5596  }
5597  ghostOffset += nofGhostFromThisProc;
5598  }
5599 
5600  delete [] req; req = NULL;
5601  delete [] stats; stats = NULL;
5602 
5603 
5604  };
5605 #endif /* NOMPI */
5606  //=================================================================================//
5607 
 // Build the local nodes/connectivity structures (delegates to the local tree).
5611  octree.computeConnectivity();
5612  }
5613 
5614  // =================================================================================== //
5615 
 // Release the local nodes/connectivity structures (delegates to the local tree).
5619  octree.clearConnectivity();
5620  }
5621 
5622  // =================================================================================== //
5623 
 // Refresh the local nodes/connectivity after an adaptation (delegates to the local tree).
5627  octree.updateConnectivity();
5628  }
5629 
5630  // =================================================================================== //
5631 
 // Build the ghost-octant nodes/connectivity structures (delegates to the local tree).
5635  octree.computeGhostsConnectivity();
5636  }
5637 
5638  // =================================================================================== //
5639 
 // Release the ghost-octant nodes/connectivity structures (delegates to the local tree).
5643  octree.clearGhostsConnectivity();
5644  }
5645 
5646  // =================================================================================== //
5647 
 // Refresh the ghost-octant nodes/connectivity after an adaptation (delegates to the local tree).
5651  octree.updateGhostsConnectivity();
5652  }
5653 
5654  // =================================================================================== //
5655 
 /** Get the number of nodes of the local connectivity.
  *  NOTE(review): nodes.size() (size_t) is implicitly narrowed to uint32_t. */
5658  uint32_t getNumNodes() {
5659  return octree.nodes.size();
5660  }
5661 
5662  // =============================================================================== //
5663 
 /** Get a const reference to the local octant->node connectivity
  *  (one node-index list per local octant). */
5667  const u32vector2D & getConnectivity(){
5668  return octree.connectivity;
5669  }
5670 
5671  // =============================================================================== //
5672 
 /** Get a const reference to the ghost-octant->node connectivity. */
5676  const u32vector2D & getGhostConnectivity(){
5677  return octree.ghostsconnectivity;
5678  }
5679 
5680  // =============================================================================== //
 /** Get the node indices of the local octant with local index idx.
  *  Returned by value (a copy of the stored row). */
5685  u32vector getOctantConnectivity(uint32_t idx){
5686  return octree.connectivity[idx];
5687  }
5688 
5689  // =============================================================================== //
 // Overload body (signature outside this listing): connectivity of the
 // given octant, resolved through its local index via getIdx().
5695  return octree.connectivity[getIdx(oct)];
5696  }
5697 
5698  // =============================================================================== //
5699 
 /** Get the node indices of the ghost octant with local index idx.
  *  Returned by value (a copy of the stored row). */
5704  u32vector getGhostOctantConnectivity(uint32_t idx){
5705  return octree.ghostsconnectivity[idx];
5706  }
5707 
5708  // =============================================================================== //
5709 
5715  return octree.ghostsconnectivity[getIdx(oct)];
5716  }
5717 
5718  // =============================================================================== //
5719 
5723  const u32vector2D & getNodes(){
5724  return octree.nodes;
5725  }
5726 
5727  // =============================================================================== //
5728 
5733  u32vector getNodeLogicalCoordinates(uint32_t inode){
5734  return octree.nodes[inode];
5735  }
5736 
5737  // =============================================================================== //
5738 
5742  const u32vector2D & getGhostNodes(){
5743  return octree.ghostsnodes;
5744  }
5745 
5746  // =============================================================================== //
5747 
5752  dvector getNodeCoordinates(uint32_t inode){
5753  vector<double> coords(3,0);
5754  coords[0] = trans.mapX(octree.nodes[inode][0]);
5755  coords[1] = trans.mapY(octree.nodes[inode][1]);
5756  coords[2] = trans.mapZ(octree.nodes[inode][2]);
5757  return coords;
5758  }
5759 
5760  // =============================================================================== //
5761 
5766  u32vector getGhostNodeLogicalCoordinates(uint32_t inode){
5767  return octree.ghostsnodes[inode];
5768  }
5769 
5770  // =============================================================================== //
5771 
5776  dvector getGhostNodeCoordinates(uint32_t inode){
5777  vector<double> coords(3,0);
5778  coords[0] = trans.mapX(octree.ghostsnodes[inode][0]);
5779  coords[1] = trans.mapY(octree.ghostsnodes[inode][1]);
5780  coords[2] = trans.mapZ(octree.ghostsnodes[inode][2]);
5781  return coords;
5782  }
5783 
5784  // =============================================================================== //
5785 
	/** Map the local octants of this tree onto the octants of another
	 * Class_Para_Tree over the same domain, by comparing Morton codes.
	 * For each local octant i the result stores:
	 * - mapper[i].first.first  : index in ptree of the first overlapped octant;
	 * - mapper[i].first.second : index in ptree of the last overlapped octant;
	 * - mapper[i].second.first / .second.second : ranks owning those two
	 *   octants in ptree's partition.
	 * The serial branch walks both trees monotonically by Morton code; the
	 * parallel branch additionally exchanges, via MPI, the Mortons whose
	 * owner is a remote rank and receives back the matching indices.
	 * NOTE(review): the trailing "TODO PARALLEL VERSION - (BUGS!!!)" below is
	 * the author's own flag that the parallel branch is not trusted; inline
	 * notes mark the suspicious spots.
	 * \param[in] ptree Target tree (same domain) to be mapped onto.
	 * \return The mapper vector, one entry per local octant.
	 */
	vector<pair<pair<uint32_t, uint32_t>, pair<int, int> > > mapPablos(Class_Para_Tree<3> & ptree){
		//TODO DO IT WITH ITERATORS
		vector<pair<pair<uint32_t, uint32_t>, pair<int, int> > > mapper;
		uint64_t morton2 = 0, morton1 = 0, mortonlastdesc = 0, mortonfirstdesc = 0;
		uint32_t idx1 = 0, idx2 = 0;
		uint32_t nocts = octree.getNumOctants();
		uint32_t nocts2 = ptree.octree.getNumOctants();
		int owner;
		mapper.resize(nocts);
#if NOMPI==0
		if (ptree.serial){
#endif
			// SERIAL TARGET: both trees are fully visible, so the two index
			// cursors idx1/idx2 advance monotonically over ptree while the
			// local octants are visited in (Morton) order.
			for (uint32_t i=0; i<nocts; i++){
				mapper[i].first.first = idx1;
				mapper[i].first.second = idx2;
				mapper[i].second.first = rank;
				mapper[i].second.second = rank;
				// Morton of the octant itself = Morton of its first descendant.
				mortonfirstdesc = octree.octants[i].computeMorton();
				mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
				// Advance idx1 up to the last ptree octant whose Morton does
				// not exceed the first-descendant Morton of octant i.
				while(morton1 <= mortonfirstdesc && idx1 < nocts2){
					mapper[i].first.first = idx1;
					idx1++;
					if (idx1 < nocts2)
						morton1 = ptree.getOctant(idx1)->computeMorton();
				}
				// Step back one position so the next local octant can re-test
				// the boundary ptree octant.
				if(idx1 > 0){
					idx1--;
					morton1 = ptree.getOctant(idx1)->computeMorton();
				}
				// Same scheme for the last overlapped ptree octant.
				while(morton2 <= mortonlastdesc && idx2 < nocts2){
					mapper[i].first.second = idx2;
					idx2++;
					if (idx2 < nocts2)
						morton2 = ptree.getOctant(idx2)->computeMorton();
				}
				if (idx2 > 0){
					idx2--;
					morton2 = ptree.getOctant(idx2)->computeMorton();
				}
			}
#if NOMPI==0
		}
		else{
			// PARALLEL TARGET: Mortons owned by remote ranks are collected
			// per owner, exchanged, resolved remotely into indices, and the
			// indices are sent back and patched into the mapper.
			map<int,vector<uint64_t> > FirstMortonperproc, SecondMortonperproc;
			map<int,vector<uint64_t> > FirstMortonReceived, SecondMortonReceived;
			map<int,vector<uint32_t> > FirstIndexperproc, SecondIndexperproc;
			map<int,vector<uint32_t> > FirstLocalIndex, SecondLocalIndex;
			idx1 = 0;
			morton1 = 0;
			idx2 = 0;
			morton2 = 0;
			for (uint32_t i=0; i<nocts; i++){
				mortonfirstdesc = octree.octants[i].computeMorton();
				owner = ptree.findOwner(mortonfirstdesc);
				if (rank == owner){
					// First overlapped octant is local to this rank in ptree.
					mapper[i].second.first = rank;
					while(morton1 <= mortonfirstdesc && idx1 < nocts2){
						mapper[i].first.first = idx1;
						idx1++;
						if (idx1 < nocts2)
							morton1 = ptree.getOctant(idx1)->computeMorton();
					}
					if(idx1 > 0){
						idx1--;
						morton1 = ptree.getOctant(idx1)->computeMorton();
					}
					mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
					owner = ptree.findOwner(mortonlastdesc);
					if (rank == owner){
						mapper[i].second.second = rank;
						mapper[i].first.second = idx2;
						while(morton2 <= mortonlastdesc && idx2 < nocts2){
							mapper[i].first.second = idx2;
							idx2++;
							if (idx2 < nocts2)
								morton2 = ptree.getOctant(idx2)->computeMorton();
						}
						if(idx2 > 0){
							idx2--;
							morton2 = ptree.getOctant(idx2)->computeMorton();
						}
					}
					else{
						// Last overlapped octant lives on a remote rank:
						// defer its index resolution to that rank.
						mapper[i].second.second = owner;
						// NOTE(review): this pushes mortonfirstdesc, while the
						// remote rank is expected to resolve the LAST
						// descendant; mortonlastdesc looks intended — confirm.
						SecondMortonperproc[owner].push_back(mortonfirstdesc);
						SecondLocalIndex[owner].push_back(i);
					}
				}
				else{
					// First overlapped octant is remote: defer to its owner.
					mapper[i].second.first = owner;
					FirstMortonperproc[owner].push_back(mortonfirstdesc);
					FirstLocalIndex[owner].push_back(i);
					mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
					owner = ptree.findOwner(mortonlastdesc);
					if (rank == owner){
						mapper[i].second.second = rank;
						mapper[i].first.second = idx2;
						while(morton2 <= mortonlastdesc && idx2 < nocts2){
							mapper[i].first.second = idx2;
							idx2++;
							if (idx2 < nocts2)
								morton2 = ptree.getOctant(idx2)->computeMorton();
						}
						if(idx2 > 0){
							idx2--;
							morton2 = ptree.getOctant(idx2)->computeMorton();
						}
					}
					else{
						mapper[i].second.second = owner;
						// NOTE(review): same as above — pushes mortonfirstdesc
						// where mortonlastdesc looks intended; confirm.
						SecondMortonperproc[owner].push_back(mortonfirstdesc);
						SecondLocalIndex[owner].push_back(i);
					}
				}
			}

			MPI_Barrier(comm);


			// Append a sentinel to every per-rank Morton list (-1 wraps to
			// UINT64_MAX in a vector<uint64_t>), consumed below by the
			// "nofMortonPerProc-1" unpack loops.
			for(int iproc=0; iproc<nproc; iproc++){
				FirstMortonperproc[iproc].push_back(-1);
				SecondMortonperproc[iproc].push_back(-1);
			}

			{

				//COMM FIRST MORTON PER PROC
				map<int,Class_Comm_Buffer> sendBuffers;
				map<int,vector<uint64_t> >::iterator bitend = FirstMortonperproc.end();
				for(map<int,vector<uint64_t> >::iterator bit = FirstMortonperproc.begin(); bit != bitend; ++bit){
					// CHAR_BIT/8 == 1 on 8-bit-byte platforms, so buffSize is
					// simply count * sizeof(uint64_t) bytes.
					int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint64_t)) / (double)(CHAR_BIT/8));
					int key = bit->first;
					vector<uint64_t> & value = bit->second;
					sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
					int pos = 0;
					int nofMortons = value.size();
					for(int i = 0; i < nofMortons; ++i){
						//the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
						uint64_t Morton = value[i];
						error_flag = MPI_Pack(&Morton,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
					}
				}

				//COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
				//the size of every borders buffer is communicated to the right process in order to build the receive buffer
				//and stored in the recvBufferSizePerProc structure
				MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
				MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
				int nReq = 0;
				map<int,int> recvBufferSizePerProc;
				map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
				for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
					recvBufferSizePerProc[sit->first] = 0;
					error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
					++nReq;
				}
				// Sends are posted in reverse key order (pattern used
				// throughout PABLO's communications).
				map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
				for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
					error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
					++nReq;
				}
				MPI_Waitall(nReq,req,stats);

				//COMMUNICATE THE BUFFERS TO THE RECEIVERS
				//recvBuffers structure is declared and each buffer is initialized to the right size
				//then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
				//at the same time every process compute the size in bytes
				uint32_t nofBytesOverProc = 0;
				map<int,Class_Comm_Buffer> recvBuffers;
				map<int,int>::iterator ritend = recvBufferSizePerProc.end();
				for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
					recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
				}
				nReq = 0;
				for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
					nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
					error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
					++nReq;
				}
				for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
					error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
					++nReq;
				}
				MPI_Waitall(nReq,req,stats);

				//UNPACK BUFFERS AND BUILD CONTAINER OF RECEIVED MORTON
				//every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked.
				//every Morton is built and put in the MorontReceived vector
				uint32_t Mortoncounter = 0;
				uint64_t Morton = 0;
				map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
				for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
					int pos = 0;
					int nofMortonPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint64_t)));
					// The "-1" skips the sentinel appended above.
					for(int i = 0; i < nofMortonPerProc-1; ++i){
						error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Morton,1,MPI_UINT64_T,comm);
						FirstMortonReceived[rrit->first].push_back(Morton);
						++Mortoncounter;
					}
				}

				recvBuffers.clear();
				sendBuffers.clear();
				recvBufferSizePerProc.clear();
				delete [] req; req = NULL;
				delete [] stats; stats = NULL;

			}

			{
				//COMM SECOND MORTON PER PROC
				// Same exchange as above, applied to the "last descendant"
				// Mortons.
				map<int,Class_Comm_Buffer> sendBuffers;
				map<int,vector<uint64_t> >::iterator bitend = SecondMortonperproc.end();
				uint32_t pbordersOversize = 0;
				for(map<int,vector<uint64_t> >::iterator bit = SecondMortonperproc.begin(); bit != bitend; ++bit){
					pbordersOversize += bit->second.size();
					int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint64_t)) / (double)(CHAR_BIT/8));
					int key = bit->first;
					vector<uint64_t> & value = bit->second;
					sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
					int pos = 0;
					int nofMortons = value.size();
					for(int i = 0; i < nofMortons; ++i){
						//the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
						uint64_t Morton = value[i];
						error_flag = MPI_Pack(&Morton,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
					}
				}

				//COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
				//the size of every borders buffer is communicated to the right process in order to build the receive buffer
				//and stored in the recvBufferSizePerProc structure
				MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
				MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
				int nReq = 0;
				map<int,int> recvBufferSizePerProc;
				map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
				for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
					recvBufferSizePerProc[sit->first] = 0;
					error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
					++nReq;
				}
				map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
				for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
					error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
					++nReq;
				}
				MPI_Waitall(nReq,req,stats);

				//COMMUNICATE THE BUFFERS TO THE RECEIVERS
				//recvBuffers structure is declared and each buffer is initialized to the right size
				//then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
				//at the same time every process compute the size in bytes
				uint32_t nofBytesOverProc = 0;
				map<int,Class_Comm_Buffer> recvBuffers;
				map<int,int>::iterator ritend = recvBufferSizePerProc.end();
				for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
					recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
				}
				nReq = 0;
				for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
					nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
					error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
					++nReq;
				}
				for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
					error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
					++nReq;
				}
				MPI_Waitall(nReq,req,stats);

				//UNPACK BUFFERS AND BUILD CONTAINER OF RECEIVED MORTON
				//every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked.
				//every Morton is built and put in the MorontReceived vector
				uint32_t Mortoncounter = 0;
				uint64_t Morton = 0;
				map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
				for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
					int pos = 0;
					int nofMortonPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint64_t)));
					for(int i = 0; i < nofMortonPerProc-1; ++i){
						error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Morton,1,MPI_UINT64_T,comm);
						SecondMortonReceived[rrit->first].push_back(Morton);
						++Mortoncounter;
					}
				}
				recvBuffers.clear();
				sendBuffers.clear();
				recvBufferSizePerProc.clear();
				delete [] req; req = NULL;
				delete [] stats; stats = NULL;
			}

			//FIND FIRST INDEX FOR FIRST MORTONS IN EACH PROCESS
			// Resolve each received Morton against the LOCAL octants of this
			// rank (this rank owns the interval the sender asked about).
			for (int iproc=0; iproc<nproc; iproc++){
				vector<Class_Octant<3> >::iterator oend = octree.octants.end();
				vector<Class_Octant<3> >::iterator obegin = octree.octants.begin();
				vector<Class_Octant<3> >::iterator it = obegin;
				int nmortons = FirstMortonReceived[iproc].size();
				FirstIndexperproc[iproc].resize(nmortons);
				for (int idx=0; idx<nmortons; idx++){
					FirstIndexperproc[iproc][idx] = octree.getNumOctants()-1;
					uint32_t idx1 = 0;
					mortonfirstdesc = FirstMortonReceived[iproc][idx];
					morton1 = it->computeMorton();
					while(morton1 <= mortonfirstdesc && it != oend){
						// NOTE(review): idx1 is incremented BEFORE being
						// stored, unlike the store-then-increment scheme used
						// everywhere else — possible off-by-one; confirm.
						idx1++;
						FirstIndexperproc[iproc][idx] = idx1;
						it++;
						if (it != oend)
							morton1 = it->computeMorton();
					}
					if(idx1 > 0){
						idx1--;
						it--;
						// NOTE(review): the scan walks LOCAL octants, but the
						// Morton is refreshed from ptree here — looks like it
						// should be it->computeMorton(); confirm.
						morton1 = ptree.getOctant(idx1)->computeMorton();
					}
				}
			}

			//FIND SECOND INDEX FOR SECOND MORTONS IN EACH PROCESS
			for (int iproc=0; iproc<nproc; iproc++){
				vector<Class_Octant<3> >::iterator oend = octree.octants.end();
				vector<Class_Octant<3> >::iterator obegin = octree.octants.begin();
				vector<Class_Octant<3> >::iterator it = obegin;
				int nmortons = SecondMortonReceived[iproc].size();
				SecondIndexperproc[iproc].resize(nmortons);
				for (int idx=0; idx<nmortons; idx++){
					SecondIndexperproc[iproc][idx] = octree.getNumOctants()-1;
					uint32_t idx2 = 0;
					mortonlastdesc = SecondMortonReceived[iproc][idx];
					morton2 = it->computeMorton();
					while(morton2 <= mortonlastdesc && it != oend){
						SecondIndexperproc[iproc][idx] = idx2;
						idx2++;
						it++;
						if (it != oend)
							morton2 = it->computeMorton();
					}
					if(idx2 > 0){
						idx2--;
						it--;
						// NOTE(review): same ptree-vs-local inconsistency as
						// in the First-index loop above; confirm.
						morton2 = ptree.getOctant(idx2)->computeMorton();
					}
				}
			}

			// Sentinels for the reply messages (wrap to UINT32_MAX).
			for(int iproc=0; iproc<nproc; iproc++){
				FirstIndexperproc[iproc].push_back(-1);
				SecondIndexperproc[iproc].push_back(-1);
			}


			{
				//COMM BACK FIRST INDEX PER PROC
				map<int,Class_Comm_Buffer> sendBuffers;
				map<int,vector<uint32_t> >::iterator bitend = FirstIndexperproc.end();
				for(map<int,vector<uint32_t> >::iterator bit = FirstIndexperproc.begin(); bit != bitend; ++bit){
					int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint32_t)) / (double)(CHAR_BIT/8));
					int key = bit->first;
					vector<uint32_t> & value = bit->second;
					sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
					int pos = 0;
					int nofIndices = value.size();
					for(int i = 0; i < nofIndices; ++i){
						//the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
						uint32_t Index = value[i];
						error_flag = MPI_Pack(&Index,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
					}
				}

				//COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
				//the size of every borders buffer is communicated to the right process in order to build the receive buffer
				//and stored in the recvBufferSizePerProc structure
				MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
				MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
				int nReq = 0;
				map<int,int> recvBufferSizePerProc;
				map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
				for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
					recvBufferSizePerProc[sit->first] = 0;
					error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
					++nReq;
				}
				map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
				for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
					error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
					++nReq;
				}
				MPI_Waitall(nReq,req,stats);

				//COMMUNICATE THE BUFFERS TO THE RECEIVERS
				//recvBuffers structure is declared and each buffer is initialized to the right size
				//then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
				//at the same time every process compute the size in bytes
				uint32_t nofBytesOverProc = 0;
				map<int,Class_Comm_Buffer> recvBuffers;
				map<int,int>::iterator ritend = recvBufferSizePerProc.end();
				for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
					recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
				}
				nReq = 0;
				for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
					nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
					error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
					++nReq;
				}
				for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
					error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
					++nReq;
				}
				MPI_Waitall(nReq,req,stats);

				//UNPACK BUFFERS AND BUILD CONTAINER OF RECEIVED MORTON
				//every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked.
				//every Index is built and put in the mapper
				uint32_t Indexcounter = 0;
				uint32_t Index = 0;
				map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
				for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
					int pos = 0;
					int nofIndexPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint32_t)));
					for(int i = 0; i < nofIndexPerProc-1; ++i){
						error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Index,1,MPI_UINT32_T,comm);
						// Patch the resolved remote index back into the entry
						// recorded for this owner during the first pass.
						mapper[FirstLocalIndex[rrit->first][i]].first.first = Index;
						++Indexcounter;
					}

				}
				recvBuffers.clear();
				sendBuffers.clear();
				recvBufferSizePerProc.clear();
				delete [] req; req = NULL;
				delete [] stats; stats = NULL;
			}

			{
				//COMM BACK SECOND INDEX PER PROC
				map<int,Class_Comm_Buffer> sendBuffers;
				map<int,vector<uint32_t> >::iterator bitend = SecondIndexperproc.end();
				uint32_t pbordersOversize = 0;
				for(map<int,vector<uint32_t> >::iterator bit = SecondIndexperproc.begin(); bit != bitend; ++bit){
					pbordersOversize += bit->second.size();
					int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint32_t)) / (double)(CHAR_BIT/8));
					int key = bit->first;
					vector<uint32_t> & value = bit->second;
					sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
					int pos = 0;
					int nofIndices = value.size();
					for(int i = 0; i < nofIndices; ++i){
						//the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
						// NOTE(review): Index is declared uint64_t here but
						// packed as MPI_UINT32_T (the sibling block above uses
						// uint32_t) — type/width mismatch; confirm.
						uint64_t Index = value[i];
						error_flag = MPI_Pack(&Index,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
					}
				}

				//COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
				//the size of every borders buffer is communicated to the right process in order to build the receive buffer
				//and stored in the recvBufferSizePerProc structure
				MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
				MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
				int nReq = 0;
				map<int,int> recvBufferSizePerProc;
				map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
				for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
					recvBufferSizePerProc[sit->first] = 0;
					error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
					++nReq;
				}
				map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
				for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
					error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
					++nReq;
				}
				MPI_Waitall(nReq,req,stats);

				//COMMUNICATE THE BUFFERS TO THE RECEIVERS
				//recvBuffers structure is declared and each buffer is initialized to the right size
				//then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
				//at the same time every process compute the size in bytes
				uint32_t nofBytesOverProc = 0;
				map<int,Class_Comm_Buffer> recvBuffers;
				map<int,int>::iterator ritend = recvBufferSizePerProc.end();
				for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
					recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
				}
				nReq = 0;
				for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
					nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
					error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
					++nReq;
				}
				for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
					error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
					++nReq;
				}
				MPI_Waitall(nReq,req,stats);

				//UNPACK BUFFERS AND BUILD CONTAINER OF RECEIVED MORTON
				//every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked.
				//every Index is built and put in the mapper
				uint32_t Indexcounter = 0;
				uint32_t Index = 0;
				map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
				for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
					int pos = 0;
					int nofIndexPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint32_t)));
					for(int i = 0; i < nofIndexPerProc-1; ++i){
						error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Index,1,MPI_UINT32_T,comm);
						mapper[SecondLocalIndex[rrit->first][i]].first.second = Index;
						++Indexcounter;
					}
				}
				recvBuffers.clear();
				sendBuffers.clear();
				recvBufferSizePerProc.clear();
				delete [] req; req = NULL;
				delete [] stats; stats = NULL;
			}
		}
#endif /* NOMPI */
		//TODO PARALLEL VERSION - (BUGS!!!)
		return mapper;
	}
6319 
6320  // =============================================================================== //
6321 
6327  void writeLogical(string filename) {
6328 
6329  bool clear = false;
6330  if (octree.connectivity.size() == 0) {
6331  octree.computeConnectivity();
6332  clear = true;
6333  }
6334 
6335  stringstream name;
6336  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6337 
6338  ofstream out(name.str().c_str());
6339  if(!out.is_open()){
6340  stringstream ss;
6341  ss << filename << "*.vtu cannot be opened and it won't be written.";
6342  log.writeLog(ss.str());
6343  return;
6344  }
6345  int nofNodes = octree.nodes.size();
6346  int nofGhostNodes = octree.ghostsnodes.size();
6347  int nofOctants = octree.connectivity.size();
6348  int nofGhosts = octree.ghostsconnectivity.size();
6349  int nofAll = nofGhosts + nofOctants;
6350  out << "<?xml version=\"1.0\"?>" << endl
6351  << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6352  << " <UnstructuredGrid>" << endl
6353  << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6354  out << " <Points>" << endl
6355  << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 <<"\" format=\"ascii\">" << endl
6356  << " " << std::fixed;
6357  for(int i = 0; i < nofNodes; i++)
6358  {
6359  for(int j = 0; j < 3; ++j)
6360  out << std::setprecision(6) << octree.nodes[i][j] << " ";
6361  if((i+1)%4==0 && i!=nofNodes-1)
6362  out << endl << " ";
6363  }
6364  for(int i = 0; i < nofGhostNodes; i++)
6365  {
6366  for(int j = 0; j < 3; ++j)
6367  out << std::setprecision(6) << octree.ghostsnodes[i][j] << " ";
6368  if((i+1)%4==0 && i!=nofNodes-1)
6369  out << endl << " ";
6370  }
6371  out << endl << " </DataArray>" << endl
6372  << " </Points>" << endl
6373  << " <Cells>" << endl
6374  << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6375  << " ";
6376  for(int i = 0; i < nofOctants; i++)
6377  {
6378  for(int j = 0; j < global3D.nnodes; j++)
6379  {
6380  out << octree.connectivity[i][j] << " ";
6381  }
6382  if((i+1)%3==0 && i!=nofOctants-1)
6383  out << endl << " ";
6384  }
6385  for(int i = 0; i < nofGhosts; i++)
6386  {
6387  for(int j = 0; j < global3D.nnodes; j++)
6388  {
6389  out << octree.ghostsconnectivity[i][j] + nofNodes << " ";
6390  }
6391  if((i+1)%3==0 && i!=nofGhosts-1)
6392  out << endl << " ";
6393  }
6394  out << endl << " </DataArray>" << endl
6395  << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6396  << " ";
6397  for(int i = 0; i < nofAll; i++)
6398  {
6399  out << (i+1)*global3D.nnodes << " ";
6400  if((i+1)%12==0 && i!=nofAll-1)
6401  out << endl << " ";
6402  }
6403  out << endl << " </DataArray>" << endl
6404  << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6405  << " ";
6406  for(int i = 0; i < nofAll; i++)
6407  {
6408  int type;
6409  type = 11;
6410  out << type << " ";
6411  if((i+1)%12==0 && i!=nofAll-1)
6412  out << endl << " ";
6413  }
6414  out << endl << " </DataArray>" << endl
6415  << " </Cells>" << endl
6416  << " </Piece>" << endl
6417  << " </UnstructuredGrid>" << endl
6418  << "</VTKFile>" << endl;
6419 
6420  if(rank == 0){
6421  name.str("");
6422  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6423  ofstream pout(name.str().c_str());
6424  if(!pout.is_open()){
6425  stringstream ss;
6426  ss << filename << "*.pvtu cannot be opened and it won't be written.";
6427  log.writeLog(ss.str());
6428  return;
6429  }
6430 
6431  pout << "<?xml version=\"1.0\"?>" << endl
6432  << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6433  << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6434  << " <PPointData>" << endl
6435  << " </PPointData>" << endl
6436  << " <PCellData Scalars=\"\">" << endl;
6437  pout << " </PCellData>" << endl
6438  << " <PPoints>" << endl
6439  << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6440  << " </PPoints>" << endl;
6441  for(int i = 0; i < nproc; i++)
6442  pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6443  pout << " </PUnstructuredGrid>" << endl
6444  << "</VTKFile>";
6445 
6446  pout.close();
6447 
6448  }
6449 #if NOMPI==0
6450  MPI_Barrier(comm);
6451 #endif
6452  if (clear){
6453  octree.clearConnectivity();
6454  }
6455  }
6456 
6457  // ----------------------------------------------------------------------------------- //
6458 
6464  void write(string filename) {
6465 
6466  bool clear = false;
6467  if (octree.connectivity.size() == 0) {
6468  octree.computeConnectivity();
6469  clear = true;
6470  }
6471 
6472  stringstream name;
6473  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6474 
6475  ofstream out(name.str().c_str());
6476  if(!out.is_open()){
6477  stringstream ss;
6478  ss << filename << "*.vtu cannot be opened and it won't be written.";
6479  log.writeLog(ss.str());
6480  return;
6481  }
6482  int nofNodes = octree.nodes.size();
6483  int nofGhostNodes = octree.ghostsnodes.size();
6484  int nofOctants = octree.connectivity.size();
6485  int nofGhosts = octree.ghostsconnectivity.size();
6486  int nofAll = nofGhosts + nofOctants;
6487  out << "<?xml version=\"1.0\"?>" << endl
6488  << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6489  << " <UnstructuredGrid>" << endl
6490  << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6491  out << " <Points>" << endl
6492  << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 <<"\" format=\"ascii\">" << endl
6493  << " " << std::fixed;
6494  for(int i = 0; i < nofNodes; i++)
6495  {
6496  for(int j = 0; j < 3; ++j){
6497  if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6498  if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6499  if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6500  }
6501  if((i+1)%4==0 && i!=nofNodes-1)
6502  out << endl << " ";
6503  }
6504  for(int i = 0; i < nofGhostNodes; i++)
6505  {
6506  for(int j = 0; j < 3; ++j){
6507  if (j==0) out << std::setprecision(6) << trans.mapX(octree.ghostsnodes[i][j]) << " ";
6508  if (j==1) out << std::setprecision(6) << trans.mapY(octree.ghostsnodes[i][j]) << " ";
6509  if (j==2) out << std::setprecision(6) << trans.mapZ(octree.ghostsnodes[i][j]) << " ";
6510  }
6511  if((i+1)%4==0 && i!=nofNodes-1)
6512  out << endl << " ";
6513  }
6514  out << endl << " </DataArray>" << endl
6515  << " </Points>" << endl
6516  << " <Cells>" << endl
6517  << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6518  << " ";
6519  for(int i = 0; i < nofOctants; i++)
6520  {
6521  for(int j = 0; j < global3D.nnodes; j++)
6522  {
6523  out << octree.connectivity[i][j] << " ";
6524  }
6525  if((i+1)%3==0 && i!=nofOctants-1)
6526  out << endl << " ";
6527  }
6528  for(int i = 0; i < nofGhosts; i++)
6529  {
6530  for(int j = 0; j < global3D.nnodes; j++)
6531  {
6532  out << octree.ghostsconnectivity[i][j] + nofNodes << " ";
6533  }
6534  if((i+1)%3==0 && i!=nofGhosts-1)
6535  out << endl << " ";
6536  }
6537  out << endl << " </DataArray>" << endl
6538  << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6539  << " ";
6540  for(int i = 0; i < nofAll; i++)
6541  {
6542  out << (i+1)*global3D.nnodes << " ";
6543  if((i+1)%12==0 && i!=nofAll-1)
6544  out << endl << " ";
6545  }
6546  out << endl << " </DataArray>" << endl
6547  << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6548  << " ";
6549  for(int i = 0; i < nofAll; i++)
6550  {
6551  int type;
6552  type = 11;
6553  out << type << " ";
6554  if((i+1)%12==0 && i!=nofAll-1)
6555  out << endl << " ";
6556  }
6557  out << endl << " </DataArray>" << endl
6558  << " </Cells>" << endl
6559  << " </Piece>" << endl
6560  << " </UnstructuredGrid>" << endl
6561  << "</VTKFile>" << endl;
6562 
6563 
6564  if(rank == 0){
6565  name.str("");
6566  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6567  ofstream pout(name.str().c_str());
6568  if(!pout.is_open()){
6569  stringstream ss;
6570  ss << filename << "*.pvtu cannot be opened and it won't be written.";
6571  log.writeLog(ss.str());
6572  return;
6573  }
6574 
6575  pout << "<?xml version=\"1.0\"?>" << endl
6576  << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6577  << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6578  << " <PPointData>" << endl
6579  << " </PPointData>" << endl
6580  << " <PCellData Scalars=\"\">" << endl;
6581  pout << " </PCellData>" << endl
6582  << " <PPoints>" << endl
6583  << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6584  << " </PPoints>" << endl;
6585  for(int i = 0; i < nproc; i++)
6586  pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6587  pout << " </PUnstructuredGrid>" << endl
6588  << "</VTKFile>";
6589 
6590  pout.close();
6591 
6592  }
6593 #if NOMPI==0
6594  MPI_Barrier(comm);
6595 #endif
6596  if (clear){
6597  octree.clearConnectivity();
6598  }
6599  }
6600  // =============================================================================== //
6601 
6602  // =============================================================================== //
6603 
6609  void writeTest(string filename, vector<double> data) {
6610 
6611  bool clear = false;
6612  if (octree.connectivity.size() == 0) {
6613  octree.computeConnectivity();
6614  clear = true;
6615  }
6616 
6617  stringstream name;
6618  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6619 
6620  ofstream out(name.str().c_str());
6621  if(!out.is_open()){
6622  stringstream ss;
6623  ss << filename << "*.vtu cannot be opened and it won't be written.";
6624  log.writeLog(ss.str());
6625  return;
6626  }
6627  int nofNodes = octree.nodes.size();
6628  int nofOctants = octree.connectivity.size();
6629  int nofAll = nofOctants;
6630  out << "<?xml version=\"1.0\"?>" << endl
6631  << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6632  << " <UnstructuredGrid>" << endl
6633  << " <Piece NumberOfCells=\"" << octree.connectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() << "\">" << endl;
6634  out << " <CellData Scalars=\"Data\">" << endl;
6635  out << " <DataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6636  << " " << std::fixed;
6637  int ndata = octree.connectivity.size();
6638  for(int i = 0; i < ndata; i++)
6639  {
6640  out << std::setprecision(6) << data[i] << " ";
6641  if((i+1)%4==0 && i!=ndata-1)
6642  out << endl << " ";
6643  }
6644  out << endl << " </DataArray>" << endl
6645  << " </CellData>" << endl
6646  << " <Points>" << endl
6647  << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 <<"\" format=\"ascii\">" << endl
6648  << " " << std::fixed;
6649  for(int i = 0; i < nofNodes; i++)
6650  {
6651  for(int j = 0; j < 3; ++j){
6652  if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6653  if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6654  if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6655  }
6656  if((i+1)%4==0 && i!=nofNodes-1)
6657  out << endl << " ";
6658  }
6659  out << endl << " </DataArray>" << endl
6660  << " </Points>" << endl
6661  << " <Cells>" << endl
6662  << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6663  << " ";
6664  for(int i = 0; i < nofOctants; i++)
6665  {
6666  for(int j = 0; j < global3D.nnodes; j++)
6667  {
6668  out << octree.connectivity[i][j] << " ";
6669  }
6670  if((i+1)%3==0 && i!=nofOctants-1)
6671  out << endl << " ";
6672  }
6673  out << endl << " </DataArray>" << endl
6674  << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6675  << " ";
6676  for(int i = 0; i < nofAll; i++)
6677  {
6678  out << (i+1)*global3D.nnodes << " ";
6679  if((i+1)%12==0 && i!=nofAll-1)
6680  out << endl << " ";
6681  }
6682  out << endl << " </DataArray>" << endl
6683  << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6684  << " ";
6685  for(int i = 0; i < nofAll; i++)
6686  {
6687  int type;
6688  type = 11;
6689  out << type << " ";
6690  if((i+1)%12==0 && i!=nofAll-1)
6691  out << endl << " ";
6692  }
6693  out << endl << " </DataArray>" << endl
6694  << " </Cells>" << endl
6695  << " </Piece>" << endl
6696  << " </UnstructuredGrid>" << endl
6697  << "</VTKFile>" << endl;
6698 
6699 
6700  if(rank == 0){
6701  name.str("");
6702  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6703  ofstream pout(name.str().c_str());
6704  if(!pout.is_open()){
6705  stringstream ss;
6706  ss << filename << "*.pvtu cannot be opened and it won't be written.";
6707  log.writeLog(ss.str());
6708  return;
6709  }
6710 
6711  pout << "<?xml version=\"1.0\"?>" << endl
6712  << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6713  << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6714  << " <PPointData>" << endl
6715  << " </PPointData>" << endl
6716  << " <PCellData Scalars=\"Data\">" << endl
6717  << " <PDataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\"/>" << endl
6718  << " </PCellData>" << endl
6719  << " <PPoints>" << endl
6720  << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6721  << " </PPoints>" << endl;
6722  for(int i = 0; i < nproc; i++)
6723  pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6724  pout << " </PUnstructuredGrid>" << endl
6725  << "</VTKFile>";
6726 
6727  pout.close();
6728 
6729  }
6730 #if NOMPI==0
6731  MPI_Barrier(comm);
6732 #endif
6733  if (clear){
6734  octree.clearConnectivity();
6735  }
6736  }
6737 
6738  // =============================================================================== //
6739 
6745  void writeTest(string filename, vector<double> data, vector<double> ghostdata) {
6746 
6747  bool clear = false;
6748  if (octree.connectivity.size() == 0) {
6749  octree.computeConnectivity();
6750  octree.computeGhostsConnectivity();
6751  clear = true;
6752  }
6753 
6754  stringstream name;
6755  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6756 
6757  ofstream out(name.str().c_str());
6758  if(!out.is_open()){
6759  stringstream ss;
6760  ss << filename << "*.vtu cannot be opened and it won't be written.";
6761  log.writeLog(ss.str());
6762  return;
6763  }
6764  int nofNodes = octree.nodes.size();
6765  int nofOctants = octree.connectivity.size();
6766  int nofGhostNodes = octree.ghostsnodes.size();
6767  int nofGhostOctants = octree.ghostsconnectivity.size();
6768  int nofAll = nofOctants + nofGhostOctants;
6769  out << "<?xml version=\"1.0\"?>" << endl
6770  << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6771  << " <UnstructuredGrid>" << endl
6772  << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6773  out << " <CellData Scalars=\"Data\">" << endl;
6774  out << " <DataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6775  << " " << std::fixed;
6776  int ndata = octree.connectivity.size();
6777  for(int i = 0; i < ndata; i++)
6778  {
6779  out << std::setprecision(6) << data[i] << " ";
6780  if((i+1)%4==0 && i!=ndata-1)
6781  out << endl << " ";
6782  }
6783  int nghostdata = octree.ghostsconnectivity.size();
6784  for(int i = 0; i < nghostdata; i++)
6785  {
6786  out << std::setprecision(6) << ghostdata[i] << " ";
6787  if((i+1)%4==0 && i!=nghostdata-1)
6788  out << endl << " ";
6789  }
6790  out << endl << " </DataArray>" << endl
6791  << " </CellData>" << endl
6792  << " <Points>" << endl
6793  << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 <<"\" format=\"ascii\">" << endl
6794  << " " << std::fixed;
6795  for(int i = 0; i < nofNodes; i++)
6796  {
6797  for(int j = 0; j < 3; ++j){
6798  if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6799  if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6800  if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6801  }
6802  if((i+1)%4==0 && i!=nofNodes-1)
6803  out << endl << " ";
6804  }
6805  for(int i = 0; i < nofGhostNodes; i++)
6806  {
6807  for(int j = 0; j < 3; ++j){
6808  if (j==0) out << std::setprecision(6) << trans.mapX(octree.ghostsnodes[i][j]) << " ";
6809  if (j==1) out << std::setprecision(6) << trans.mapY(octree.ghostsnodes[i][j]) << " ";
6810  if (j==2) out << std::setprecision(6) << trans.mapZ(octree.ghostsnodes[i][j]) << " ";
6811  }
6812  if((i+1)%4==0 && i!=nofGhostNodes-1)
6813  out << endl << " ";
6814  }
6815  out << endl << " </DataArray>" << endl
6816  << " </Points>" << endl
6817  << " <Cells>" << endl
6818  << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6819  << " ";
6820  for(int i = 0; i < nofOctants; i++)
6821  {
6822  for(int j = 0; j < global3D.nnodes; j++)
6823  {
6824  out << octree.connectivity[i][j] << " ";
6825  }
6826  if((i+1)%3==0 && i!=nofOctants-1)
6827  out << endl << " ";
6828  }
6829  for(int i = 0; i < nofGhostOctants; i++)
6830  {
6831  for(int j = 0; j < global3D.nnodes; j++)
6832  {
6833  out << octree.ghostsconnectivity[i][j] + nofNodes << " ";
6834  }
6835  if((i+1)%3==0 && i!=nofGhostOctants-1)
6836  out << endl << " ";
6837  }
6838  out << endl << " </DataArray>" << endl
6839  << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6840  << " ";
6841  for(int i = 0; i < nofAll; i++)
6842  {
6843  out << (i+1)*global3D.nnodes << " ";
6844  if((i+1)%12==0 && i!=nofAll-1)
6845  out << endl << " ";
6846  }
6847  out << endl << " </DataArray>" << endl
6848  << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6849  << " ";
6850  for(int i = 0; i < nofAll; i++)
6851  {
6852  int type;
6853  type = 11;
6854  out << type << " ";
6855  if((i+1)%12==0 && i!=nofAll-1)
6856  out << endl << " ";
6857  }
6858  out << endl << " </DataArray>" << endl
6859  << " </Cells>" << endl
6860  << " </Piece>" << endl
6861  << " </UnstructuredGrid>" << endl
6862  << "</VTKFile>" << endl;
6863 
6864 
6865  if(rank == 0){
6866  name.str("");
6867  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6868  ofstream pout(name.str().c_str());
6869  if(!pout.is_open()){
6870  stringstream ss;
6871  ss << filename << "*.pvtu cannot be opened and it won't be written.";
6872  log.writeLog(ss.str());
6873  return;
6874  }
6875 
6876  pout << "<?xml version=\"1.0\"?>" << endl
6877  << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6878  << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6879  << " <PPointData>" << endl
6880  << " </PPointData>" << endl
6881  << " <PCellData Scalars=\"Data\">" << endl
6882  << " <PDataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\"/>" << endl
6883  << " </PCellData>" << endl
6884  << " <PPoints>" << endl
6885  << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6886  << " </PPoints>" << endl;
6887  for(int i = 0; i < nproc; i++)
6888  pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6889  pout << " </PUnstructuredGrid>" << endl
6890  << "</VTKFile>";
6891 
6892  pout.close();
6893 
6894  }
6895 #if NOMPI==0
6896  MPI_Barrier(comm);
6897 #endif
6898  if (clear){
6899  octree.clearConnectivity();
6900  octree.clearGhostsConnectivity();
6901  }
6902  }
6903 
6904  // =============================================================================== //
6905 
6906 
6907 };
6908 
6909 
uint64_t computeMorton() const
void getFaceCenter(uint32_t idx, uint8_t iface, vector< double > &center)
Class_Para_Tree(double &X, double &Y, double &Z, double &L, ivector2D &XYZ, ivector &levels, string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
Class_Octant< 3 > * getOctant(uint32_t idx)
const u32vector2D & getNodes()
void setBalance(uint32_t idx, bool balance)
bool getBalance(uint32_t idx)
dvector getEdgeCenter(uint8_t iedge)
Bundle char container for communications.
void getFaceCenter(Class_Octant< 3 > *oct, uint8_t iface, vector< double > &center)
vector< double > getEdgeCenter(uint32_t idx, uint8_t iedge)
Parallel Octree Manager Class.
u32vector getOwners(Class_Intersection< 3 > *inter)
void findNeighbours(uint32_t idx, uint8_t iface, uint8_t codim, u32vector &neighbours, vector< bool > &isghost)
Class_Local_Tree< 3 > octree
Parallel Octree Manager Class - 3D specialization.
void findNeighbours(Class_Octant< 3 > *oct, uint8_t iface, uint8_t codim, u32vector &neighbours, vector< bool > &isghost)
void assign(uint32_t stride, uint32_t length)
Base class for data communications.
u32vector getOctantConnectivity(uint32_t idx)
void write(string filename)
double getZ(Class_Octant< 3 > *const oct)
uint8_t getBalanceCodimension() const
bool getPbound(uint8_t face) const
void findGhostNeighbours(uint32_t idx, uint8_t iface, uint8_t codim, u32vector &neighbours)
int8_t getMarker(uint32_t idx)
void getNormal(Class_Octant< 3 > *oct, uint8_t &iface, dvector &normal)
double getSize(Class_Octant< 3 > *const oct)
double getSize(uint32_t idx)
uint32_t getPointOwnerIdx(u32vector &point)
uint8_t getLevel(Class_Octant< 3 > *oct)
void setBalanceCodimension(uint8_t b21codim)
Class_Octant< 3 > * getPointOwner(dvector &point)
void getNormal(uint8_t &iface, vector< int8_t > &normal)
bool getBound(Class_Octant< 3 > *oct)
dvector getNormal(uint32_t idx, uint8_t &iface)
void getCenter(Class_Octant< 3 > *oct, dvector &center)
bool getIsNewC(Class_Octant< 3 > *oct)
vector< double > getFaceCenter(Class_Octant< 3 > *oct, uint8_t iface)
void getNodes(Class_Octant< 3 > *oct, dvector2D &nodes)
double getVolume(uint32_t idx)
bool adaptGlobalCoarse(u32vector &mapidx)
void loadBalance(Class_Data_LB_Interface< Impl > &userData, uint8_t &level)
uint8_t getLocalMaxDepth() const
void loadBalance(uint8_t &level)
void resizeGhost(uint32_t newSize)
dvector getNodeCoordinates(uint32_t inode)
void setMarker(uint32_t idx, int8_t marker)
double X0
Definition: Class_Map.hpp:47
u32vector getOctantConnectivity(Class_Octant< 3 > *oct)
bool getNotBalance() const
bool adapt(bool mapper_flag)
double getX(Class_Octant< 3 > *const oct)
bool getPbound(Class_Octant< 3 > *oct)
uint32_t getX() const
vector< double > getNode(uint32_t idx, uint8_t inode)
double mapSize(uint32_t const &size)
Definition: Class_Map.tpp:63
dvector getNormal(Class_Intersection< 3 > *inter)
dvector2D getNodes(uint32_t idx)
uint32_t getNumOctants() const
uint32_t getLogicalPointOwnerIdx(dvector &point)
void scatter(Buffer &buff, const uint32_t e)
uint8_t getLevel(uint32_t idx)
double mapArea(uint64_t const &area)
Definition: Class_Map.tpp:68
bool getPbound(Class_Intersection< 3 > *inter)
void getNode(uint32_t idx, uint8_t inode, vector< double > &node)
bool adaptGlobalRefine(u32vector &mapidx)
void writeTest(string filename, vector< double > data, vector< double > ghostdata)
double getArea(uint32_t idx)
void writeLogical(string filename)
uint32_t getSize() const
u32vector getGhostNodeLogicalCoordinates(uint32_t inode)
Class_Para_Tree(string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
map< int, vector< uint32_t > > bordersPerProc
bool getIsGhost(uint32_t idx)
double getY(uint32_t idx)
void communicate(Class_Data_Comm_Interface< Impl > &userData)
int8_t getMarker(Class_Octant< 3 > *oct)
bool getBound(Class_Intersection< 3 > *inter)
Base class for data communications.
void setMarker(Class_Octant< 3 > *oct, int8_t marker)
void getMapping(uint32_t &idx, u32vector &mapper, vector< bool > &isghost)
void mapNodesIntersection(uint32_t(*nodes)[3], vector< vector< double > > &mapnodes)
Definition: Class_Map.tpp:197
uint64_t getGlobalIdx(uint32_t idx)
Customized array definition.
Definition: Class_Array.hpp:37
Local octree portion for each process - 3D specialization.
bool getPbound(Class_Octant< 3 > *oct, uint8_t iface)
uint64_t getGhostGlobalIdx(uint32_t idx)
uint8_t getFace(Class_Intersection< 3 > *inter)
double getSize(Class_Intersection< 3 > *inter)
size_t size(const uint32_t e) const
dvector2D getNodes(Class_Octant< 3 > *oct)
void getEdgeCenter(uint32_t idx, uint8_t iedge, vector< double > &center)
void getCenter(uint32_t idx, vector< double > &center)
bool getIsNewR() const
const u32vector2D & getGhostNodes()
bool getIsGhost(Class_Octant< 3 > *oct)
Octant class definition - 3D specialization.
Intersection class definition - 3D specialization.
void scatter(Buffer &buff, const uint32_t e)
void mapCenter(double *&center, vector< double > &mapcenter)
Definition: Class_Map.tpp:78
void mapNodes(uint32_t(*nodes)[3], vector< vector< double > > &mapnodes)
Definition: Class_Map.tpp:122
uint32_t getY() const
uint8_t getLevel(Class_Intersection< 3 > *inter)
uint64_t getGlobalIdx(Class_Octant< 3 > *oct)
dvector2D getNodes(Class_Intersection< 3 > *inter)
void move(const uint32_t from, const uint32_t to)
Class_Octant< 3 > * getGhostOctant(uint32_t idx)
void setBalance(bool balance)
bool getIsGhost(Class_Intersection< 3 > *inter)
void mapNode(vector< uint32_t > &node, vector< double > &mapnode)
Definition: Class_Map.tpp:179
uint32_t getIdx(Class_Octant< 3 > *oct)
uint64_t getVolume() const
uint32_t getPointOwnerIdx(dvector &point)
vector< double > getCenter(uint32_t idx)
double getY(Class_Octant< 3 > *const oct)
double Z0
Definition: Class_Map.hpp:49
vector< double > getFaceCenter(uint32_t idx, uint8_t iface)
void gather(Buffer &buff, const uint32_t e)
bool adapt(u32vector &mapper)
int8_t getMarker() const
double getVolume(Class_Octant< 3 > *const oct)
Class_Para_Tree(double X, double Y, double Z, double L, ivector2D &XYZ, ivector &levels, string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
int findOwner(const uint64_t &morton)
uint64_t getArea() const
double getArea(Class_Intersection< 3 > *inter)
void getNodes(uint32_t idx, dvector2D &nodes)
vector< double > getCenter(Class_Intersection< 3 > *inter)
dvector getFaceCenter(uint8_t iface)
const u32vector2D & getConnectivity()
void resize(uint32_t newSize)
double getZ(uint32_t idx)
void loadBalance(Class_Data_LB_Interface< Impl > &userData, dvector *weight=NULL)
void writeTest(string filename, vector< double > data)
Local octree portion for each process.
Class_Octant< 3 > * getPointOwner(u32vector &point)
bool getBound(uint8_t face) const
void gather(Buffer &buff, const uint32_t e)
uint8_t getLevel() const
dvector getGhostNodeCoordinates(uint32_t inode)
u32vector getGhostOctantConnectivity(uint32_t idx)
bool getIsNewC() const
double Y0
Definition: Class_Map.hpp:48
void getEdgeCenter(Class_Octant< 3 > *oct, uint8_t iedge, vector< double > &center)
Global variables used in PABLO - 3D specialization.
bool getBalance(Class_Octant< 3 > *oct)
uint8_t facenode[6][4]
uint32_t getZ() const
bool getBound(Class_Octant< 3 > *oct, uint8_t iface)
Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
Class_Intersection< 3 > * getIntersection(uint32_t idx)
size_t size(const uint32_t e) const
void setMarker(int8_t marker)
u32vector getGhostOctantConnectivity(Class_Octant< 3 > *oct)
const u32vector2D & getGhostConnectivity()
vector< double > getEdgeCenter(Class_Octant< 3 > *oct, uint8_t iedge)
vector< double > getCenter(Class_Octant< 3 > *oct)
uint64_t * partition_range_globalidx
vector< pair< pair< uint32_t, uint32_t >, pair< int, int > > > mapPablos(Class_Para_Tree< 3 > &ptree)
u32vector getNodeLogicalCoordinates(uint32_t inode)
double getX(uint32_t idx)
void getNodes(u32vector2D &nodes)
void mapNormals(vector< int8_t > normal, vector< double > &mapnormal)
Definition: Class_Map.tpp:253
bool getFiner(Class_Intersection< 3 > *inter)
Class_Octant< 3 > * getLogicalPointOwner(dvector &point)
double getArea(Class_Octant< 3 > *const oct)
bool getIsNewR(Class_Octant< 3 > *oct)
void getNormal(uint32_t idx, uint8_t &iface, dvector &normal)
const Class_Global< 3 > & getGlobal()
void setBalance(Class_Octant< 3 > *oct, bool balance)
double mapX(uint32_t const &X)
Definition: Class_Map.tpp:33
bool getIsNewC(uint32_t idx)
uint32_t getNumGhosts() const
double mapZ(uint32_t const &Z)
Definition: Class_Map.tpp:43
dvector getNormal(Class_Octant< 3 > *oct, uint8_t &iface)
double mapVolume(uint64_t const &volume)
Definition: Class_Map.tpp:73
bool getIsNewR(uint32_t idx)
double mapY(uint32_t const &Y)
Definition: Class_Map.tpp:38