PABLO  0.1
PArallel Balanced Linear Octree
 All Classes Functions Variables Pages
Class_Para_Tree_2D.tpp
1 
23 // =================================================================================== //
24 // CLASS SPECIALIZATION //
25 // =================================================================================== //
26 
27 template<>
28 class Class_Para_Tree<2>{
29  // ------------------------------------------------------------------------------- //
30  // TYPEDEFS ----------------------------------------------------------------------- //
31 public:
32  typedef vector<Class_Octant<2> > OctantsType;
33  typedef vector<uint32_t> u32vector;
34  typedef vector<double> dvector;
35  typedef vector<vector<uint32_t> > u32vector2D;
36  typedef vector<vector<uint64_t> > u64vector2D;
37  typedef vector<vector<double> > dvector2D;
38  typedef vector<int> ivector;
39  typedef vector<vector<int> > ivector2D;
40 
41  // ------------------------------------------------------------------------------- //
42  // MEMBERS ----------------------------------------------------------------------- //
43 public:
44  //undistributed members
46  uint64_t* partition_last_desc;
48  uint64_t global_num_octants;
49  map<int,vector<uint32_t> > bordersPerProc;
50  int nproc;
51  uint8_t max_depth;
53  //distributed members
54  int rank;
57  //distributed adapting members
58  u32vector mapidx;
64  //auxiliary members
65  int error_flag;
66  bool serial;
68  //map member
71  //info member
72  uint64_t status;
75  //log member
78 #if NOMPI==0
79  MPI_Comm comm;
80 #endif
81 
82  // ------------------------------------------------------------------------------- //
83  // CONSTRUCTORS ------------------------------------------------------------------ //
84 public:
89 #if NOMPI==0
90  Class_Para_Tree(string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD) : log(logfile,comm_),comm(comm_){
91 #else
92  Class_Para_Tree(string logfile="PABLO.log") : log(logfile){
93 #endif
94  serial = true;
95  error_flag = 0;
96  max_depth = 0;
97  global_num_octants = octree.getNumOctants();
98 #if NOMPI==0
99  error_flag = MPI_Comm_size(comm,&nproc);
100  error_flag = MPI_Comm_rank(comm,&rank);
101 #else
102  rank = 0;
103  nproc = 1;
104 #endif
105  partition_first_desc = new uint64_t[nproc];
106  partition_last_desc = new uint64_t[nproc];
107  partition_range_globalidx = new uint64_t[nproc];
108  uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
109  uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
110  for(int p = 0; p < nproc; ++p){
111  partition_range_globalidx[p] = 0;
112  partition_last_desc[p] = lastDescMorton;
113  partition_last_desc[p] = firstDescMorton;
114  }
115  // Write info log
116  log.writeLog("---------------------------------------------");
117  log.writeLog("- PABLO PArallel Balanced Linear Octree -");
118  log.writeLog("---------------------------------------------");
119  log.writeLog(" ");
120  log.writeLog("---------------------------------------------");
121  log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
122  log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(2)));
123  log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_2D)));
124  log.writeLog("---------------------------------------------");
125  log.writeLog(" ");
126 #if NOMPI==0
127  MPI_Barrier(comm);
128 #endif
129  };
130 
131  // =============================================================================== //
132 
141 #if NOMPI==0
142  Class_Para_Tree(double X, double Y, double Z, double L,string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
143 #else
144  Class_Para_Tree(double X, double Y, double Z, double L,string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
145 #endif
146  serial = true;
147  error_flag = 0;
148  max_depth = 0;
149  global_num_octants = octree.getNumOctants();
150 #if NOMPI==0
151  error_flag = MPI_Comm_size(comm,&nproc);
152  error_flag = MPI_Comm_rank(comm,&rank);
153 #else
154  rank = 0;
155  nproc = 1;
156 #endif
157  partition_first_desc = new uint64_t[nproc];
158  partition_last_desc = new uint64_t[nproc];
159  partition_range_globalidx = new uint64_t[nproc];
160  uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
161  uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
162  for(int p = 0; p < nproc; ++p){
163  partition_range_globalidx[p] = 0;
164  partition_last_desc[p] = lastDescMorton;
165  partition_last_desc[p] = firstDescMorton;
166  }
167  // Write info log
168  log.writeLog("---------------------------------------------");
169  log.writeLog("- PABLO PArallel Balanced Linear Octree -");
170  log.writeLog("---------------------------------------------");
171  log.writeLog(" ");
172  log.writeLog("---------------------------------------------");
173  log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
174  log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(2)));
175  log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_2D)));
176  log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
177  log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
178  log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
179  log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
180  log.writeLog("---------------------------------------------");
181  log.writeLog(" ");
182 #if NOMPI==0
183  MPI_Barrier(comm);
184 #endif
185  };
186 
187  // =============================================================================== //
188 
/** Restart constructor: rebuilds a tree from explicit octant data.
 * \param[in] X,Y,Z,L  Physical domain origin and size (fed to trans).
 * \param[in] XY       Logical (x0,y0) anchor of each octant.
 * \param[in] levels   Refinement level of each octant.
 * NOTE(review): unlike the other constructors, max_depth and error_flag are
 * not reset here — presumably updateAdapt() takes care of them; confirm.
 */
199 #if NOMPI==0
200  Class_Para_Tree(double X, double Y, double Z, double L, ivector2D & XY, ivector & levels,string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
201 #else
202  Class_Para_Tree(double X, double Y, double Z, double L, ivector2D & XY, ivector & levels,string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
203 #endif
204  uint8_t lev, iface;
205  uint32_t x0, y0;
206  uint32_t NumOctants = XY.size();
207  octree.octants.resize(NumOctants);
 // Rebuild each octant and mark the faces lying on the domain boundary.
208  for (uint32_t i=0; i<NumOctants; i++){
209  lev = uint8_t(levels[i]);
210  x0 = uint32_t(XY[i][0]);
211  y0 = uint32_t(XY[i][1]);
212  Class_Octant<2> oct(lev, x0, y0,false);
213  oct.setBalance(false);
 // Face 0/1: x-min / x-max boundary of the logical domain.
214  if (x0 == 0){
215  iface = 0;
216  oct.setBound(iface);
217  }
218  else if (x0 == global2D.max_length - oct.getSize()){
219  iface = 1;
220  oct.setBound(iface);
221  }
 // Face 2/3: y-min / y-max boundary.
222  if (y0 == 0){
223  iface = 2;
224  oct.setBound(iface);
225  }
226  else if (y0 == global2D.max_length - oct.getSize()){
227  iface = 3;
228  oct.setBound(iface);
229  }
230  octree.octants[i] = oct;
231  }
232 
233  // WARNING (translated): the NOMPI branch must provide the else case.
234 #if NOMPI==0
235  error_flag = MPI_Comm_size(comm,&nproc);
236  error_flag = MPI_Comm_rank(comm,&rank);
237  serial = true;
238  if (nproc > 1 ) serial = false;
239 #else
240  serial = true;
241  nproc = 1;
242  rank = 0;
243 #endif
244  partition_first_desc = new uint64_t[nproc];
245  partition_last_desc = new uint64_t[nproc];
246  partition_range_globalidx = new uint64_t[nproc];
247 
 // Recompute descendant bounds and global counters from the rebuilt octants.
248  setFirstDesc();
249  setLastDesc();
250  octree.updateLocalMaxDepth();
251  updateAdapt();
252 #if NOMPI==0
253  setPboundGhosts();
254 #endif
255  // Write info log
256  log.writeLog("---------------------------------------------");
257  log.writeLog("- PABLO PArallel Balanced Linear Octree -");
258  log.writeLog("---------------------------------------------");
259  log.writeLog(" ");
260  log.writeLog("---------------------------------------------");
261  log.writeLog("- PABLO restart -");
262  log.writeLog("---------------------------------------------");
263  log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
264  log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(2)));
265  log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_2D)));
266  log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
267  log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
268  log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
269  log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
270  log.writeLog(" Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
271  log.writeLog("---------------------------------------------");
272  log.writeLog(" ");
273 #if NOMPI==0
274  MPI_Barrier(comm);
275 #endif
276  };
277 
278  // =============================================================================== //
279 
280  ~Class_Para_Tree(){
281  log.writeLog("---------------------------------------------");
282  log.writeLog("--------------- R.I.P. PABLO ----------------");
283  log.writeLog("---------------------------------------------");
284  log.writeLog("---------------------------------------------");
285  };
286 
287  // =============================================================================== //
288  // GET/SET METHODS --------------------------------------------------------------- //
289 
290  // Octant get/set Methods
295  return global2D;
296  }
297 
302  double getX(Class_Octant<2>* oct) {
303  return trans.mapX(oct->getX());
304  }
305 
310  double getY(Class_Octant<2>* oct) {
311  return trans.mapY(oct->getY());
312  }
313 
318  double getZ(Class_Octant<2>* oct) {
319  return trans.mapZ(oct->getZ());
320  }
321 
326  double getSize(Class_Octant<2>* oct) {
327  return trans.mapSize(oct->getSize());
328  }
329 
334  double getArea(Class_Octant<2>* oct) {
335  return trans.mapSize(oct->getArea());
336  }
337 
342  double getVolume(Class_Octant<2>* oct) {
343  return trans.mapArea(oct->getVolume());
344  }
345 
351  vector<double>& center) {
352  vector<double> center_ = oct->getCenter();
353  trans.mapCenter(center_, center);
354  }
355 
360  vector<double> getCenter(Class_Octant<2>* oct) {
361  vector<double> center;
362  vector<double> center_ = oct->getCenter();
363  trans.mapCenter(center_, center);
364  return center;
365  }
366 
372  vector<double> getFaceCenter(Class_Octant<2>* oct, uint8_t iface) {
373  vector<double> center;
374  vector<double> center_ = oct->getFaceCenter(iface);
375  trans.mapCenter(center_, center);
376  return center;
377  }
378 
384  void getFaceCenter(Class_Octant<2>* oct, uint8_t iface, vector<double>& center) {
385  vector<double> center_ = oct->getFaceCenter(iface);
386  trans.mapCenter(center_, center);
387  }
388 
394  dvector2D & nodes) {
395  u32vector2D nodes_;
396  oct->getNodes(nodes_);
397  trans.mapNodes(nodes_, nodes);
398  }
399 
404  dvector2D getNodes(Class_Octant<2>* oct){
405  dvector2D nodes;
406  u32vector2D nodes_;
407  oct->getNodes(nodes_);
408  trans.mapNodes(nodes_, nodes);
409  return nodes;
410  }
411 
418  uint8_t & iface,
419  dvector & normal) {
420  vector<int8_t> normal_;
421  oct->getNormal(iface, normal_);
422  trans.mapNormals(normal_, normal);
423  }
424 
431  uint8_t & iface){
432  dvector normal;
433  vector<int8_t> normal_;
434  oct->getNormal(iface, normal_);
435  trans.mapNormals(normal_, normal);
436  return normal;
437  }
438 
443  int8_t getMarker(Class_Octant<2>* oct){ // Get refinement/coarsening marker for idx-th octant
444  return oct->getMarker();
445  };
446 
451  uint8_t getLevel(Class_Octant<2>* oct){ // Get refinement/coarsening marker for idx-th octant
452  return oct->getLevel();
453  };
454 
460  bool getBound(Class_Octant<2>* oct, uint8_t iface){ // Get refinement/coarsening marker for idx-th octant
461  return oct->getBound(iface);
462  };
463 
469  bool getPbound(Class_Octant<2>* oct, uint8_t iface){ // Get refinement/coarsening marker for idx-th octant
470  return oct->getPbound(iface);
471  };
472 
478  int temp = 0;
479  for(int i = 0; i < global2D.nfaces; ++i)
480  temp += oct->getBound(i);
481  return temp != 0;
482  };
483 
488  bool getPbound(Class_Octant<2>* oct){ // Get refinement/coarsening marker for idx-th octant
489  int temp = 0;
490  for(int i = 0; i < global2D.nfaces; ++i)
491  temp += oct->getPbound(i);
492  return temp != 0;
493  };
494 
499  bool getBalance(Class_Octant<2>* oct){ // Get if balancing-blocked idx-th octant
500  return !oct->getNotBalance();
501  };
502 
503 #if NOMPI==0
504 
509  if (serial)
510  return false;
511  return (findOwner(oct->computeMorton()) != rank);
512  };
513 #endif
514 
520  return oct->getIsNewR();
521  };
522 
528  return oct->getIsNewC();
529  };
530 
536 #if NOMPI==0
537  if (getIsGhost(oct)){
538  uint32_t idx = octree.findGhostMorton(oct->computeMorton());
539  return octree.globalidx_ghosts[idx];
540  }
541 #endif
542  uint32_t idx = octree.findMorton(oct->computeMorton());
543  if (rank){
544  return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
545  }
546  return uint64_t(idx);
547  };
548 
553  uint32_t getIdx(Class_Octant<2>* oct){
554 #if NOMPI==0
555  if (getIsGhost(oct)){
556  return octree.findGhostMorton(oct->computeMorton());
557  }
558 #endif
559  return octree.findMorton(oct->computeMorton());
560  };
561 
566  void setMarker(Class_Octant<2>* oct, int8_t marker){ // Set refinement/coarsening marker for idx-th octant
567  oct->setMarker(marker);
568  };
569 
574  void setBalance(Class_Octant<2>* oct, bool balance){ // Set if balancing-blocked idx-th octant
575  oct->setBalance(!balance);
576  };
577 
578 
579 private:
580  // ------------------------------------------------------------------------------- //
581  //No pointer Octants get/set Methods
582 
587  double getX(Class_Octant<2> oct) {
588  return trans.mapX(oct.getX());
589  }
590 
595  double getY(Class_Octant<2> oct) {
596  return trans.mapY(oct.getY());
597  }
598 
603  double getZ(Class_Octant<2> oct) {
604  return trans.mapZ(oct.getZ());
605  }
606 
611  double getSize(Class_Octant<2> oct) {
612  return trans.mapSize(oct.getSize());
613  }
614 
619  double getArea(Class_Octant<2> oct) {
620  return trans.mapSize(oct.getArea());
621  }
622 
627  double getVolume(Class_Octant<2> oct) {
628  return trans.mapArea(oct.getVolume());
629  }
630 
635  void getCenter(Class_Octant<2> oct,
636  vector<double>& center) {
637  vector<double> center_ = oct.getCenter();
638  trans.mapCenter(center_, center);
639  }
640 
645  vector<double> getCenter(Class_Octant<2> oct) {
646  vector<double> center;
647  vector<double> center_ = oct.getCenter();
648  trans.mapCenter(center_, center);
649  return center;
650  }
651 
656  void getNodes(Class_Octant<2> oct,
657  dvector2D & nodes) {
658  u32vector2D nodes_;
659  oct.getNodes(nodes_);
660  trans.mapNodes(nodes_, nodes);
661  }
662 
667  dvector2D getNodes(Class_Octant<2> oct){
668  dvector2D nodes;
669  u32vector2D nodes_;
670  oct.getNodes(nodes_);
671  trans.mapNodes(nodes_, nodes);
672  return nodes;
673  }
674 
680  void getNormal(Class_Octant<2> oct,
681  uint8_t & iface,
682  dvector & normal) {
683  vector<int8_t> normal_;
684  oct.getNormal(iface, normal_);
685  trans.mapNormals(normal_, normal);
686  }
687 
693  dvector getNormal(Class_Octant<2> oct,
694  uint8_t & iface){
695  dvector normal;
696  vector<int8_t> normal_;
697  oct.getNormal(iface, normal_);
698  trans.mapNormals(normal_, normal);
699  return normal;
700  }
701 
706  int8_t getMarker(Class_Octant<2> oct){ // Get refinement/coarsening marker for idx-th octant
707  return oct.getMarker();
708  };
709 
714  uint8_t getLevel(Class_Octant<2> oct){ // Get refinement/coarsening marker for idx-th octant
715  return oct.getLevel();
716  };
717 
722  bool getBalance(Class_Octant<2> oct){ // Get if balancing-blocked idx-th octant
723  return !oct.getNotBalance();
724  };
725 
503 #if NOMPI==0
504 
 // True when the octant is not owned by this rank.
 // NOTE(review): unlike the pointer overload, this version does not
 // short-circuit on the serial case — confirm it is only called in parallel.
731  bool getIsGhost(Class_Octant<2> oct){
732  return (findOwner(oct.computeMorton()) != rank);
733  };
734 #endif
735 
 // Global index of the octant: ghost octants resolve through the ghost
 // global-index table; local octants offset their local index by the
 // partition range of the previous rank. The trailing return is a
 // fall-through sentinel (unreachable through the if/else chains).
 // NOTE: the braces of the if/else deliberately straddle the #if NOMPI
 // blocks — do not reformat.
740  uint64_t getGlobalIdx(Class_Octant<2> oct){
741 #if NOMPI==0
742  if (getIsGhost(oct)){
743  uint32_t idx = octree.findGhostMorton(oct.computeMorton());
744  return octree.globalidx_ghosts[idx];
745  }
746  else{
747 #endif
748  uint32_t idx = octree.findMorton(oct.computeMorton());
749  if (rank){
750  return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
751  }
752  else{
753  return uint64_t(idx);
754  };
755 #if NOMPI==0
756  };
757 #endif
758  return global_num_octants;
759  };
760 
 // Local index of the octant (by-value overload); ghosts resolve through the
 // ghost Morton search. Trailing return is an unreachable sentinel.
 // NOTE: braces straddle the #if NOMPI blocks — do not reformat.
765  uint32_t getIdx(Class_Octant<2> oct){
766 #if NOMPI==0
767  if (getIsGhost(oct)){
768  return octree.findGhostMorton(oct.computeMorton());
769  }
770  else{
771 #endif
772  return octree.findMorton(oct.computeMorton());
773 #if NOMPI==0
774  };
775 #endif
776  return octree.getNumOctants();
777  };
778 
 // NOTE(review): both setters below take the octant BY VALUE, so they modify
 // a local copy and have no observable effect on the tree. Callers should use
 // the pointer overloads. Changing the parameter to a reference would fix
 // this but alters the (private) signature — flagged rather than changed.
783  void setMarker(Class_Octant<2> oct, int8_t marker){ // Set refinement/coarsening marker for idx-th octant
784  oct.setMarker(marker);
785  };
786 
791  void setBalance(Class_Octant<2> oct, bool balance){ // Set if balancing-blocked idx-th octant
792  oct.setBalance(!balance);
793  };
794 
795  // ------------------------------------------------------------------------------- //
796  // Index get/set Methods
797 
798 public:
803  double getX(uint32_t idx) {
804  return trans.mapX(octree.octants[idx].getX());
805  }
806 
811  double getY(uint32_t idx) {
812  return trans.mapY(octree.octants[idx].getY());
813  }
814 
819  double getZ(uint32_t idx) {
820  return trans.mapZ(octree.octants[idx].getZ());
821  }
822 
827  double getSize(uint32_t idx) {
828  return trans.mapSize(octree.octants[idx].getSize());
829  }
830 
835  double getArea(uint32_t idx) {
836  return trans.mapSize(octree.octants[idx].getArea());
837  }
838 
843  double getVolume(uint32_t idx) {
844  return trans.mapArea(octree.octants[idx].getVolume());
845  }
846 
851  void getCenter(uint32_t idx,
852  vector<double>& center) {
853  vector<double> center_ = octree.octants[idx].getCenter();
854  trans.mapCenter(center_, center);
855  }
856 
861  vector<double> getCenter(uint32_t idx) {
862  vector<double> center;
863  vector<double> center_ = octree.octants[idx].getCenter();
864  trans.mapCenter(center_, center);
865  return center;
866  }
867 
873  vector<double> getFaceCenter(uint32_t idx, uint8_t iface) {
874  vector<double> center;
875  vector<double> center_ = octree.octants[idx].getFaceCenter(iface);
876  trans.mapCenter(center_, center);
877  return center;
878  }
879 
885  void getFaceCenter(uint32_t idx, uint8_t iface, vector<double>& center) {
886  vector<double> center_ = octree.octants[idx].getFaceCenter(iface);
887  trans.mapCenter(center_, center);
888  }
889 
895  vector<double> getNode(uint32_t idx, uint8_t inode) {
896  vector<double> node;
897  u32vector node_ = octree.octants[idx].getNode(inode);
898  trans.mapNode(node_, node);
899  return node;
900  }
901 
907  void getNode(uint32_t idx, uint8_t inode, vector<double>& node) {
908  u32vector node_ = octree.octants[idx].getNode(inode);
909  trans.mapNode(node_, node);
910  }
911 
916  void getNodes(uint32_t idx,
917  dvector2D & nodes) {
918  u32vector2D nodes_;
919  octree.octants[idx].getNodes(nodes_);
920  trans.mapNodes(nodes_, nodes);
921  }
922 
927  dvector2D getNodes(uint32_t idx){
928  dvector2D nodes;
929  u32vector2D nodes_;
930  octree.octants[idx].getNodes(nodes_);
931  trans.mapNodes(nodes_, nodes);
932  return nodes;
933  }
934 
940  void getNormal(uint32_t idx,
941  uint8_t & iface,
942  dvector & normal) {
943  vector<int8_t> normal_;
944  octree.octants[idx].getNormal(iface, normal_);
945  trans.mapNormals(normal_, normal);
946  }
947 
953  dvector getNormal(uint32_t idx,
954  uint8_t & iface){
955  dvector normal;
956  vector<int8_t> normal_;
957  octree.octants[idx].getNormal(iface, normal_);
958  trans.mapNormals(normal_, normal);
959  return normal;
960  }
961 
966  int8_t getMarker(uint32_t idx){ // Get refinement/coarsening marker for idx-th octant
967  return octree.getMarker(idx);
968  };
969 
974  uint8_t getLevel(uint32_t idx){ // Get refinement/coarsening marker for idx-th octant
975  return octree.getLevel(idx);
976  };
977 
982  bool getBalance(uint32_t idx){ // Get if balancing-blocked idx-th octant
983  return !octree.getBalance(idx);
984  };
985 
986 #if NOMPI==0
987 
 // True when octant idx is not owned by this rank.
 // NOTE(review): no `serial` short-circuit here (the pointer overload has
 // one) — confirm this is only used in the parallel case.
991  bool getIsGhost(uint32_t idx){
992  return (findOwner(octree.octants[idx].computeMorton()) != rank);
993  };
994 #endif
995 
1000  bool getIsNewR(uint32_t idx){
1001  return octree.octants[idx].getIsNewR();
1002  };
1003 
1008  bool getIsNewC(uint32_t idx){
1009  return octree.octants[idx].getIsNewC();
1010  };
1011 
 // Global index of local octant idx: local index shifted by the partition
 // range of the previous rank (rank 0 uses idx directly). The final return
 // is unreachable (both branches return) and kept as a sentinel.
1016  uint64_t getGlobalIdx(uint32_t idx){
1017  if (rank){
1018  return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
1019  }
1020  else{
1021  return uint64_t(idx);
1022  };
1023  return global_num_octants;
1024  };
1025 
1030  uint64_t getGhostGlobalIdx(uint32_t idx){
1031  if (idx<octree.size_ghosts){
1032  return octree.globalidx_ghosts[idx];
1033  };
1034  return uint64_t(octree.size_ghosts);
1035  };
1036 
1041  void setMarker(uint32_t idx, int8_t marker){ // Set refinement/coarsening marker for idx-th octant
1042  octree.setMarker(idx, marker);
1043  };
1044 
1049  void setBalance(uint32_t idx, bool balance){ // Set if balancing-blocked idx-th octant
1050  octree.setBalance(idx, !balance);
1051  };
1052 
1053  // ------------------------------------------------------------------------------- //
1054  // Local Tree get/set Methods
1055 
1059  uint64_t getStatus(){
1060  return status;
1061  }
1062 
1066  uint32_t getNumOctants() const{
1067  return octree.getNumOctants();
1068  };
1069 
1073  uint32_t getNumGhosts() const{
1074  return octree.getSizeGhost();
1075  };
1076 
1080  uint8_t getLocalMaxDepth() const{ // Get max depth reached in local tree
1081  return octree.getLocalMaxDepth();
1082  };
1083 
1087  uint8_t getBalanceCodimension() const{
1088  return octree.getBalanceCodim();
1089  };
1090 
1094  void setBalanceCodimension(uint8_t b21codim){
1095  octree.setBalanceCodim(b21codim);
1096  };
1097 
1098 
1099  // --------------------------------
1100 
1101  const Class_Octant<2> & getFirstDesc() const{
1102  return octree.getFirstDesc();
1103  };
1104 
1105  const Class_Octant<2> & getLastDesc() const{
1106  return octree.getLastDesc();
1107  };
1108 
1109  uint64_t getLastDescMorton(uint32_t idx) {
1110  return octree.octants[idx].buildLastDesc().computeMorton();
1111  };
1112 
1113 
1114 private:
1115 
1116  void setFirstDesc(){
1117  octree.setFirstDesc();
1118  };
1119 
1120  void setLastDesc(){
1121  octree.setLastDesc();
1122  };
1123 
1124  Class_Octant<2>& extractOctant(uint32_t idx) {
1125  return octree.extractOctant(idx) ;
1126  };
1127 
1128  // --------------------------------
1129 
1130 public:
1131 
1136  Class_Octant<2>* getOctant(uint32_t idx) {
1137  if (idx < octree.getNumOctants()){
1138  return &octree.octants[idx] ;
1139  }
1140  return NULL;
1141  };
1142 
1148  if (idx < octree.getSizeGhost()){
1149  return &octree.ghosts[idx] ;
1150  }
1151  return NULL;
1152  };
1153 
1163  void findNeighbours(uint32_t idx,
1164  uint8_t iface,
1165  uint8_t codim,
1166  u32vector & neighbours,
1167  vector<bool> & isghost){
1168 
1169  if (codim == 1){
1170  octree.findNeighbours(idx, iface, neighbours, isghost);
1171  }
1172  else if (codim == 2){
1173  octree.findNodeNeighbours(idx, iface, neighbours, isghost);
1174  }
1175  else {
1176  neighbours.clear();
1177  isghost.clear();
1178  }
1179  };
1180 
1191  uint8_t iface,
1192  uint8_t codim,
1193  u32vector & neighbours,
1194  vector<bool> & isghost){
1195 
1196  if (codim == 1){
1197  octree.findNeighbours(oct, iface, neighbours, isghost);
1198  }
1199  else if (codim == 2){
1200  octree.findNodeNeighbours(oct, iface, neighbours, isghost);
1201  }
1202  else {
1203  neighbours.clear();
1204  isghost.clear();
1205  }
1206 
1207  };
1208 
1217  void findGhostNeighbours(uint32_t idx,
1218  uint8_t iface,
1219  uint8_t codim,
1220  u32vector & neighbours){
1221 
1222  if (codim == 1){
1223  octree.findGhostNeighbours(idx, iface, neighbours);
1224  }
1225  else if (codim == 2){
1226  octree.findGhostNodeNeighbours(idx, iface, neighbours);
1227  }
1228  else {
1229  neighbours.clear();
1230  }
1231  };
1232 
1233  // =============================================================================== //
1234 
1242  int findOwner(const uint64_t & morton) {
1243  int p = -1;
1244  int length = nproc;
1245  int beg = 0;
1246  int end = nproc -1;
1247  int seed = nproc/2;
1248  while(beg != end){
1249  if(morton <= partition_last_desc[seed]){
1250  end = seed;
1251  if(morton > partition_last_desc[seed-1])
1252  beg = seed;
1253  }
1254  else{
1255  beg = seed;
1256  if(morton <= partition_last_desc[seed+1])
1257  beg = seed + 1;
1258  }
1259  length = end - beg;
1260  seed = beg + length/2;
1261  }
1262  p = beg;
1263  return p;
1264  }
1265 
1266 private:
1276  void findNeighbours(Class_Octant<2> oct,
1277  uint8_t iface,
1278  uint8_t codim,
1279  u32vector & neighbours,
1280  vector<bool> & isghost){
1281 
1282  if (codim == 1){
1283  octree.findNeighbours(&oct, iface, neighbours, isghost);
1284  }
1285  else if (codim == 2){
1286  octree.findNodeNeighbours(&oct, iface, neighbours, isghost);
1287  }
1288  else {
1289  neighbours.clear();
1290  isghost.clear();
1291  }
1292  };
1293 
1294 public:
1295  //-------------------------------------------------------------------------------- //
1296  // Intersections get Methods
1297 
 // Number of stored intersections (truncating size_t -> uint32_t).
1301  uint32_t getNumIntersections() {
1302  return octree.intersections.size();
1303  }
1304 
1310  if (idx < octree.intersections.size()){
1311  return &octree.intersections[idx];
1312  }
1313  return NULL;
1314  }
1315 
1321  if(inter->finer && inter->isghost)
1322  return octree.extractGhostOctant(inter->owners[inter->finer]).getLevel();
1323  else
1324  return octree.extractOctant(inter->owners[inter->finer]).getLevel();
1325  }
1326 
1332  return inter->finer;
1333  }
1334 
1340  return inter->getBound();
1341  }
1342 
1348  return inter->getIsGhost();
1349  }
1350 
1356  return inter->getPbound();
1357  }
1358 
1363  uint8_t getFace(Class_Intersection<2>* inter) {
1364  return inter->iface;
1365  }
1366 
1371  u32vector getOwners(Class_Intersection<2>* inter) {
1372  u32vector owners(2);
1373  owners[0] = inter->owners[0];
1374  owners[1] = inter->owners[1];
1375  return owners;
1376  }
1377 
1383  uint32_t Size;
1384  if(inter->finer && inter->isghost)
1385  Size = octree.extractGhostOctant(inter->owners[inter->finer]).getSize();
1386  else
1387  Size = octree.extractOctant(inter->owners[inter->finer]).getSize();
1388  return trans.mapSize(Size);
1389  }
1390 
1396  uint32_t Area;
1397  if(inter->finer && inter->isghost)
1398  Area = octree.extractGhostOctant(inter->owners[1]).getArea();
1399  else
1400  Area = octree.extractOctant(inter->owners[inter->finer]).getArea();
1401  return trans.mapSize(Area);
1402  }
1403 
 // Physical center of an intersection: take the finer owner's cell center
 // and shift it by half the cell size onto the shared face
 // (sign is -1 for even faces, +1 for odd faces; iface/2 picks the axis).
 // NOTE(review): the int/uint32_t round-trip relies on logical coordinates
 // fitting in int — confirm against max_length.
1408  vector<double> getCenter(Class_Intersection<2>* inter){
1409  vector<double> center;
1410  Class_Octant<2> oct;
1411  if(inter->finer && inter->isghost)
1412  oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1413  else
1414  oct = octree.extractOctant(inter->owners[inter->finer]);
1415  vector<double> center_ = oct.getCenter();
1416  int sign = ( int(2*((inter->iface)%2)) - 1);
1417  double deplace = double (sign * int(oct.getSize())) / 2;
1418  center_[inter->iface/2] = uint32_t(int(center_[inter->iface/2]) + deplace);
1419  trans.mapCenter(center_, center);
1420  return center;
1421  }
1422 
 // Physical node coordinates of an intersection: fetch all nodes of the
 // finer owner octant, keep only those on face iface (via the facenode
 // lookup table), then map them to physical space.
1427  dvector2D getNodes(Class_Intersection<2>* inter){
1428  dvector2D nodes;
1429  Class_Octant<2> oct;
1430  if(inter->finer && inter->isghost)
1431  oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1432  else
1433  oct = octree.extractOctant(inter->owners[inter->finer]);
1434  uint8_t iface = inter->iface;
1435  u32vector2D nodes_all;
1436  oct.getNodes(nodes_all);
1437  u32vector2D nodes_(global2D.nnodesperface, u32vector(3));
1438  for (int i=0; i<global2D.nnodesperface; i++){
1439  for (int j=0; j<3; j++){
1440  nodes_[i][j] = nodes_all[global2D.facenode[iface][i]][j];
1441  }
1442  }
1443  trans.mapNodesIntersection(nodes_, nodes);
1444  return nodes;
1445  }
1446 
1452  dvector normal;
1453  Class_Octant<2> oct;
1454  if(inter->finer && inter->isghost)
1455  oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1456  else
1457  oct = octree.extractOctant(inter->owners[inter->finer]);
1458  uint8_t iface = inter->iface;
1459  vector<int8_t> normal_;
1460  oct.getNormal(iface, normal_);
1461  trans.mapNormals(normal_, normal);
1462  return normal;
1463  }
1464 
1465  //-------------------------------------------------------------------------------- //
1466  // No Pointer Intersections get Methods
1467 
1468 private:
1469  double getSize(Class_Intersection<2> inter) {
1470  uint32_t Size;
1471  if(inter.finer && inter.isghost)
1472  Size = octree.extractGhostOctant(inter.owners[inter.finer]).getSize();
1473  else
1474  Size = octree.extractOctant(inter.owners[inter.finer]).getSize();
1475  return trans.mapSize(Size);
1476  }
1477 
1478  double getArea(Class_Intersection<2> inter) {
1479  uint32_t Area;
1480  if(inter.finer && inter.isghost)
1481  Area = octree.extractGhostOctant(inter.owners[inter.finer]).getArea();
1482  else
1483  Area = octree.extractOctant(inter.owners[inter.finer]).getArea();
1484  return trans.mapSize(Area);
1485  }
1486 
1487  void getCenter(Class_Intersection<2> inter,dvector & center){
1488  Class_Octant<2> oct;
1489  if(inter.finer && inter.isghost)
1490  oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1491  else
1492  oct = octree.extractOctant(inter.owners[inter.finer]);
1493  vector<double>center_ = oct.getCenter();
1494  int sign = ( int(2*((inter.iface)%2)) - 1);
1495  double deplace = double (sign * int(oct.getSize())) / 2;
1496  center_[inter.iface/2] = uint32_t(int(center_[inter.iface/2]) + deplace);
1497  trans.mapCenter(center_, center);
1498  }
1499 
1500  void getNodes(Class_Intersection<2> inter,
1501  dvector2D & nodes) {
1502  Class_Octant<2> oct;
1503  if(inter.finer && inter.isghost)
1504  oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1505  else
1506  oct = octree.extractOctant(inter.owners[inter.finer]);
1507  uint8_t iface = inter.iface;
1508  u32vector2D nodes_all;
1509  oct.getNodes(nodes_all);
1510  u32vector2D nodes_(global2D.nnodesperface, u32vector(3));
1511  for (int i=0; i<global2D.nnodesperface; i++){
1512  for (int j=0; j<3; j++){
1513  nodes_[i][j] = nodes_all[global2D.facenode[iface][i]][j];
1514  }
1515  }
1516  trans.mapNodesIntersection(nodes_, nodes);
1517  }
1518 
1519  void getNormal(Class_Intersection<2> inter,
1520  dvector & normal) {
1521  Class_Octant<2> oct;
1522  if(inter.finer && inter.isghost)
1523  oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1524  else
1525  oct = octree.extractOctant(inter.owners[inter.finer]);
1526  uint8_t iface = inter.iface;
1527  vector<int8_t> normal_;
1528  oct.getNormal(iface, normal_);
1529  trans.mapNormals(normal_, normal);
1530  }
1531 
1532  // =============================================================================== //
1533 
1534 public:
1538  octree.computeIntersections();
1539  }
1540 
1541  // =============================================================================== //
1542 
1543  // TODO Uniform all get point owner
1544 
 // Locate the local octant containing a physical point, or NULL when the
 // point is outside the domain / owned by another rank.
 // NOTE(review): trans.mapX/mapY are applied to physical doubles here while
 // the accessors above use them on logical coordinates — presumably they are
 // overloaded both ways; confirm in Class_Map.
1549  Class_Octant<2>* getPointOwner(dvector & point){
1550  uint32_t noctants = octree.octants.size();
1551  uint32_t idxtry = noctants/2;
1552  uint32_t x, y;
1553  uint64_t morton, mortontry;
1554  int powner = 0;
1555 
1556  x = trans.mapX(point[0]);
1557  y = trans.mapY(point[1]);
1558 
 // Reject points outside the logical/physical domain.
1559  if ((x > global2D.max_length) || (y > global2D.max_length)
1560  || (point[0] < trans.X0) || (point[1] < trans.Y0))
1561  return NULL;
1562 
 // Clamp the far boundary into the last cell so its Morton key is valid.
1563  if (x == global2D.max_length) x = x - 1;
1564  if (y == global2D.max_length) y = y - 1;
1565  morton = mortonEncode_magicbits(x,y);
1566 
1567 #if NOMPI==0
1568  if (!serial) powner = findOwner(morton);
1569 #else
1570  powner = 0;
1571 #endif
1572  if ((powner!=rank) && (!serial))
1573  return NULL;
1574 
 // Bisection over the Morton-sorted octant array: halve the signed jump
 // each step, moving toward the target key.
1575  int32_t jump = idxtry;
1576  while(abs(jump) > 0){
1577  mortontry = octree.octants[idxtry].computeMorton();
1578  jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1579  idxtry += jump;
 // idxtry is unsigned: a negative step wraps above noctants-1 and is
 // clamped here together with genuine overshoots.
1580  if (idxtry > noctants-1){
1581  if (jump > 0){
1582  idxtry = noctants - 1;
1583  jump = 0;
1584  }
1585  else if (jump < 0){
1586  idxtry = 0;
1587  jump = 0;
1588  }
1589  }
1590  }
1591  if(octree.octants[idxtry].computeMorton() == morton){
1592  return &octree.octants[idxtry];
1593  }
1594  else{
1595  // Step until the mortontry lower than morton (one idx of distance)
1596  {
1597  while(octree.octants[idxtry].computeMorton() < morton){
1598  idxtry++;
1599  if(idxtry > noctants-1){
1600  idxtry = noctants-1;
1601  break;
1602  }
1603  }
 // Walking below 0 wraps the unsigned index, caught by the same test.
1604  while(octree.octants[idxtry].computeMorton() > morton){
1605  idxtry--;
1606  if(idxtry > noctants-1){
1607  idxtry = 0;
1608  break;
1609  }
1610  }
1611  }
1612  return &octree.octants[idxtry];
1613  }
1614  }
1615 
/** Get the local index of the octant owner of an input point (physical coordinates).
 * \param[in] point Coordinates of the target point.
 * \return Local index of the owner octant. Returns -1 — which wraps to
 * 0xFFFFFFFF since the return type is unsigned — when the point is outside
 * the domain or owned by another process; callers must treat that value as
 * the "not found" sentinel.
 */
uint32_t getPointOwnerIdx(dvector & point){
	uint32_t noctants = octree.octants.size();
	// Start the binary search from the middle of the local, Morton-ordered vector.
	uint32_t idxtry = noctants/2;
	uint32_t x, y;
	uint64_t morton, mortontry;
	int powner = 0;

	// Map physical coordinates to logical (integer) coordinates.
	x = trans.mapX(point[0]);
	y = trans.mapY(point[1]);

	// Point outside the domain: sentinel value.
	if ((x > global2D.max_length) || (y > global2D.max_length)
			|| (point[0] < trans.X0) || (point[1] < trans.Y0))
		return -1;

	// Points exactly on the upper boundary are clamped into the last cell.
	if (x == global2D.max_length) x = x - 1;
	if (y == global2D.max_length) y = y - 1;
	morton = mortonEncode_magicbits(x,y);

#if NOMPI==0
	if(!serial) powner = findOwner(morton);
#else
	powner = 0;
#endif
	//if ((powner!=rank) || (x > global2D.max_length) || (y > global2D.max_length))
	// The point belongs to another process: sentinel value.
	if ((powner!=rank) && (!serial))
		return -1;


	// Binary search over local octants; jump sign follows the Morton comparison.
	int32_t jump = idxtry;
	while(abs(jump) > 0){
		mortontry = octree.octants[idxtry].computeMorton();
		jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
		idxtry += jump;
		// idxtry is unsigned: a step below zero wraps around and is caught here.
		if (idxtry > noctants-1){
			if (jump > 0){
				idxtry = noctants - 1;
				jump = 0;
			}
			else if (jump < 0){
				idxtry = 0;
				jump = 0;
			}
		}
	}
	if(octree.octants[idxtry].computeMorton() == morton){
		return idxtry;
	}
	else{
		// Step until the mortontry lower than morton (one idx of distance)
		{
			while(octree.octants[idxtry].computeMorton() < morton){
				idxtry++;
				if(idxtry > noctants-1){
					idxtry = noctants-1;
					break;
				}
			}
			while(octree.octants[idxtry].computeMorton() > morton){
				idxtry--;
				//idxtry = noctants-1;
				// Unsigned wrap-around below zero lands here; clamp to first octant.
				if(idxtry > noctants-1){
					idxtry = 0;
					break;
				}
			}
		}
		// The owner is the octant whose Morton key immediately precedes the point's.
		return idxtry;
	}
}
1689 
1690 private:
// Private variant returning the owner octant BY VALUE (physical coordinates).
// A default-constructed Class_Octant<2> plays the role of the "not found"
// result when the point is outside the domain or owned by another process.
Class_Octant<2> getPointOwner2(dvector & point){
	uint32_t noctants = octree.octants.size();
	// Start the binary search from the middle of the local, Morton-ordered vector.
	uint32_t idxtry = noctants/2;
	uint32_t x, y;
	uint64_t morton, mortontry;
	int powner = 0;

	// Map physical coordinates to logical (integer) coordinates.
	x = trans.mapX(point[0]);
	y = trans.mapY(point[1]);

	// Point outside the domain: return a default (null) octant.
	if ((x > global2D.max_length) || (y > global2D.max_length)
			|| (point[0] < trans.X0) || (point[1] < trans.Y0)){
		Class_Octant<2> oct0;
		return oct0;
	}

	// Points exactly on the upper boundary are clamped into the last cell.
	if (x == global2D.max_length) x = x - 1;
	if (y == global2D.max_length) y = y - 1;
	morton = mortonEncode_magicbits(x,y);

#if NOMPI==0
	if (!serial) powner = findOwner(morton);
#else
	powner = 0;
#endif
	// The point belongs to another process: return a default (null) octant.
	if ((powner!=rank) && (!serial)){
		Class_Octant<2> oct0;
		return oct0;
	}

	// Binary search over local octants; jump sign follows the Morton comparison.
	int32_t jump = idxtry;
	while(abs(jump) > 0){
		mortontry = octree.octants[idxtry].computeMorton();
		jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
		idxtry += jump;
		// idxtry is unsigned: a step below zero wraps around and is caught here.
		if (idxtry > noctants-1){
			if (jump > 0){
				idxtry = noctants - 1;
				jump = 0;
			}
			else if (jump < 0){
				idxtry = 0;
				jump = 0;
			}
		}
	}
	if(octree.octants[idxtry].computeMorton() == morton){
		return octree.octants[idxtry];
	}
	else{
		// Step until the mortontry lower than morton (one idx of distance)
		{
			while(octree.octants[idxtry].computeMorton() < morton){
				idxtry++;
				if(idxtry > noctants-1){
					idxtry = noctants-1;
					break;
				}
			}
			while(octree.octants[idxtry].computeMorton() > morton){
				idxtry--;
				// Unsigned wrap-around below zero lands here; clamp to last octant.
				if(idxtry > noctants-1){
					idxtry = noctants-1;
					break;
				}
			}
		}
		// The owner is the octant whose Morton key immediately precedes the point's.
		return octree.octants[idxtry];
	}
}
1761 
1762 public:
/** Get the octant owner of an input point given in logical (integer) coordinates.
 * \param[in] point Logical coordinates of the target point.
 * \return Pointer to the local octant owning the point; NULL if the point is
 * outside the domain or is owned by another process.
 */
Class_Octant<2>* getPointOwner(u32vector & point){
	uint32_t noctants = octree.octants.size();
	// Start the binary search from the middle of the local, Morton-ordered vector.
	uint32_t idxtry = noctants/2;
	uint32_t x, y;
	uint64_t morton, mortontry;
	int powner = 0;

	// Coordinates are already logical: no transformation needed.
	x = point[0];
	y = point[1];
	// Point outside the logical domain: no owner.
	if ((x > global2D.max_length) || (y > global2D.max_length))
		return NULL;

	// Points exactly on the upper boundary are clamped into the last cell.
	if (x == global2D.max_length) x = x - 1;
	if (y == global2D.max_length) y = y - 1;
	morton = mortonEncode_magicbits(x,y);

#if NOMPI==0
	if (!serial) powner = findOwner(morton);
#else
	powner = 0;
#endif
	// The point belongs to another process: nothing to return locally.
	if ((powner!=rank) && (!serial))
		return NULL;

	// Binary search over local octants; jump sign follows the Morton comparison.
	int32_t jump = idxtry;
	while(abs(jump) > 0){
		mortontry = octree.octants[idxtry].computeMorton();
		jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
		idxtry += jump;
		// idxtry is unsigned: a step below zero wraps around and is caught here.
		if (idxtry > noctants-1){
			if (jump > 0){
				idxtry = noctants - 1;
				jump = 0;
			}
			else if (jump < 0){
				idxtry = 0;
				jump = 0;
			}
		}
	}
	if(octree.octants[idxtry].computeMorton() == morton){
		return &octree.octants[idxtry];
	}
	else{
		// Step until the mortontry lower than morton (one idx of distance)
		{
			while(octree.octants[idxtry].computeMorton() < morton){
				idxtry++;
				if(idxtry > noctants-1){
					idxtry = noctants-1;
					break;
				}
			}
			while(octree.octants[idxtry].computeMorton() > morton){
				idxtry--;
				// Unsigned wrap-around below zero lands here; clamp to last octant.
				if(idxtry > noctants-1){
					idxtry = noctants-1;
					break;
				}
			}
		}
		// The owner is the octant whose Morton key immediately precedes the point's.
		return &octree.octants[idxtry];
	}
}
1831 
/** Get the local index of the octant owner of a point given in logical coordinates.
 * \param[in] point Logical (integer) coordinates of the target point.
 * \return Local index of the owner octant. Returns -1 — which wraps to
 * 0xFFFFFFFF since the return type is unsigned — when the point is outside
 * the domain or owned by another process.
 */
uint32_t getPointOwnerIdx(u32vector & point){
	uint32_t noctants = octree.octants.size();
	// Start the binary search from the middle of the local, Morton-ordered vector.
	uint32_t idxtry = noctants/2;
	uint32_t x, y;
	uint64_t morton, mortontry;
	int powner = 0;

	// Coordinates are already logical: no transformation needed.
	x = point[0];
	y = point[1];
	// Point outside the logical domain: sentinel value.
	if ((x > global2D.max_length) || (y > global2D.max_length))
		return -1;

	// Points exactly on the upper boundary are clamped into the last cell.
	if (x == global2D.max_length) x = x - 1;
	if (y == global2D.max_length) y = y - 1;
	morton = mortonEncode_magicbits(x,y);

#if NOMPI==0
	if (!serial) powner = findOwner(morton);
#else
	powner = 0;
#endif
	// The point belongs to another process: sentinel value.
	if ((powner!=rank) && (!serial))
		return -1;

	// Binary search over local octants; jump sign follows the Morton comparison.
	int32_t jump = idxtry;
	while(abs(jump) > 0){
		mortontry = octree.octants[idxtry].computeMorton();
		jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
		idxtry += jump;
		// idxtry is unsigned: a step below zero wraps around and is caught here.
		if (idxtry > noctants-1){
			if (jump > 0){
				idxtry = noctants - 1;
				jump = 0;
			}
			else if (jump < 0){
				idxtry = 0;
				jump = 0;
			}
		}
	}
	if(octree.octants[idxtry].computeMorton() == morton){
		return idxtry;
	}
	else{
		// Step until the mortontry lower than morton (one idx of distance)
		{
			while(octree.octants[idxtry].computeMorton() < morton){
				idxtry++;
				if(idxtry > noctants-1){
					idxtry = noctants-1;
					break;
				}
			}
			while(octree.octants[idxtry].computeMorton() > morton){
				idxtry--;
				// Unsigned wrap-around below zero lands here; clamp to last octant.
				if(idxtry > noctants-1){
					idxtry = noctants-1;
					break;
				}
			}
		}
		// The owner is the octant whose Morton key immediately precedes the point's.
		return idxtry;
	}
}
1900 
1906  uint32_t noctants = octree.octants.size();
1907  uint32_t idxtry = noctants/2;
1908  uint32_t x, y;
1909  uint64_t morton, mortontry;
1910  int powner = 0;
1911 
1912  x = uint32_t(point[0]);
1913  y = uint32_t(point[1]);
1914 
1915  if ((point[0] < 0) || (point[0] > double(global2D.max_length)) || (point[1] < 0) || (point[1] > double(global2D.max_length)))
1916  return NULL;
1917 
1918  if (x == global2D.max_length) x = x - 1;
1919  if (y == global2D.max_length) y = y - 1;
1920  morton = mortonEncode_magicbits(x,y);
1921 
1922 #if NOMPI==0
1923  if (!serial) powner = findOwner(morton);
1924 #else
1925  powner = 0;
1926 #endif
1927  if ((powner!=rank) && (!serial))
1928  return NULL;
1929 
1930  int32_t jump = idxtry;
1931  while(abs(jump) > 0){
1932  mortontry = octree.octants[idxtry].computeMorton();
1933  jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1934  idxtry += jump;
1935  if (idxtry > noctants-1){
1936  if (jump > 0){
1937  idxtry = noctants - 1;
1938  jump = 0;
1939  }
1940  else if (jump < 0){
1941  idxtry = 0;
1942  jump = 0;
1943  }
1944  }
1945  }
1946  if(octree.octants[idxtry].computeMorton() == morton){
1947  return &octree.octants[idxtry];
1948  }
1949  else{
1950  // Step until the mortontry lower than morton (one idx of distance)
1951  {
1952  while(octree.octants[idxtry].computeMorton() < morton){
1953  idxtry++;
1954  if(idxtry > noctants-1){
1955  idxtry = noctants-1;
1956  break;
1957  }
1958  }
1959  while(octree.octants[idxtry].computeMorton() > morton){
1960  idxtry--;
1961  if(idxtry > noctants-1){
1962  idxtry = noctants-1;
1963  break;
1964  }
1965  }
1966  }
1967  return &octree.octants[idxtry];
1968  }
1969  }
1970 
1975  uint32_t getLogicalPointOwnerIdx(dvector & point){
1976  uint32_t noctants = octree.octants.size();
1977  uint32_t idxtry = noctants/2;
1978  uint32_t x, y;
1979  uint64_t morton, mortontry;
1980  int powner;
1981 
1982  x = uint32_t(point[0]);
1983  y = uint32_t(point[1]);
1984  if ((point[0] < 0) || (point[0] > double(global2D.max_length)) || (point[1] < 0) || (point[1] > double(global2D.max_length)))
1985  return -1;
1986 
1987  if (x == global2D.max_length) x = x - 1;
1988  if (y == global2D.max_length) y = y - 1;
1989  morton = mortonEncode_magicbits(x,y);
1990 
1991 #if NOMPI==0
1992  if (!serial) powner = findOwner(morton);
1993 #else
1994  powner = 0;
1995 #endif
1996  if ((powner!=rank) && (!serial))
1997  return -1;
1998 
1999  int32_t jump = idxtry;
2000  while(abs(jump) > 0){
2001  mortontry = octree.octants[idxtry].computeMorton();
2002  jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
2003  idxtry += jump;
2004  if (idxtry > noctants-1){
2005  if (jump > 0){
2006  idxtry = noctants - 1;
2007  jump = 0;
2008  }
2009  else if (jump < 0){
2010  idxtry = 0;
2011  jump = 0;
2012  }
2013  }
2014  }
2015  if(octree.octants[idxtry].computeMorton() == morton){
2016  return idxtry;
2017  }
2018  else{
2019  // Step until the mortontry lower than morton (one idx of distance)
2020  {
2021  while(octree.octants[idxtry].computeMorton() < morton){
2022  idxtry++;
2023  if(idxtry > noctants-1){
2024  idxtry = noctants-1;
2025  break;
2026  }
2027  }
2028  while(octree.octants[idxtry].computeMorton() > morton){
2029  idxtry--;
2030  if(idxtry > noctants-1){
2031  idxtry = noctants-1;
2032  break;
2033  }
2034  }
2035  }
2036  return idxtry;
2037  }
2038  }
2039 
2040 private:
2041  Class_Octant<2> getPointOwner2(u32vector & point){
2042  uint32_t noctants = octree.octants.size();
2043  uint32_t idxtry = noctants/2;
2044  uint32_t x, y;
2045  uint64_t morton, mortontry;
2046  int powner;
2047 
2048  x = point[0];
2049  y = point[1];
2050 
2051  if ((x > global2D.max_length) || (y > global2D.max_length)){
2052  Class_Octant<2> oct0;
2053  return oct0;
2054  }
2055 
2056  if (x == global2D.max_length) x = x - 1;
2057  if (y == global2D.max_length) y = y - 1;
2058  morton = mortonEncode_magicbits(x,y);
2059 
2060 #if NOMPI==0
2061  powner = findOwner(morton);
2062 #else
2063  powner = 0;
2064 #endif
2065  if ((powner!=rank) && (!serial)){
2066  Class_Octant<2> oct0;
2067  return oct0;
2068  }
2069 
2070  int32_t jump = idxtry;
2071  while(abs(jump) > 0){
2072  mortontry = octree.octants[idxtry].computeMorton();
2073  jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
2074  idxtry += jump;
2075  if (idxtry > noctants-1){
2076  if (jump > 0){
2077  idxtry = noctants - 1;
2078  jump = 0;
2079  }
2080  else if (jump < 0){
2081  idxtry = 0;
2082  jump = 0;
2083  }
2084  }
2085  }
2086  if(octree.octants[idxtry].computeMorton() == morton){
2087  return octree.octants[idxtry];
2088  }
2089  else{
2090  // Step until the mortontry lower than morton (one idx of distance)
2091  {
2092  while(octree.octants[idxtry].computeMorton() < morton){
2093  idxtry++;
2094  if(idxtry > noctants-1){
2095  idxtry = noctants-1;
2096  break;
2097  }
2098  }
2099  while(octree.octants[idxtry].computeMorton() > morton){
2100  idxtry--;
2101  if(idxtry > noctants-1){
2102  idxtry = noctants-1;
2103  break;
2104  }
2105  }
2106  }
2107  return octree.octants[idxtry];
2108  }
2109  }
2110 
2111  // =============================================================================== //
2112  // PARATREE METHODS ----------------------------------------------------------------------- //
2113 
2114 #if NOMPI==0
2115  void computePartition(uint32_t* partition) {
2116  uint32_t division_result = 0;
2117  uint32_t remind = 0;
2118  division_result = uint32_t(global_num_octants/(uint64_t)nproc);
2119  remind = (uint32_t)(global_num_octants%(uint64_t)nproc);
2120  for(uint32_t i = 0; i < (uint32_t)nproc; ++i)
2121  if(i<remind)
2122  partition[i] = division_result + 1;
2123  else
2124  partition[i] = division_result;
2125 
2126  }
2127 
2128  // =============================================================================== //
2129 
/** Compute a weighted octant partition: octants are distributed so that each
 * process receives (approximately) the same total weight.
 * \param[out] partition Array of nproc entries filled with the octant count
 * assigned to each process.
 * \param[in] weight Per-octant weights (one entry per local octant).
 */
void computePartition(uint32_t* partition, dvector* weight){ // compute octant partition giving the same number of octant to each process and redistributing the reminder

	if(serial){

		// Serial tree: every process sees all octants, so the split can be
		// computed directly by accumulating weights until the per-process
		// average is reached.
		double division_result = 0;
		double global_weight = 0.0;
		for (int i=0; i<weight->size(); i++){
			global_weight += (*weight)[i];
		}
		division_result = global_weight/(double)nproc;

		//Estimate resulting weight distribution starting from proc 0 (sending tail)
		//Estimate sending weight by each proc in initial conf (sending tail)
		uint32_t i = 0, tot = 0;
		int iproc = 0;
		while (iproc < nproc-1){
			double partial_weight = 0.0;
			partition[iproc] = 0;
			// Greedily take octants until this process reaches the average weight.
			// NOTE(review): no bound check on i against weight->size() here;
			// relies on total weight arithmetic keeping i in range — verify.
			while(partial_weight < division_result){
				partial_weight += (*weight)[i];
				tot++;
				partition[iproc]++;
				i++;
			}
			iproc++;
		}
		// Last process takes whatever remains.
		partition[nproc-1] = weight->size() - tot;
	}
	else{

		// Distributed tree: weights are spread across processes, so first
		// gather per-process totals, then iteratively shift weight between
		// neighboring ranks, and finally translate the shifted weights into
		// octant counts exchanged with MPI point-to-point messages.
		double division_result = 0;
		double remind = 0;
		dvector local_weight(nproc,0.0);
		dvector temp_local_weight(nproc,0.0);
		dvector2D sending_weight(nproc, dvector(nproc,0.0));
		double* rbuff = new double[nproc];
		double global_weight = 0.0;
		for (int i=0; i<weight->size(); i++){
			local_weight[rank] += (*weight)[i];
		}
		// Gather every process's local weight total.
		error_flag = MPI_Allgather(&local_weight[rank],1,MPI_DOUBLE,rbuff,1,MPI_DOUBLE,comm);
		for (int i=0; i<nproc; i++){
			local_weight[i] = rbuff[i];
			global_weight += rbuff[i];
		}
		delete [] rbuff; rbuff = NULL;
		// Target weight per process.
		division_result = global_weight/(double)nproc;

		//Estimate resulting weight distribution starting from proc 0 (sending tail)

		temp_local_weight = local_weight;
		//Estimate sending weight by each proc in initial conf (sending tail)

		// Single relaxation iteration (iter bound is hard-coded to 1).
		for (int iter = 0; iter < 1; iter++){

			vector<double> delta(nproc);
			for (int i=0; i<nproc; i++){
				delta[i] = temp_local_weight[i] - division_result;
			}

			// Forward sweep: overloaded processes push excess weight toward
			// higher ranks (sending tail octants).
			for (int i=0; i<nproc-1; i++){

				double post_weight = 0.0;
				for (int j=i+1; j<nproc; j++){
					post_weight += temp_local_weight[j];
				}
				if (temp_local_weight[i] > division_result){

					delta[i] = temp_local_weight[i] - division_result;
					if (post_weight < division_result*(nproc-i-1)){

						double post_delta = division_result*(nproc-i-1) - post_weight;
						double delta_sending = min(local_weight[i], min(delta[i], post_delta));
						int jproc = i+1;
						double sending = 0;
						while (delta_sending > 0 && jproc<nproc){
							sending = min(division_result, delta_sending);
							sending = min(sending, (division_result-temp_local_weight[jproc]));
							sending = max(sending, 0.0);
							sending_weight[i][jproc] += sending;
							temp_local_weight[jproc] += sending;
							temp_local_weight[i] -= sending;
							delta_sending -= sending;
							// NOTE(review): subtracting delta_sending (the remaining
							// amount) rather than `sending` looks suspicious — confirm
							// against the intended algorithm.
							delta[i] -= delta_sending;
							jproc++;
						}
					} //post
				}//weight>
			}//iproc

			// Backward sweep: overloaded processes push excess weight toward
			// lower ranks (sending head octants).
			for (int i = nproc-1; i>0; i--){

				double pre_weight = 0.0;
				for (int j=i-1; j>=0; j--){
					pre_weight += temp_local_weight[j];
				}
				if (temp_local_weight[i] > division_result){

					delta[i] = temp_local_weight[i] - division_result;
					if (pre_weight < division_result*(i)){

						double pre_delta = division_result*(i) - pre_weight;
						double delta_sending = min(local_weight[i], min(temp_local_weight[i], min(delta[i], pre_delta)));
						int jproc = i-1;
						double sending = 0;
						while (delta_sending > 0 && jproc >=0){
							sending = min(division_result, delta_sending);
							sending = min(sending, (division_result-temp_local_weight[jproc]));
							sending = max(sending, 0.0);
							sending_weight[i][jproc] += sending;
							temp_local_weight[jproc] += sending;
							temp_local_weight[i] -= sending;
							delta_sending -= sending;
							// NOTE(review): same `delta_sending` vs `sending` concern
							// as in the forward sweep above.
							delta[i] -= delta_sending;
							jproc--;
						}
					}//pre
				}//weight>
			}//iproc
		}//iter

		//Update partition locally
		//to send
		// Translate the weight amounts this rank must send into octant counts,
		// consuming octants from the tail (toward higher ranks) ...
		u32vector sending_cell(nproc,0);
		int i = getNumOctants();;
		for (int jproc=nproc-1; jproc>rank; jproc--){
			double pack_weight = 0.0;
			while(pack_weight < sending_weight[rank][jproc] && i > 0){
				i--;
				pack_weight += (*weight)[i];
				sending_cell[jproc]++;
			}
		}
		partition[rank] = i;
		i = 0;
		// ... and from the head (toward lower ranks).
		for (int jproc=0; jproc<rank; jproc++){
			double pack_weight = 0.0;
			while(pack_weight < sending_weight[rank][jproc] && i < getNumOctants()-1){
				i++;
				pack_weight += (*weight)[i];
				sending_cell[jproc]++;
			}
		}
		partition[rank] -= i;

		//to receive
		// Exchange send counts so every rank learns how many octants it will
		// receive from each peer. Tag convention: messages to rank r use tag r.
		u32vector rec_cell(nproc,0);
		MPI_Request* req = new MPI_Request[nproc*10];
		MPI_Status* stats = new MPI_Status[nproc*10];
		int nReq = 0;
		for (int iproc=0; iproc<nproc; iproc++){
			error_flag = MPI_Irecv(&rec_cell[iproc],1,MPI_UINT32_T,iproc,rank,comm,&req[nReq]);
			++nReq;
		}
		for (int iproc=0; iproc<nproc; iproc++){
			error_flag = MPI_Isend(&sending_cell[iproc],1,MPI_UINT32_T,iproc,iproc,comm,&req[nReq]);
			++nReq;
		}
		MPI_Waitall(nReq,req,stats);

		delete [] req; req = NULL;
		delete [] stats; stats = NULL;

		// Final local count = kept octants + received octants; share it with
		// all ranks so partition[] is globally consistent.
		i = 0;
		for (int jproc=0; jproc<nproc; jproc++){
			i+= rec_cell[jproc];
		}
		partition[rank] += i;
		uint32_t part = partition[rank];
		error_flag = MPI_Allgather(&part,1,MPI_UINT32_T,partition,1,MPI_UINT32_T,comm);
	}
};
2302 
2303  //=================================================================================//
2304 
/** Compute a uniform octant partition whose process boundaries are displaced
 * so that families of octants coarser than a given level are not split
 * across processes (needed to keep coarsening possible after load balance).
 * \param[out] partition Array of nproc entries filled with the octant count
 * assigned to each process.
 * \param[in] level_ Number of levels above max_depth to protect from splitting.
 */
void computePartition(uint32_t* partition, uint8_t & level_) {
	// Clamp the protected level into [1, MAX_LEVEL_2D].
	uint8_t level = uint8_t(min(int(max(int(max_depth) - int(level_), int(1))) , MAX_LEVEL_2D));
	uint32_t* partition_temp = new uint32_t[nproc];
	uint8_t* boundary_proc = new uint8_t[nproc-1];
	uint8_t dimcomm, indcomm;
	uint8_t* glbdimcomm = new uint8_t[nproc];
	uint8_t* glbindcomm = new uint8_t[nproc];

	uint32_t division_result = 0;
	uint32_t remind = 0;
	// Dh = logical edge length of an octant at the protected level.
	uint32_t Dh = uint32_t(pow(double(2),double(MAX_LEVEL_2D-level)));
	uint32_t istart, nocts, rest, forw, backw;
	uint32_t i = 0, iproc, j;
	uint64_t sum;
	int32_t* pointercomm;
	int32_t* deplace = new int32_t[nproc-1];
	// Start from the plain uniform partition (quotient + spread remainder).
	division_result = uint32_t(global_num_octants/(uint64_t)nproc);
	remind = (uint32_t)(global_num_octants%(uint64_t)nproc);
	for(uint32_t i = 0; i < (uint32_t)nproc; ++i)
		if(i<remind)
			partition_temp[i] = division_result + 1;
		else
			partition_temp[i] = division_result;

	// For each tentative partition boundary, find which process currently
	// owns the octant sitting on that boundary.
	j = 0;
	sum = 0;
	for (iproc=0; iproc<(uint32_t)(nproc-1); iproc++){
		sum += partition_temp[iproc];
		while(sum > partition_range_globalidx[j]){
			j++;
		}
		boundary_proc[iproc] = j;
	}
	nocts = octree.octants.size();
	sum = 0;
	dimcomm = 0;
	indcomm = 0;
	// Each rank computes the displacement of the boundaries that fall inside
	// its own octant range: the boundary is moved forward or backward to the
	// nearest octant aligned on the Dh grid (i.e. a family start at the
	// protected level), choosing the shorter of the two displacements.
	for (iproc=0; iproc<(uint32_t)(nproc-1); iproc++){
		deplace[iproc] = 1;
		sum += partition_temp[iproc];
		if (boundary_proc[iproc] == rank){
			if (dimcomm == 0){
				indcomm = iproc;
			}
			dimcomm++;
			// Local index of the boundary octant inside this rank's range.
			if (rank!=0)
				istart = sum - partition_range_globalidx[rank-1] - 1;
			else
				istart = sum;

			// Scan forward for the next Dh-aligned octant.
			i = istart;
			rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh;
			while(rest!=0){
				if (i==nocts){
					i = istart + nocts;
					break;
				}
				i++;
				rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh;
			}
			forw = i - istart;
			// Scan backward for the previous Dh-aligned octant.
			i = istart;
			rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh;
			while(rest!=0){
				if (i==0){
					i = istart - nocts;
					break;
				}
				i--;
				rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh;
			}
			backw = istart - i;
			if (forw<backw)
				deplace[iproc] = forw;
			else
				deplace[iproc] = -(int32_t)backw;
		}
	}

	// Broadcast each rank's displacement segment to everyone: glbdimcomm[p]
	// says how many boundaries rank p computed, glbindcomm[p] where its
	// segment starts inside deplace[].
	error_flag = MPI_Allgather(&dimcomm,1,MPI_UINT8_T,glbdimcomm,1,MPI_UINT8_T,comm);
	error_flag = MPI_Allgather(&indcomm,1,MPI_UINT8_T,glbindcomm,1,MPI_UINT8_T,comm);
	for (iproc=0; iproc<(uint32_t)(nproc); iproc++){
		pointercomm = &deplace[glbindcomm[iproc]];
		error_flag = MPI_Bcast(pointercomm, glbdimcomm[iproc], MPI_INT32_T, iproc, comm);
	}

	// Apply the displacements: each boundary move adds octants to one side
	// and removes them from the other.
	for (iproc=0; iproc<(uint32_t)(nproc); iproc++){
		if (iproc < (uint32_t)(nproc-1))
			partition[iproc] = partition_temp[iproc] + deplace[iproc];
		else
			partition[iproc] = partition_temp[iproc];
		if (iproc !=0)
			partition[iproc] = partition[iproc] - deplace[iproc-1];
	}

	delete [] partition_temp; partition_temp = NULL;
	delete [] boundary_proc; boundary_proc = NULL;
	delete [] glbdimcomm; glbdimcomm = NULL;
	delete [] glbindcomm; glbindcomm = NULL;
	delete [] deplace; deplace = NULL;
}
2406 
2407  // =============================================================================== //
2408 
2409  void updateLoadBalance() {
2410  octree.updateLocalMaxDepth();
2411  uint64_t* rbuff = new uint64_t[nproc];
2412  uint64_t local_num_octants = octree.getNumOctants();
2413  error_flag = MPI_Allgather(&local_num_octants,1,MPI_UINT64_T,rbuff,1,MPI_UINT64_T,comm);
2414  for(int p = 0; p < nproc; ++p){
2415  partition_range_globalidx[p] = 0;
2416  for(int pp = 0; pp <=p; ++pp)
2417  partition_range_globalidx[p] += rbuff[pp];
2418  --partition_range_globalidx[p];
2419  }
2420  //update first last descendant
2421  octree.setFirstDesc();
2422  octree.setLastDesc();
2423  //update partition_range_position
2424  uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
2425  error_flag = MPI_Allgather(&lastDescMorton,1,MPI_UINT64_T,partition_last_desc,1,MPI_UINT64_T,comm);
2426  uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
2427  error_flag = MPI_Allgather(&firstDescMorton,1,MPI_UINT64_T,partition_first_desc,1,MPI_UINT64_T,comm);
2428  serial = false;
2429  delete [] rbuff; rbuff = NULL;
2430  }
2431 
2432  // =============================================================================== //
2433 
/** Build the process-boundary (pbound) information and the ghost layer.
 * Determines which local octants border other processes, exchanges them with
 * the neighboring processes via MPI pack/unpack, and rebuilds the local
 * ghost-octant container together with its global indices.
 */
void setPboundGhosts() {
	//BUILD BORDER OCTANT INDECES VECTOR (map value) TO BE SENT TO THE RIGHT PROCESS (map key)
	//find local octants to be sent as ghost to the right processes
	//it visits the local octants building virtual neighbors on each octant face
	//find the owner of these virtual neighbor and build a map (process,border octants)
	//this map contains the local octants as ghosts for neighbor processes

	// NO PBORDERS !
	Class_Local_Tree<2>::OctantsType::iterator end = octree.octants.end();
	Class_Local_Tree<2>::OctantsType::iterator begin = octree.octants.begin();
	bordersPerProc.clear();
	for(Class_Local_Tree<2>::OctantsType::iterator it = begin; it != end; ++it){
		set<int> procs;
		//Virtual Face Neighbors
		for(uint8_t i = 0; i < global2D.nfaces; ++i){
			// Skip faces on the physical domain boundary.
			if(it->getBound(i) == false){
				uint32_t virtualNeighborsSize = 0;
				vector<uint64_t> virtualNeighbors = it->computeVirtualMorton(i,max_depth,virtualNeighborsSize);
				uint32_t maxDelta = virtualNeighborsSize/2;
				// Check the virtual neighbors pairwise from both ends of the
				// list; any neighbor owned by another rank makes this face a
				// process boundary.
				for(uint32_t j = 0; j <= maxDelta; ++j){
					int pBegin = findOwner(virtualNeighbors[j]);
					int pEnd = findOwner(virtualNeighbors[virtualNeighborsSize - 1 - j]);
					procs.insert(pBegin);
					procs.insert(pEnd);
					if(pBegin != rank || pEnd != rank){
						it->setPbound(i,true);
					}
					else{
						it->setPbound(i,false);
					}
				}
			}
		}
		//Virtual Corner Neighbors
		for(uint8_t c = 0; c < global2D.nnodes; ++c){
			// Only corners whose two adjacent faces are both interior.
			if(!it->getBound(global2D.nodeface[c][0]) && !it->getBound(global2D.nodeface[c][1])){
				uint32_t virtualCornerNeighborSize = 0;
				uint64_t virtualCornerNeighbor = it->computeNodeVirtualMorton(c,max_depth,virtualCornerNeighborSize);
				if(virtualCornerNeighborSize){
					int proc = findOwner(virtualCornerNeighbor);
					procs.insert(proc);
				}
			}
		}

		// Register this octant as a future ghost for every foreign process found.
		set<int>::iterator pitend = procs.end();
		for(set<int>::iterator pit = procs.begin(); pit != pitend; ++pit){
			int p = *pit;
			if(p != rank){
				//TODO better reserve to avoid if
				bordersPerProc[p].push_back(distance(begin,it));
				vector<uint32_t> & bordersSingleProc = bordersPerProc[p];
				// Grow capacity geometrically to limit reallocations.
				if(bordersSingleProc.capacity() - bordersSingleProc.size() < 2)
					bordersSingleProc.reserve(2*bordersSingleProc.size());
			}
		}
	}
	MPI_Barrier(comm);

	//PACK (mpi) BORDER OCTANTS IN CHAR BUFFERS WITH SIZE (map value) TO BE SENT TO THE RIGHT PROCESS (map key)
	//it visits every element in bordersPerProc (one for every neighbor proc)
	//for every element it visits the border octants it contains and pack them in a new structure, sendBuffers
	//this map has an entry Class_Comm_Buffer for every proc containing the size in bytes of the buffer and the octants
	//to be sent to that proc packed in a char* buffer
	uint64_t global_index;
	uint32_t x,y;
	uint8_t l;
	int8_t m;
	// 12 matches the fixed size of Class_Octant<2>::info serialized below.
	bool info[12];
	map<int,Class_Comm_Buffer> sendBuffers;
	map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
	uint32_t pbordersOversize = 0;
	for(map<int,vector<uint32_t> >::iterator bit = bordersPerProc.begin(); bit != bitend; ++bit){
		pbordersOversize += bit->second.size();
		int buffSize = bit->second.size() * (int)ceil((double)(global2D.octantBytes + global2D.globalIndexBytes) / (double)(CHAR_BIT/8));
		int key = bit->first;
		const vector<uint32_t> & value = bit->second;
		sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
		int pos = 0;
		int nofBorders = value.size();
		for(int i = 0; i < nofBorders; ++i){
			//the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
			const Class_Octant<2> & octant = octree.octants[value[i]];
			x = octant.getX();
			y = octant.getY();
			l = octant.getLevel();
			m = octant.getMarker();
			global_index = getGlobalIdx(value[i]);
			// Inner `i` deliberately shadows the border-octant index; it only
			// spans the 12 info flags.
			for(int i = 0; i < 12; ++i)
				info[i] = octant.info[i];
			error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
			error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
			error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
			error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
			for(int j = 0; j < 12; ++j){
				// NOTE(review): MPI::BOOL comes from the deprecated MPI C++
				// bindings; consider MPI_C_BOOL with an MPI-3 capable library.
				MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[key].commBuffer,buffSize,&pos,comm);
			}
			error_flag = MPI_Pack(&global_index,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
		}
	}

	//COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
	//the size of every borders buffer is communicated to the right process in order to build the receive buffer
	//and stored in the recvBufferSizePerProc structure
	MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
	MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
	int nReq = 0;
	map<int,int> recvBufferSizePerProc;
	map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
	for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
		recvBufferSizePerProc[sit->first] = 0;
		error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
		++nReq;
	}
	// Sends are issued in reverse key order to pair up with the receives above.
	map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
	for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
		error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
		++nReq;
	}
	MPI_Waitall(nReq,req,stats);

	//COMMUNICATE THE BUFFERS TO THE RECEIVERS
	//recvBuffers structure is declared and each buffer is initialized to the right size
	//then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
	//at the same time every process compute the size in bytes of all the ghost octants
	uint32_t nofBytesOverProc = 0;
	map<int,Class_Comm_Buffer> recvBuffers;
	map<int,int>::iterator ritend = recvBufferSizePerProc.end();
	for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
		recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
	}
	nReq = 0;
	for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
		nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
		error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
		++nReq;
	}
	for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
		error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
		++nReq;
	}
	MPI_Waitall(nReq,req,stats);

	//COMPUTE GHOSTS SIZE IN BYTES
	//number of ghosts in every process is obtained through the size in bytes of the single octant
	//and ghost vector in local tree is resized
	uint32_t nofGhosts = nofBytesOverProc / (uint32_t)(global2D.octantBytes + global2D.globalIndexBytes);
	octree.size_ghosts = nofGhosts;
	octree.ghosts.clear();
	octree.ghosts.resize(nofGhosts);
	octree.globalidx_ghosts.resize(nofGhosts);

	//UNPACK BUFFERS AND BUILD GHOSTS CONTAINER OF CLASS_LOCAL_TREE
	//every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked octant by octant.
	//every ghost octant is built and put in the ghost vector
	uint32_t ghostCounter = 0;
	map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
	for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
		int pos = 0;
		int nofGhostsPerProc = int(rrit->second.commBufferSize / (uint32_t) (global2D.octantBytes + global2D.globalIndexBytes));
		for(int i = 0; i < nofGhostsPerProc; ++i){
			// Unpack in the exact order used by the pack loop above:
			// x, y, level, marker, 12 info flags, global index.
			error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
			error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
			error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
			octree.ghosts[ghostCounter] = Class_Octant<2>(l,x,y);
			error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
			octree.ghosts[ghostCounter].setMarker(m);
			for(int j = 0; j < 12; ++j){
				error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
				octree.ghosts[ghostCounter].info[j] = info[j];
			}
			error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&global_index,1,MPI_UINT64_T,comm);
			octree.globalidx_ghosts[ghostCounter] = global_index;
			++ghostCounter;
		}
	}
	recvBuffers.clear();
	sendBuffers.clear();
	recvBufferSizePerProc.clear();

	delete [] req; req = NULL;
	delete [] stats; stats = NULL;

}
2618 
2619  // =============================================================================== //
2620 
2621 public:
// =============================================================================== //
// Redistribute the local octants over all MPI processes so that every rank owns
// the contiguous chunk of the global (Morton-ordered) octant list computed by
// computePartition(). Collective over `comm`: every rank must call it.
// Side effects: rewrites octree.octants, clears/rebuilds ghosts (via
// updateLoadBalance()/setPboundGhosts()), and logs the initial/final partition.
// First call (serial==true): every rank holds the full tree and simply keeps its
// slice. Later calls (parallel): octants are exchanged point-to-point with packed
// MPI buffers.
2626  void loadBalance(){
2627
2628  //Write info on log
2629  log.writeLog("---------------------------------------------");
2630  log.writeLog(" LOAD BALANCE ");
2631
// Target number of octants per rank, filled by computePartition().
2632  uint32_t* partition = new uint32_t [nproc];
2633  computePartition(partition);
2634  if(serial)
2635  {
2636  log.writeLog(" ");
2637  log.writeLog(" Initial Serial distribution : ");
2638  for(int ii=0; ii<nproc; ii++){
2639  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
2640  }
2641
// Serial case: every rank still holds the whole tree, so no communication is
// needed — each rank just copies out its own [stride, stride+partition[rank])
// window of the global octant vector.
2642  uint32_t stride = 0;
2643  for(int i = 0; i < rank; ++i)
2644  stride += partition[i];
2645  Class_Local_Tree<2>::OctantsType octantsCopy = octree.octants;
2646  Class_Local_Tree<2>::OctantsType::const_iterator first = octantsCopy.begin() + stride;
2647  Class_Local_Tree<2>::OctantsType::const_iterator last = first + partition[rank];
2648  octree.octants.assign(first, last);
// shrink_to_fit guarded out for Intel compilers (presumably lacking C++11
// support at the time — TODO confirm the guard is still needed).
2649 #if defined(__INTEL_COMPILER) || defined(__ICC)
2650 #else
2651  octree.octants.shrink_to_fit();
2652 #endif
2653  first = octantsCopy.end();
2654  last = octantsCopy.end();
2655
2656  //Update and ghosts here
2657  updateLoadBalance();
2658  setPboundGhosts();
2659
2660  }
2661  else
2662  {
2663  log.writeLog(" ");
2664  log.writeLog(" Initial Parallel partition : ");
2665  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
2666  for(int ii=1; ii<nproc; ii++){
2667  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
2668  }
2669
2670  //empty ghosts
2671  octree.ghosts.clear();
2672  octree.size_ghosts = 0;
2673  //compute new partition range globalidx
// newPartitionRangeGlobalidx[p] = global index of the LAST octant rank p will
// own under the new partition (prefix sum of partition[] minus one).
2674  uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
2675  for(int p = 0; p < nproc; ++p){
2676  newPartitionRangeGlobalidx[p] = 0;
2677  for(int pp = 0; pp <= p; ++pp)
2678  newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
2679  --newPartitionRangeGlobalidx[p];
2680  }
2681
2682  //find resident octants local offset lastHead(lh) and firstTail(ft)
// lh = local index of the last octant that must LEAVE towards lower ranks
// ("head"); ft = local index of the first octant that must LEAVE towards
// higher ranks ("tail"). Octants in (lh, ft) stay resident on this rank.
2683  int32_t lh,ft;
2684  if(rank == 0)
2685  lh = -1;
2686  else{
2687  lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
2688  }
// Clamp lh into [-1, size-1] (values outside mean "no head" / "all head").
2689  if(lh < 0)
2690  lh = - 1;
2691  else if(lh > (int32_t)(octree.octants.size() - 1))
2692  lh = octree.octants.size() - 1;
2693
2694  if(rank == nproc - 1)
2695  ft = octree.octants.size();
2696  else if(rank == 0)
2697  ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
2698  else{
2699  ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
2700  }
// Clamp ft into [0, size] (ft == size means "no tail to send").
2701  if(ft > (int32_t)(octree.octants.size() - 1))
2702  ft = octree.octants.size();
2703  else if(ft < 0)
2704  ft = 0;
2705
2706  //compute size Head and size Tail
// headOffset/tailOffset snapshot the ORIGINAL head/tail sizes: they are used
// later to locate the resident window, while headSize/tailSize shrink as
// send buffers are carved off.
2707  uint32_t headSize = (uint32_t)(lh + 1);
2708  uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
2709  uint32_t headOffset = headSize;
2710  uint32_t tailOffset = tailSize;
2711
2712  //build send buffers
2713  map<int,Class_Comm_Buffer> sendBuffers;
2714
2715  //Compute first predecessor and first successor to send buffers to
2716  int64_t firstOctantGlobalIdx = 0;// offset to compute global index of each octant in every process
2717  int64_t globalLastHead = (int64_t) lh;
2718  int64_t globalFirstTail = (int64_t) ft; //lastHead and firstTail in global ordering
2719  int firstPredecessor = -1;
2720  int firstSuccessor = nproc;
2721  if(rank != 0){
2722  firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
2723  globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
2724  globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
// Scan downwards: the LAST pre satisfying the test (i.e. the lowest-ranked
// one) is kept, so firstPredecessor ends up as the lowest rank whose new
// range still covers our head octants.
2725  for(int pre = rank - 1; pre >=0; --pre){
2726  if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
2727  firstPredecessor = pre;
2728  }
2729  for(int post = rank + 1; post < nproc; ++post){
2730  if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
2731  firstSuccessor = post;
2732  }
2733  }
2734  else if(rank == 0){
2735  firstSuccessor = 1;
2736  }
// TODO (from original Italian note): move this barrier before the first
// communication.
2737  MPI_Barrier(comm);
2738
// Scratch fields for (un)packing one octant: coordinates, level, marker and
// the 12 info flags of a 2D octant.
2739  uint32_t x,y;
2740  uint8_t l;
2741  int8_t m;
2742  bool info[12];
2743  int intBuffer = 0;
2744  int contatore = 0;
2745  //build send buffers from Head
// Walk predecessors from firstPredecessor down to rank 0, carving
// consecutive chunks off the head and packing them, nearest chunk first.
2746  uint32_t nofElementsFromSuccessiveToPrevious = 0;
2747  if(headSize != 0){
2748  for(int p = firstPredecessor; p >= 0; --p){
// Case 1: p receives only PART of the remaining head.
2749  if(headSize < partition[p]){
2750
// NOTE(review): newPartitionRangeGlobalidx is uint64_t but intBuffer is int —
// the difference is truncated before abs(); looks intentional for small
// counts but verify for very large trees.
2751  intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
2752  intBuffer = abs(intBuffer);
2753  nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
2754  if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
2755  nofElementsFromSuccessiveToPrevious = headSize;
2756
// Buffer size in bytes: octantBytes normalized by bytes-per-char.
2757  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2758  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
2759  int pos = 0;
2760  //for(uint32_t i = 0; i <= (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); ++i){
2761  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
2762  //PACK octants from 0 to lh in sendBuffer[p]
2763  const Class_Octant<2> & octant = octree.octants[i];
2764  x = octant.getX();
2765  y = octant.getY();
2766  l = octant.getLevel();
2767  m = octant.getMarker();
2768  for(int ii = 0; ii < 12; ++ii)
2769  info[ii] = octant.info[ii];
2770  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2771  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2772  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2773  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
// NOTE(review): MPI::BOOL is the deprecated C++ binding (removed in MPI-3);
// consider MPI_CXX_BOOL when the whole file is migrated.
2774  for(int j = 0; j < 12; ++j){
2775  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2776  }
2777  }
2778
2779  if(nofElementsFromSuccessiveToPrevious == headSize)
2780  break;
2781
// Shrink the head window by the chunk just packed and continue with the
// next (lower) predecessor.
2782  lh -= nofElementsFromSuccessiveToPrevious;
2783  globalLastHead -= nofElementsFromSuccessiveToPrevious;
2784  headSize = lh + 1;
2785  ++contatore;
2786
2787  }
// Case 2: p receives a FULL new-partition chunk from our head.
2788  else{
2789  nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
2790  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2791  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
2792  int pos = 0;
2793  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
2794  //pack octants from lh - partition[p] to lh
2795  const Class_Octant<2> & octant = octree.octants[i];
2796  x = octant.getX();
2797  y = octant.getY();
2798  l = octant.getLevel();
2799  m = octant.getMarker();
// NOTE(review): this inner `int i` shadows the outer uint32_t loop index `i`
// (the sibling loops use `ii`). Harmless here because the inner scope only
// copies info[], but worth normalizing to `ii` for consistency.
2800  for(int i = 0; i < 12; ++i)
2801  info[i] = octant.info[i];
2802  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2803  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2804  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2805  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2806  for(int j = 0; j < 12; ++j){
2807  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2808  }
2809  }
2810  lh -= nofElementsFromSuccessiveToPrevious;
2811  globalLastHead -= nofElementsFromSuccessiveToPrevious;
2812  headSize = lh + 1;
2813  if(headSize == 0)
2814  break;
2815  }
2816  }
2817
2818  }
// Mirror of the head pass: walk successors upward, carving chunks off the
// tail starting at ft.
2819  uint32_t nofElementsFromPreviousToSuccessive = 0;
2820  contatore = 0;
2821  //build send buffers from Tail
2822  if(tailSize != 0){
2823  for(int p = firstSuccessor; p < nproc; ++p){
// Case 1: p receives only PART of the remaining tail.
2824  if(tailSize < partition[p]){
2825
2826  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
2827  if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
2828  nofElementsFromPreviousToSuccessive = tailSize;
2829
2830  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2831  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
2832  int pos = 0;
// NOTE(review): octantsSize is computed but never used in this branch.
2833  uint32_t octantsSize = (uint32_t)octree.octants.size();
2834  for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
2835  //PACK octants from ft to octantsSize-1
2836  const Class_Octant<2> & octant = octree.octants[i];
2837  x = octant.getX();
2838  y = octant.getY();
2839  l = octant.getLevel();
2840  m = octant.getMarker();
2841  for(int ii = 0; ii < 12; ++ii)
2842  info[ii] = octant.info[ii];
2843  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2844  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2845  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2846  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2847  for(int j = 0; j < 12; ++j){
2848  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2849  }
2850  }
2851
2852  if(nofElementsFromPreviousToSuccessive == tailSize)
2853  break;
// Advance the tail window past the chunk just packed.
2854  ft += nofElementsFromPreviousToSuccessive;
2855  globalFirstTail += nofElementsFromPreviousToSuccessive;
2856  tailSize -= nofElementsFromPreviousToSuccessive;
2857  ++contatore;
2858  }
// Case 2: p receives a FULL new-partition chunk from our tail.
2859  else{
2860  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
2861  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2862  //int buffSize = partition[p] * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2863  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
2864  uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
2865  int pos = 0;
2866  for(uint32_t i = ft; i <= endOctants; ++i ){
2867  //PACK octants from ft to ft + partition[p] -1
2868  const Class_Octant<2> & octant = octree.octants[i];
2869  x = octant.getX();
2870  y = octant.getY();
2871  l = octant.getLevel();
2872  m = octant.getMarker();
// NOTE(review): shadowed inner index `i` again — see the head-pass note.
2873  for(int i = 0; i < 12; ++i)
2874  info[i] = octant.info[i];
2875  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2876  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2877  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2878  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2879  for(int j = 0; j < 12; ++j){
2880  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2881  }
2882  }
2883  //ft += partition[p];
2884  //tailSize -= partition[p];
2885  ft += nofElementsFromPreviousToSuccessive;
2886  globalFirstTail += nofElementsFromPreviousToSuccessive;
2887  tailSize -= nofElementsFromPreviousToSuccessive;
2888  if(tailSize == 0)
2889  break;
2890  }
2891  }
2892  }
2893
2894  //Build receiver sources
// Each rank publishes [its rank, list of destinations it sends to]; an
// Allgatherv of these arrays lets every rank invert the map and learn WHO
// will be sending to it (sendersPerProc).
2895  vector<Class_Array> recvs(nproc);
2896  recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
2897  recvs[rank].array[0] = rank;
2898  int counter = 1;
2899  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
2900  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2901  recvs[rank].array[counter] = sit->first;
2902  ++counter;
2903  }
2904  int* nofRecvsPerProc = new int[nproc];
2905  error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
2906  int globalRecvsBuffSize = 0;
2907  int* displays = new int[nproc];
2908  for(int pp = 0; pp < nproc; ++pp){
2909  displays[pp] = 0;
2910  globalRecvsBuffSize += nofRecvsPerProc[pp];
2911  for(int ppp = 0; ppp < pp; ++ppp){
2912  displays[pp] += nofRecvsPerProc[ppp];
2913  }
2914  }
2915  int* globalRecvsBuff = new int[globalRecvsBuffSize];
2916  error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
2917
// Invert: for each published (sender, destination) pair, record the sender
// in the destination's set.
2918  vector<set<int> > sendersPerProc(nproc);
2919  for(int pin = 0; pin < nproc; ++pin){
2920  for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
2921  sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
2922  }
2923  }
2924
2925  //Communicate Octants (size)
// Round 1: exchange buffer SIZES. Message tag is the receiver's rank on
// both sides (Isend tags with rsit->first, Irecv matches on `rank`).
2926  MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
2927  MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
2928  int nReq = 0;
2929  map<int,int> recvBufferSizePerProc;
2930  set<int>::iterator senditend = sendersPerProc[rank].end();
2931  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
2932  recvBufferSizePerProc[*sendit] = 0;
2933  error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
2934  ++nReq;
2935  }
2936  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
2937  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2938  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
2939  ++nReq;
2940  }
2941  MPI_Waitall(nReq,req,stats);
2942
2943  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
2944  //recvBuffers structure is declared and each buffer is initialized to the right size
2945  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
// While sizing the receive buffers, also count how many incoming octants
// land BEFORE the resident block (from lower ranks) vs AFTER it.
2946  uint32_t nofNewHead = 0;
2947  uint32_t nofNewTail = 0;
2948  map<int,Class_Comm_Buffer> recvBuffers;
2949  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
2950  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
2951  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
2952  uint32_t nofNewPerProc = (uint32_t)(rit->second / (uint32_t)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8)));
2953  if(rit->first < rank)
2954  nofNewHead += nofNewPerProc;
2955  else if(rit->first > rank)
2956  nofNewTail += nofNewPerProc;
2957  }
// Round 2: exchange the packed octant payloads (same tagging scheme).
2958  nReq = 0;
2959  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
2960  //nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
2961  error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
2962  ++nReq;
2963  }
2964  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2965  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
2966  ++nReq;
2967  }
2968  MPI_Waitall(nReq,req,stats);
2969
2970  //MOVE RESIDENT TO BEGIN IN OCTANTS
// Compact the surviving (resident) octants to the front of the vector,
// using the ORIGINAL head/tail sizes captured in headOffset/tailOffset.
2971  uint32_t resEnd = octree.getNumOctants() - tailOffset;
2972  uint32_t nofResidents = resEnd - headOffset;
2973  int octCounter = 0;
2974  for(uint32_t i = headOffset; i < resEnd; ++i){
2975  octree.octants[octCounter] = octree.octants[i];
2976  ++octCounter;
2977  }
2978  uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
2979  octree.octants.resize(newCounter);
2980  //MOVE RESIDENTS IN RIGHT POSITION
// Shift residents to their final slot (after the incoming head octants),
// copying back-to-front so the source range is never overwritten first.
2981  uint32_t resCounter = nofNewHead + nofResidents - 1;
2982  for(uint32_t k = 0; k < nofResidents ; ++k){
2983  octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
2984  }
2985
2986  //UNPACK BUFFERS AND BUILD NEW OCTANTS
// recvBuffers is keyed by sender rank, so iteration is in ascending Morton
// order; jumpResident skips over the resident block exactly once when we
// cross from lower-rank senders to higher-rank senders.
2987  newCounter = 0;
2988  bool jumpResident = false;
2989  map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
2990  for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
2991  uint32_t nofNewPerProc = (uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8)));
2992  int pos = 0;
2993  if(rbit->first > rank && !jumpResident){
2994  newCounter += nofResidents ;
2995  jumpResident = true;
2996  }
// Unpack in the same field order used by MPI_Pack above: x, y, level,
// marker, 12 info flags.
2997  for(int i = nofNewPerProc - 1; i >= 0; --i){
2998  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
2999  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
3000  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
3001  octree.octants[newCounter] = Class_Octant<2>(l,x,y);
3002  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
3003  octree.octants[newCounter].setMarker(m);
3004  for(int j = 0; j < 12; ++j){
3005  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
3006  octree.octants[newCounter].info[j] = info[j];
3007  }
3008  ++newCounter;
3009  }
3010  }
3011 #if defined(__INTEL_COMPILER) || defined(__ICC)
3012 #else
3013  octree.octants.shrink_to_fit();
3014 #endif
// Release all communication scratch storage.
3015  delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3016  delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3017  delete [] displays; displays = NULL;
3018  delete [] req; req = NULL;
3019  delete [] stats; stats = NULL;
3020  delete [] globalRecvsBuff; globalRecvsBuff = NULL;
2021  //Update and ghosts here
3022  updateLoadBalance();
3023  setPboundGhosts();
3024
3025  }
3026  delete [] partition;
3027  partition = NULL;
3028
3029  //Write info of final partition on log
3030  log.writeLog(" ");
3031  log.writeLog(" Final Parallel partition : ");
3032  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3033  for(int ii=1; ii<nproc; ii++){
3034  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3035  }
3036  log.writeLog(" ");
3037  log.writeLog("---------------------------------------------");
3038
3039  }
3040 
3041  // =============================================================================== //
3042 
3048  void loadBalance(uint8_t & level){
3049 
3050  //Write info on log
3051  log.writeLog("---------------------------------------------");
3052  log.writeLog(" LOAD BALANCE ");
3053 
3054  uint32_t* partition = new uint32_t [nproc];
3055  computePartition(partition, level);
3056  if(serial)
3057  {
3058  log.writeLog(" ");
3059  log.writeLog(" Initial Serial distribution : ");
3060  for(int ii=0; ii<nproc; ii++){
3061  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3062  }
3063 
3064  uint32_t stride = 0;
3065  for(int i = 0; i < rank; ++i)
3066  stride += partition[i];
3067  Class_Local_Tree<2>::OctantsType octantsCopy = octree.octants;
3068  Class_Local_Tree<2>::OctantsType::const_iterator first = octantsCopy.begin() + stride;
3069  Class_Local_Tree<2>::OctantsType::const_iterator last = first + partition[rank];
3070  octree.octants.assign(first, last);
3071 #if defined(__INTEL_COMPILER) || defined(__ICC)
3072 #else
3073  octree.octants.shrink_to_fit();
3074 #endif
3075  first = octantsCopy.end();
3076  last = octantsCopy.end();
3077 
3078  //Update and ghosts here
3079  updateLoadBalance();
3080  setPboundGhosts();
3081 
3082  }
3083  else
3084  {
3085  log.writeLog(" ");
3086  log.writeLog(" Initial Parallel partition : ");
3087  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3088  for(int ii=1; ii<nproc; ii++){
3089  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3090  }
3091 
3092  //empty ghosts
3093  octree.ghosts.clear();
3094  octree.size_ghosts = 0;
3095  //compute new partition range globalidx
3096  uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
3097  for(int p = 0; p < nproc; ++p){
3098  newPartitionRangeGlobalidx[p] = 0;
3099  for(int pp = 0; pp <= p; ++pp)
3100  newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
3101  --newPartitionRangeGlobalidx[p];
3102  }
3103 
3104  //find resident octants local offset lastHead(lh) and firstTail(ft)
3105  int32_t lh,ft;
3106  if(rank == 0)
3107  lh = -1;
3108  else{
3109  lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
3110  }
3111  if(lh < 0)
3112  lh = - 1;
3113  else if(lh > (int32_t)(octree.octants.size() - 1))
3114  lh = octree.octants.size() - 1;
3115 
3116  if(rank == nproc - 1)
3117  ft = octree.octants.size();
3118  else if(rank == 0)
3119  ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
3120  else{
3121  ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
3122  }
3123  if(ft > (int32_t)(octree.octants.size() - 1))
3124  ft = octree.octants.size();
3125  else if(ft < 0)
3126  ft = 0;
3127 
3128  //compute size Head and size Tail
3129  uint32_t headSize = (uint32_t)(lh + 1);
3130  uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
3131  uint32_t headOffset = headSize;
3132  uint32_t tailOffset = tailSize;
3133 
3134  //build send buffers
3135  map<int,Class_Comm_Buffer> sendBuffers;
3136 
3137  //Compute first predecessor and first successor to send buffers to
3138  int64_t firstOctantGlobalIdx = 0;// offset to compute global index of each octant in every process
3139  int64_t globalLastHead = (int64_t) lh;
3140  int64_t globalFirstTail = (int64_t) ft; //lastHead and firstTail in global ordering
3141  int firstPredecessor = -1;
3142  int firstSuccessor = nproc;
3143  if(rank != 0){
3144  firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
3145  globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
3146  globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
3147  for(int pre = rank - 1; pre >=0; --pre){
3148  if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
3149  firstPredecessor = pre;
3150  }
3151  for(int post = rank + 1; post < nproc; ++post){
3152  if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
3153  firstSuccessor = post;
3154  }
3155  }
3156  else if(rank == 0){
3157  firstSuccessor = 1;
3158  }
3159  MPI_Barrier(comm); //da spostare prima della prima comunicazione
3160 
3161  uint32_t x,y;
3162  uint8_t l;
3163  int8_t m;
3164  bool info[12];
3165  int intBuffer = 0;
3166  int contatore = 0;
3167  //build send buffers from Head
3168  uint32_t nofElementsFromSuccessiveToPrevious = 0;
3169  if(headSize != 0){
3170  for(int p = firstPredecessor; p >= 0; --p){
3171  if(headSize < partition[p]){
3172  intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
3173  intBuffer = abs(intBuffer);
3174  nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
3175  if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
3176  nofElementsFromSuccessiveToPrevious = headSize;
3177 
3178  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3179  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3180  int pos = 0;
3181  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3182  //PACK octants from 0 to lh in sendBuffer[p]
3183  const Class_Octant<2> & octant = octree.octants[i];
3184  x = octant.getX();
3185  y = octant.getY();
3186  l = octant.getLevel();
3187  m = octant.getMarker();
3188  for(int ii = 0; ii < 12; ++ii)
3189  info[ii] = octant.info[ii];
3190  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3191  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3192  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3193  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3194  for(int j = 0; j < 12; ++j){
3195  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3196  }
3197  }
3198  if(nofElementsFromSuccessiveToPrevious == headSize)
3199  break;
3200 
3201  lh -= nofElementsFromSuccessiveToPrevious;
3202  globalLastHead -= nofElementsFromSuccessiveToPrevious;
3203  headSize = lh + 1;
3204  ++contatore;
3205  }
3206  else{
3207  nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
3208  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3209  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3210  int pos = 0;
3211  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3212  //pack octants from lh - partition[p] to lh
3213  const Class_Octant<2> & octant = octree.octants[i];
3214  x = octant.getX();
3215  y = octant.getY();
3216  l = octant.getLevel();
3217  m = octant.getMarker();
3218  for(int i = 0; i < 12; ++i)
3219  info[i] = octant.info[i];
3220  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3221  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3222  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3223  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3224  for(int j = 0; j < 12; ++j){
3225  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3226  }
3227  }
3228  lh -= nofElementsFromSuccessiveToPrevious;
3229  globalLastHead -= nofElementsFromSuccessiveToPrevious;
3230  headSize = lh + 1;
3231  if(headSize == 0)
3232  break;
3233  }
3234  }
3235 
3236  }
3237  uint32_t nofElementsFromPreviousToSuccessive = 0;
3238  contatore = 0;
3239  //build send buffers from Tail
3240  if(tailSize != 0){
3241  for(int p = firstSuccessor; p < nproc; ++p){
3242  if(tailSize < partition[p]){
3243  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3244  if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
3245  nofElementsFromPreviousToSuccessive = tailSize;
3246 
3247  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3248  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3249  int pos = 0;
3250  uint32_t octantsSize = (uint32_t)octree.octants.size();
3251  for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3252  //PACK octants from ft to octantsSize-1
3253  const Class_Octant<2> & octant = octree.octants[i];
3254  x = octant.getX();
3255  y = octant.getY();
3256  l = octant.getLevel();
3257  m = octant.getMarker();
3258  for(int ii = 0; ii < 12; ++ii)
3259  info[ii] = octant.info[ii];
3260  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3261  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3262  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3263  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3264  for(int j = 0; j < 12; ++j){
3265  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3266  }
3267  }
3268  if(nofElementsFromPreviousToSuccessive == tailSize)
3269  break;
3270  ft += nofElementsFromPreviousToSuccessive;
3271  globalFirstTail += nofElementsFromPreviousToSuccessive;
3272  tailSize -= nofElementsFromPreviousToSuccessive;
3273  ++contatore;
3274  }
3275  else{
3276  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3277  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3278  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
3279  uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
3280  int pos = 0;
3281  for(uint32_t i = ft; i <= endOctants; ++i ){
3282  //PACK octants from ft to ft + partition[p] -1
3283  const Class_Octant<2> & octant = octree.octants[i];
3284  x = octant.getX();
3285  y = octant.getY();
3286  l = octant.getLevel();
3287  m = octant.getMarker();
3288  for(int ii = 0; ii < 12; ++ii)
3289  info[ii] = octant.info[ii];
3290  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3291  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3292  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3293  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3294  for(int j = 0; j < 12; ++j){
3295  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3296  }
3297  }
3298  ft += nofElementsFromPreviousToSuccessive;
3299  globalFirstTail += nofElementsFromPreviousToSuccessive;
3300  tailSize -= nofElementsFromPreviousToSuccessive;
3301  if(tailSize == 0)
3302  break;
3303  }
3304  }
3305  }
3306 
3307  //Build receiver sources
3308  vector<Class_Array> recvs(nproc);
3309  recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
3310  recvs[rank].array[0] = rank;
3311  int counter = 1;
3312  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
3313  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
3314  recvs[rank].array[counter] = sit->first;
3315  ++counter;
3316  }
3317  int* nofRecvsPerProc = new int[nproc];
3318  error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
3319  int globalRecvsBuffSize = 0;
3320  int* displays = new int[nproc];
3321  for(int pp = 0; pp < nproc; ++pp){
3322  displays[pp] = 0;
3323  globalRecvsBuffSize += nofRecvsPerProc[pp];
3324  for(int ppp = 0; ppp < pp; ++ppp){
3325  displays[pp] += nofRecvsPerProc[ppp];
3326  }
3327  }
3328  int* globalRecvsBuff = new int[globalRecvsBuffSize];
3329  error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3330 
3331  vector<set<int> > sendersPerProc(nproc);
3332  for(int pin = 0; pin < nproc; ++pin){
3333  for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3334  sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3335  }
3336  }
3337 
3338  //Communicate Octants (size)
3339  MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3340  MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3341  int nReq = 0;
3342  map<int,int> recvBufferSizePerProc;
3343  set<int>::iterator senditend = sendersPerProc[rank].end();
3344  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3345  recvBufferSizePerProc[*sendit] = 0;
3346  error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3347  ++nReq;
3348  }
3349  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3350  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3351  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3352  ++nReq;
3353  }
3354  MPI_Waitall(nReq,req,stats);
3355 
3356  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
3357  //recvBuffers structure is declared and each buffer is initialized to the right size
3358  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
3359  uint32_t nofNewHead = 0;
3360  uint32_t nofNewTail = 0;
3361  map<int,Class_Comm_Buffer> recvBuffers;
3362  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3363  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3364  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
3365  uint32_t nofNewPerProc = (uint32_t)(rit->second / (uint32_t)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8)));
3366  if(rit->first < rank)
3367  nofNewHead += nofNewPerProc;
3368  else if(rit->first > rank)
3369  nofNewTail += nofNewPerProc;
3370  }
3371  nReq = 0;
3372  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3373  //nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
3374  error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3375  ++nReq;
3376  }
3377  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3378  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3379  ++nReq;
3380  }
3381  MPI_Waitall(nReq,req,stats);
3382 
3383  //MOVE RESIDENT TO BEGIN IN OCTANTS
3384  uint32_t resEnd = octree.getNumOctants() - tailOffset;
3385  uint32_t nofResidents = resEnd - headOffset;
3386  int octCounter = 0;
3387  for(uint32_t i = headOffset; i < resEnd; ++i){
3388  octree.octants[octCounter] = octree.octants[i];
3389  ++octCounter;
3390  }
3391  uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3392  octree.octants.resize(newCounter);
3393  //MOVE RESIDENTS IN RIGHT POSITION
3394  uint32_t resCounter = nofNewHead + nofResidents - 1;
3395  for(uint32_t k = 0; k < nofResidents ; ++k){
3396  octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3397  }
3398 
3399  //UNPACK BUFFERS AND BUILD NEW OCTANTS
3400  newCounter = 0;
3401  bool jumpResident = false;
3402  map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3403  for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3404  uint32_t nofNewPerProc = (uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8)));
3405  int pos = 0;
3406  if(rbit->first > rank && !jumpResident){
3407  newCounter += nofResidents ;
3408  jumpResident = true;
3409  }
3410  for(int i = nofNewPerProc - 1; i >= 0; --i){
3411  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
3412  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
3413  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
3414  octree.octants[newCounter] = Class_Octant<2>(l,x,y);
3415  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
3416  octree.octants[newCounter].setMarker(m);
3417  for(int j = 0; j < 12; ++j){
3418  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
3419  octree.octants[newCounter].info[j] = info[j];
3420  }
3421  ++newCounter;
3422  }
3423  }
3424 #if defined(__INTEL_COMPILER) || defined(__ICC)
3425 #else
3426  octree.octants.shrink_to_fit();
3427 #endif
3428  delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3429  delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3430  delete [] displays; displays = NULL;
3431  delete [] req; req = NULL;
3432  delete [] stats; stats = NULL;
3433  delete [] globalRecvsBuff; globalRecvsBuff = NULL;
3434  //Update and ghosts here
3435  updateLoadBalance();
3436  setPboundGhosts();
3437 
3438  }
3439  delete [] partition;
3440  partition = NULL;
3441 
3442  //Write info of final partition on log
3443  log.writeLog(" ");
3444  log.writeLog(" Final Parallel partition : ");
3445  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3446  for(int ii=1; ii<nproc; ii++){
3447  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3448  }
3449  log.writeLog(" ");
3450  log.writeLog("---------------------------------------------");
3451 
3452  }
3453 
3454  // =============================================================================== //
3455 
/** Redistribute the octants of the 2D para-tree over the MPI processes and
 *  migrate the attached user data consistently with the octants.
 *
 *  The target partition (number of octants per process) is computed first;
 *  octants that must leave this process are serialized into per-destination
 *  MPI pack buffers (each octant as: x, y [uint32], level [uint8],
 *  marker [int8] and its 12 boolean `info` flags), together with the user
 *  data gathered through \p userData. After the exchange the resident
 *  octants are compacted/relocated and the received octants are unpacked
 *  in place, with the user data scattered back.
 *
 *  \param[in,out] userData  user-data manager implementing the
 *         Class_Data_LB_Interface operations used here: fixedSize(),
 *         size(i), gather(buffer,i), scatter(buffer,i), move(from,to),
 *         assign(stride,length), resize(n), shrink(), resizeGhost(n).
 *  \param[in] weight  optional per-octant weights for the partition
 *         computation; if NULL an unweighted (equal-count) partition is
 *         computed. The pointer is only read, never owned or freed.
 */
 template<class Impl>
 void loadBalance(Class_Data_LB_Interface<Impl> & userData, dvector* weight = NULL){
 	//Write info on log
 	log.writeLog("---------------------------------------------");
 	log.writeLog(" LOAD BALANCE ");

 	// Compute the desired number of octants per process (weighted or not).
 	uint32_t* partition = new uint32_t [nproc];
 	if (weight == NULL)
 		computePartition(partition);
 	else
 		computePartition(partition, weight);

 	// The weights are no longer needed after the partition is computed.
 	weight = NULL;

 	if(serial)
 	{
 		// Serial case: every process holds the whole tree; each one simply
 		// keeps its own contiguous slice [stride, stride+partition[rank]).
 		log.writeLog(" ");
 		log.writeLog(" Initial Serial distribution : ");
 		for(int ii=0; ii<nproc; ii++){
 			log.writeLog(" Octants for proc	"+ to_string(static_cast<unsigned long long>(ii))+"	:	" + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
 		}

 		// Offset of this process' slice = octants assigned to lower ranks.
 		uint32_t stride = 0;
 		for(int i = 0; i < rank; ++i)
 			stride += partition[i];
 		Class_Local_Tree<2>::OctantsType octantsCopy = octree.octants;
 		Class_Local_Tree<2>::OctantsType::const_iterator first = octantsCopy.begin() + stride;
 		Class_Local_Tree<2>::OctantsType::const_iterator last = first + partition[rank];
 		octree.octants.assign(first, last);
#if defined(__INTEL_COMPILER) || defined(__ICC)
#else
 		octree.octants.shrink_to_fit();
#endif
 		first = octantsCopy.end();
 		last = octantsCopy.end();

 		// Tell the user-data manager which slice of its data survives.
 		userData.assign(stride,partition[rank]);

 		//Update and build ghosts here
 		updateLoadBalance();
 		setPboundGhosts();
 	}
 	else
 	{
 		// Parallel case: octants migrate between neighboring ranks in the
 		// global Morton ordering ("head" goes to predecessors, "tail" to
 		// successors).
 		log.writeLog(" ");
 		log.writeLog(" Initial Parallel partition : ");
 		log.writeLog(" Octants for proc	"+ to_string(static_cast<unsigned long long>(0))+"	:	" + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
 		for(int ii=1; ii<nproc; ii++){
 			log.writeLog(" Octants for proc	"+ to_string(static_cast<unsigned long long>(ii))+"	:	" + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
 		}

 		//empty ghosts
 		octree.ghosts.clear();
 		octree.size_ghosts = 0;
 		//compute new partition range globalidx
 		// newPartitionRangeGlobalidx[p] = global index of the last octant
 		// owned by process p in the NEW partition (inclusive prefix sum - 1).
 		uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
 		for(int p = 0; p < nproc; ++p){
 			newPartitionRangeGlobalidx[p] = 0;
 			for(int pp = 0; pp <= p; ++pp)
 				newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
 			--newPartitionRangeGlobalidx[p];
 		}

 		//find resident octants local offset lastHead(lh) and firstTail(ft)
 		// lh = local index of the last octant that must be shipped to a
 		//      predecessor (-1 if none); ft = local index of the first
 		//      octant that must be shipped to a successor (size() if none).
 		// Octants in (lh, ft) stay resident on this process.
 		int32_t lh,ft;
 		if(rank == 0)
 			lh = -1;
 		else{
 			lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
 		}
 		if(lh < 0)
 			lh = - 1;
 		else if(lh > octree.octants.size() - 1)
 			lh = octree.octants.size() - 1;

 		if(rank == nproc - 1)
 			ft = octree.octants.size();
 		else if(rank == 0)
 			ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
 		else{
 			ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
 		}
 		if(ft > (int32_t)(octree.octants.size() - 1))
 			ft = octree.octants.size();
 		else if(ft < 0)
 			ft = 0;

 		//compute size Head and size Tail
 		uint32_t headSize = (uint32_t)(lh + 1);
 		uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
 		uint32_t headOffset = headSize;
 		uint32_t tailOffset = tailSize;

 		//build send buffers
 		map<int,Class_Comm_Buffer> sendBuffers;

 		//Compute first predecessor and first successor to send buffers to
 		int64_t firstOctantGlobalIdx = 0;// offset to compute global index of each octant in every process
 		int64_t globalLastHead = (int64_t) lh;
 		int64_t globalFirstTail = (int64_t) ft; //lastHead and firstTail in global ordering
 		int firstPredecessor = -1;
 		int firstSuccessor = nproc;
 		if(rank != 0){
 			firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
 			globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
 			globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
 			// Lowest-ranked predecessor whose new range covers the head.
 			for(int pre = rank - 1; pre >=0; --pre){
 				if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
 					firstPredecessor = pre;
 			}
 			// Successor whose new range contains the first tail octant.
 			for(int post = rank + 1; post < nproc; ++post){
 				if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
 					firstSuccessor = post;
 			}
 		}
 		else if(rank == 0){
 			firstSuccessor = 1;
 		}
 		MPI_Barrier(comm); //TODO: to be moved before the first communication

 		// Scratch variables for (de)serializing one octant at a time.
 		uint32_t x,y;
 		uint8_t l;
 		int8_t m;
 		bool info[12];
 		int intBuffer = 0;
 		int contatore = 0;	// counter of partially-filled head buffers
 		//build send buffers from Head
 		uint32_t nofElementsFromSuccessiveToPrevious = 0;
 		if(headSize != 0){
 			// Walk predecessors from the nearest one downwards, carving the
 			// head [0, lh] into one buffer per destination process.
 			for(int p = firstPredecessor; p >= 0; --p){
 				if(headSize < partition[p]){
 					// Destination p takes less than a full share: send what
 					// remains of the head (clamped to headSize).
 					intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
 					intBuffer = abs(intBuffer);
 					nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
 					if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
 						nofElementsFromSuccessiveToPrevious = headSize;

 					int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
 					//compute size of data in buffers
 					if(userData.fixedSize()){
 						buffSize +=  userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
 					}
 					else{
 						for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
 							buffSize += userData.size(i);
 						}
 					}
 					//add room for int, number of octants in this buffer
 					buffSize += sizeof(int);
 					sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
 					//store the number of octants at the beginning of the buffer
 					MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
 					//USE BUFFER POS
 					for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
 						//PACK octants from 0 to lh in sendBuffer[p]
 						const Class_Octant<2> & octant = octree.octants[i];
 						x = octant.getX();
 						y = octant.getY();
 						l = octant.getLevel();
 						m = octant.getMarker();
 						for(int j = 0; j < 12; ++j)
 							info[j] = octant.info[j];
 						error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						for(int j = 0; j < 12; ++j){
 							MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);

 						}
 						// Append the user data attached to octant i.
 						userData.gather(sendBuffers[p],i);
 					}
 					if(nofElementsFromSuccessiveToPrevious == headSize)
 						break;

 					lh -= nofElementsFromSuccessiveToPrevious;
 					globalLastHead -= nofElementsFromSuccessiveToPrevious;
 					headSize = lh + 1;
 					++contatore;
 				}
 				else{
 					// Destination p absorbs a full share of the head.
 					nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
 					int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
 					//compute size of data in buffers
 					if(userData.fixedSize()){
 						buffSize +=  userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
 					}
 					else{
 						for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
 							buffSize += userData.size(i);
 						}
 					}
 					//add room for int, number of octants in this buffer
 					buffSize += sizeof(int);
 					sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
 					//store the number of octants at the beginning of the buffer
 					MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
 					//USE BUFFER POS
 					for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
 						//pack octants from lh - partition[p] to lh
 						const Class_Octant<2> & octant = octree.octants[i];
 						x = octant.getX();
 						y = octant.getY();
 						l = octant.getLevel();
 						m = octant.getMarker();
 						for(int j = 0; j < 12; ++j)
 							info[j] = octant.info[j];
 						error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						for(int j = 0; j < 12; ++j){
 							MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						}
 						userData.gather(sendBuffers[p],i);
 					}
 					lh -= nofElementsFromSuccessiveToPrevious;
 					globalLastHead -= nofElementsFromSuccessiveToPrevious;
 					headSize = lh + 1;
 					if(headSize == 0)
 						break;
 				}
 			}

 		}
 		uint32_t nofElementsFromPreviousToSuccessive = 0;
 		contatore = 0;
 		//build send buffers from Tail
 		if(tailSize != 0){
 			// Symmetric walk over successors, carving the tail
 			// [ft, size()-1] into one buffer per destination process.
 			for(int p = firstSuccessor; p < nproc; ++p){
 				if(tailSize < partition[p]){
 					// Destination p takes less than a full share: send what
 					// remains of the tail (clamped to tailSize).
 					nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
 					if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
 						nofElementsFromPreviousToSuccessive = tailSize;

 					uint32_t octantsSize = (uint32_t)octree.octants.size();
 					int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
 					//compute size of data in buffers
 					if(userData.fixedSize()){
 						buffSize +=  userData.fixedSize() * nofElementsFromPreviousToSuccessive;
 					}
 					else{
 						for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
 							buffSize += userData.size(i);
 						}
 					}
 					//add room for int, number of octants in this buffer
 					buffSize += sizeof(int);
 					sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
 					//store the number of octants at the beginning of the buffer
 					MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
 					//USE BUFFER POS
 					for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
 						//PACK octants from ft to octantsSize-1
 						const Class_Octant<2> & octant = octree.octants[i];
 						x = octant.getX();
 						y = octant.getY();
 						l = octant.getLevel();
 						m = octant.getMarker();
 						for(int j = 0; j < 12; ++j)
 							info[j] = octant.info[j];
 						error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						for(int j = 0; j < 12; ++j){
 							MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						}
 						userData.gather(sendBuffers[p],i);
 					}
 					if(nofElementsFromPreviousToSuccessive == tailSize)
 						break;
 					ft += nofElementsFromPreviousToSuccessive;
 					globalFirstTail += nofElementsFromPreviousToSuccessive;
 					tailSize -= nofElementsFromPreviousToSuccessive;
 					++contatore;
 				}
 				else{
 					// Destination p absorbs a full share of the tail.
 					nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
 					uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
 					int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
 					//compute size of data in buffers
 					if(userData.fixedSize()){
 						buffSize +=  userData.fixedSize() * nofElementsFromPreviousToSuccessive;
 					}
 					else{
 						for(uint32_t i = ft; i <= endOctants; ++i){
 							buffSize += userData.size(i);
 						}
 					}
 					//add room for int, number of octants in this buffer
 					buffSize += sizeof(int);
 					sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
 					//store the number of octants at the beginning of the buffer
 					MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
 					for(uint32_t i = ft; i <= endOctants; ++i ){
 						//PACK octants from ft to ft + partition[p] -1
 						const Class_Octant<2> & octant = octree.octants[i];
 						x = octant.getX();
 						y = octant.getY();
 						l = octant.getLevel();
 						m = octant.getMarker();
 						for(int j = 0; j < 12; ++j)
 							info[j] = octant.info[j];
 						error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						for(int j = 0; j < 12; ++j){
 							MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
 						}
 						userData.gather(sendBuffers[p],i);
 					}
 					ft += nofElementsFromPreviousToSuccessive;
 					globalFirstTail += nofElementsFromPreviousToSuccessive;
 					tailSize -= nofElementsFromPreviousToSuccessive;
 					if(tailSize == 0)
 						break;
 				}
 			}
 		}

 		//Build receiver sources
 		// Each process publishes (via all-gather) the ranks it sends to;
 		// inverting that table yields, for every rank, the set of senders.
 		vector<Class_Array> recvs(nproc);
 		recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
 		recvs[rank].array[0] = rank;
 		int counter = 1;
 		map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
 		for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
 			recvs[rank].array[counter] = sit->first;
 			++counter;
 		}
 		int* nofRecvsPerProc = new int[nproc];
 		error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
 		int globalRecvsBuffSize = 0;
 		int* displays = new int[nproc];
 		for(int pp = 0; pp < nproc; ++pp){
 			displays[pp] = 0;
 			globalRecvsBuffSize += nofRecvsPerProc[pp];
 			for(int ppp = 0; ppp < pp; ++ppp){
 				displays[pp] += nofRecvsPerProc[ppp];
 			}
 		}
 		//int globalRecvsBuff[globalRecvsBuffSize];
 		int* globalRecvsBuff = new int[globalRecvsBuffSize];
 		error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);

 		// Invert: entry displays[pin] is the sender rank, the following
 		// entries are its destinations.
 		vector<set<int> > sendersPerProc(nproc);
 		for(int pin = 0; pin < nproc; ++pin){
 			for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
 				sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
 			}
 		}

 		//Communicate Octants (size)
 		// First exchange the buffer sizes so receivers can allocate.
 		// NOTE(review): commBufferSize is sent as MPI_UINT32_T but received
 		// into an int map slot — assumes a matching 32-bit representation;
 		// confirm Class_Comm_Buffer::commBufferSize's type.
 		MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
 		MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
 		int nReq = 0;
 		map<int,int> recvBufferSizePerProc;
 		set<int>::iterator senditend = sendersPerProc[rank].end();
 		for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
 			recvBufferSizePerProc[*sendit] = 0;
 			error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
 			++nReq;
 		}
 		map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
 		for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
 			error_flag =  MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
 			++nReq;
 		}
 		MPI_Waitall(nReq,req,stats);

 		//COMMUNICATE THE BUFFERS TO THE RECEIVERS
 		//recvBuffers structure is declared and each buffer is initialized to the right size
 		//then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
 		uint32_t nofNewHead = 0;
 		uint32_t nofNewTail = 0;
 		map<int,Class_Comm_Buffer> recvBuffers;

 		map<int,int>::iterator ritend = recvBufferSizePerProc.end();
 		for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
 			recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
 		}

 		nReq = 0;
 		for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
 			error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
 			++nReq;
 		}
 		for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
 			error_flag =  MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
 			++nReq;
 		}
 		MPI_Waitall(nReq,req,stats);

 		//Unpack number of octants per sender
 		// The leading uint32 of each buffer is the octant count; senders
 		// with lower rank contribute to the new head, higher to the tail.
 		map<int,uint32_t> nofNewOverProcs;
 		map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
 		for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
 			uint32_t nofNewPerProc;
 			MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofNewPerProc,1,MPI_UINT32_T,comm);
 			nofNewOverProcs[rbit->first] = nofNewPerProc;
 			if(rbit->first < rank)
 				nofNewHead += nofNewPerProc;
 			else if(rbit->first > rank)
 				nofNewTail += nofNewPerProc;
 		}

 		//MOVE RESIDENT TO BEGIN IN OCTANTS
 		// Compact residents to the front, resize, then shift them to their
 		// final slot (after the nofNewHead incoming octants), moving the
 		// user data alongside.
 		uint32_t resEnd = octree.getNumOctants() - tailOffset;
 		uint32_t nofResidents = resEnd - headOffset;
 		uint32_t octCounter = 0;
 		for(uint32_t i = headOffset; i < resEnd; ++i){
 			octree.octants[octCounter] = octree.octants[i];
 			userData.move(i,octCounter);
 			++octCounter;
 		}
 		uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
 		octree.octants.resize(newCounter);
 		userData.resize(newCounter);
 		//MOVE RESIDENTS IN RIGHT POSITION
 		uint32_t resCounter = nofNewHead + nofResidents - 1;
 		for(uint32_t k = 0; k < nofResidents ; ++k){
 			octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
 			userData.move(nofResidents - k - 1,resCounter - k);
 		}

 		//UNPACK BUFFERS AND BUILD NEW OCTANTS
 		// Buffers are visited in ascending sender rank; the resident block
 		// is jumped over once when the first higher-ranked sender is met,
 		// so the global Morton ordering is preserved.
 		newCounter = 0;
 		bool jumpResident = false;

 		for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
 			uint32_t nofNewPerProc = nofNewOverProcs[rbit->first];
 			if(rbit->first > rank && !jumpResident){
 				newCounter += nofResidents ;
 				jumpResident = true;
 			}
 			for(int i = nofNewPerProc - 1; i >= 0; --i){
 				error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&x,1,MPI_UINT32_T,comm);
 				error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&y,1,MPI_UINT32_T,comm);
 				error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&l,1,MPI_UINT8_T,comm);
 				octree.octants[newCounter] = Class_Octant<2>(l,x,y);
 				error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&m,1,MPI_INT8_T,comm);
 				octree.octants[newCounter].setMarker(m);
 				for(int j = 0; j < 12; ++j){
 					error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&info[j],1,MPI::BOOL,comm);
 					octree.octants[newCounter].info[j] = info[j];
 				}
 				// Restore the user data attached to the new octant.
 				userData.scatter(rbit->second,newCounter);
 				++newCounter;
 			}
 		}
#if defined(__INTEL_COMPILER) || defined(__ICC)
#else
 		octree.octants.shrink_to_fit();
#endif
 		userData.shrink();

 		delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
 		delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
 		delete [] displays; displays = NULL;
 		delete [] req; req = NULL;
 		delete [] stats; stats = NULL;
 		delete [] globalRecvsBuff; globalRecvsBuff = NULL;

 		//Update and ghosts here
 		updateLoadBalance();
 		setPboundGhosts();
 		uint32_t nofGhosts = getNumGhosts();
 		userData.resizeGhost(nofGhosts);

 	}
 	delete [] partition;
 	partition = NULL;

 	//Write info of final partition on log
 	log.writeLog(" ");
 	log.writeLog(" Final Parallel partition : ");
 	log.writeLog(" Octants for proc	"+ to_string(static_cast<unsigned long long>(0))+"	:	" + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
 	for(int ii=1; ii<nproc; ii++){
 		log.writeLog(" Octants for proc	"+ to_string(static_cast<unsigned long long>(ii))+"	:	" + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
 	}
 	log.writeLog(" ");
 	log.writeLog("---------------------------------------------");


 }
3947 
3948  // =============================================================================== //
3949 
3955  template<class Impl>
3956  void loadBalance(Class_Data_LB_Interface<Impl> & userData, uint8_t & level){
3957 
3958  //Write info on log
3959  log.writeLog("---------------------------------------------");
3960  log.writeLog(" LOAD BALANCE ");
3961 
3962  uint32_t* partition = new uint32_t [nproc];
3963  computePartition(partition, level);
3964  if(serial)
3965  {
3966  log.writeLog(" ");
3967  log.writeLog(" Initial Serial distribution : ");
3968  for(int ii=0; ii<nproc; ii++){
3969  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3970  }
3971 
3972  uint32_t stride = 0;
3973  for(int i = 0; i < rank; ++i)
3974  stride += partition[i];
3975  Class_Local_Tree<2>::OctantsType octantsCopy = octree.octants;
3976  Class_Local_Tree<2>::OctantsType::const_iterator first = octantsCopy.begin() + stride;
3977  Class_Local_Tree<2>::OctantsType::const_iterator last = first + partition[rank];
3978  octree.octants.assign(first, last);
3979 #if defined(__INTEL_COMPILER) || defined(__ICC)
3980 #else
3981  octree.octants.shrink_to_fit();
3982 #endif
3983  first = octantsCopy.end();
3984  last = octantsCopy.end();
3985 
3986  userData.assign(stride,partition[rank]);
3987 
3988  //Update and build ghosts here
3989  updateLoadBalance();
3990  setPboundGhosts();
3991 
3992  }
3993  else
3994  {
3995  log.writeLog(" ");
3996  log.writeLog(" Initial Parallel partition : ");
3997  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3998  for(int ii=1; ii<nproc; ii++){
3999  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
4000  }
4001 
4002  //empty ghosts
4003  octree.ghosts.clear();
4004  octree.size_ghosts = 0;
4005  //compute new partition range globalidx
4006  uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
4007  for(int p = 0; p < nproc; ++p){
4008  newPartitionRangeGlobalidx[p] = 0;
4009  for(int pp = 0; pp <= p; ++pp)
4010  newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
4011  --newPartitionRangeGlobalidx[p];
4012  }
4013 
4014  //find resident octants local offset lastHead(lh) and firstTail(ft)
4015  int32_t lh,ft;
4016  if(rank == 0)
4017  lh = -1;
4018  else{
4019  lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
4020  }
4021  if(lh < 0)
4022  lh = - 1;
4023  else if(lh > octree.octants.size() - 1)
4024  lh = octree.octants.size() - 1;
4025 
4026  if(rank == nproc - 1)
4027  ft = octree.octants.size();
4028  else if(rank == 0)
4029  ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
4030  else{
4031  ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
4032  }
4033  if(ft > (int32_t)(octree.octants.size() - 1))
4034  ft = octree.octants.size();
4035  else if(ft < 0)
4036  ft = 0;
4037 
4038  //compute size Head and size Tail
4039  uint32_t headSize = (uint32_t)(lh + 1);
4040  uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
4041  uint32_t headOffset = headSize;
4042  uint32_t tailOffset = tailSize;
4043 
4044  //build send buffers
4045  map<int,Class_Comm_Buffer> sendBuffers;
4046 
4047  //Compute first predecessor and first successor to send buffers to
4048  int64_t firstOctantGlobalIdx = 0;// offset to compute global index of each octant in every process
4049  int64_t globalLastHead = (int64_t) lh;
4050  int64_t globalFirstTail = (int64_t) ft; //lastHead and firstTail in global ordering
4051  int firstPredecessor = -1;
4052  int firstSuccessor = nproc;
4053  if(rank != 0){
4054  firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
4055  globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
4056  globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
4057  for(int pre = rank - 1; pre >=0; --pre){
4058  if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
4059  firstPredecessor = pre;
4060  }
4061  for(int post = rank + 1; post < nproc; ++post){
4062  if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
4063  firstSuccessor = post;
4064  }
4065  }
4066  else if(rank == 0){
4067  firstSuccessor = 1;
4068  }
4069  MPI_Barrier(comm); //da spostare prima della prima comunicazione
4070 
4071  uint32_t x,y;
4072  uint8_t l;
4073  int8_t m;
4074  bool info[12];
4075  int intBuffer = 0;
4076  int contatore = 0;
4077  //build send buffers from Head
4078  uint32_t nofElementsFromSuccessiveToPrevious = 0;
4079  if(headSize != 0){
4080  for(int p = firstPredecessor; p >= 0; --p){
4081  if(headSize < partition[p]){
4082  intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
4083  intBuffer = abs(intBuffer);
4084  nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
4085  if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
4086  nofElementsFromSuccessiveToPrevious = headSize;
4087 
4088  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
4089  //compute size of data in buffers
4090  if(userData.fixedSize()){
4091  buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
4092  }
4093  else{
4094  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
4095  buffSize += userData.size(i);
4096  }
4097  }
4098  //add room for int, number of octants in this buffer
4099  buffSize += sizeof(int);
4100  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
4101  //store the number of octants at the beginning of the buffer
4102  MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4103  //USE BUFFER POS
4104  for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
4105  //PACK octants from 0 to lh in sendBuffer[p]
4106  const Class_Octant<2> & octant = octree.octants[i];
4107  x = octant.getX();
4108  y = octant.getY();
4109  l = octant.getLevel();
4110  m = octant.getMarker();
4111  for(int j = 0; j < 12; ++j)
4112  info[j] = octant.info[j];
4113  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4114  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4115  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4116  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4117  for(int j = 0; j < 12; ++j){
4118  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4119 
4120  }
4121  userData.gather(sendBuffers[p],i);
4122  }
4123  if(nofElementsFromSuccessiveToPrevious == headSize)
4124  break;
4125 
4126  lh -= nofElementsFromSuccessiveToPrevious;
4127  globalLastHead -= nofElementsFromSuccessiveToPrevious;
4128  headSize = lh + 1;
4129  ++contatore;
4130  }
4131  else{
4132  nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
4133  int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
4134  //compute size of data in buffers
4135  if(userData.fixedSize()){
4136  buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
4137  }
4138  else{
4139  for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
4140  buffSize += userData.size(i);
4141  }
4142  }
4143  //add room for int, number of octants in this buffer
4144  buffSize += sizeof(int);
4145  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
4146  //store the number of octants at the beginning of the buffer
4147  MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4148  //USE BUFFER POS
4149  for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
4150  //pack octants from lh - partition[p] to lh
4151  const Class_Octant<2> & octant = octree.octants[i];
4152  x = octant.getX();
4153  y = octant.getY();
4154  l = octant.getLevel();
4155  m = octant.getMarker();
4156  for(int j = 0; j < 12; ++j)
4157  info[j] = octant.info[j];
4158  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4159  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4160  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4161  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4162  for(int j = 0; j < 12; ++j){
4163  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4164  }
4165  userData.gather(sendBuffers[p],i);
4166  }
4167  lh -= nofElementsFromSuccessiveToPrevious;
4168  globalLastHead -= nofElementsFromSuccessiveToPrevious;
4169  headSize = lh + 1;
4170  if(headSize == 0)
4171  break;
4172  }
4173  }
4174 
4175  }
4176  uint32_t nofElementsFromPreviousToSuccessive = 0;
4177  contatore = 0;
4178  //build send buffers from Tail
4179  if(tailSize != 0){
4180  for(int p = firstSuccessor; p < nproc; ++p){
4181  if(tailSize < partition[p]){
4182  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
4183  if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
4184  nofElementsFromPreviousToSuccessive = tailSize;
4185 
4186  uint32_t octantsSize = (uint32_t)octree.octants.size();
4187  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
4188  //compute size of data in buffers
4189  if(userData.fixedSize()){
4190  buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
4191  }
4192  else{
4193  for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
4194  buffSize += userData.size(i);
4195  }
4196  }
4197  //add room for int, number of octants in this buffer
4198  buffSize += sizeof(int);
4199  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
4200  //store the number of octants at the beginning of the buffer
4201  MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4202  //USE BUFFER POS
4203  for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
4204  //PACK octants from ft to octantsSize-1
4205  const Class_Octant<2> & octant = octree.octants[i];
4206  x = octant.getX();
4207  y = octant.getY();
4208  l = octant.getLevel();
4209  m = octant.getMarker();
4210  for(int j = 0; j < 12; ++j)
4211  info[j] = octant.info[j];
4212  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4213  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4214  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4215  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4216  for(int j = 0; j < 12; ++j){
4217  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4218  }
4219  userData.gather(sendBuffers[p],i);
4220  }
4221  if(nofElementsFromPreviousToSuccessive == tailSize)
4222  break;
4223  ft += nofElementsFromPreviousToSuccessive;
4224  globalFirstTail += nofElementsFromPreviousToSuccessive;
4225  tailSize -= nofElementsFromPreviousToSuccessive;
4226  ++contatore;
4227  }
4228  else{
4229  nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
4230  uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
4231  int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
4232  //compute size of data in buffers
4233  if(userData.fixedSize()){
4234  buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
4235  }
4236  else{
4237  for(uint32_t i = ft; i <= endOctants; ++i){
4238  buffSize += userData.size(i);
4239  }
4240  }
4241  //add room for int, number of octants in this buffer
4242  buffSize += sizeof(int);
4243  sendBuffers[p] = Class_Comm_Buffer(buffSize,'a',comm);
4244  //store the number of octants at the beginning of the buffer
4245  MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4246  for(uint32_t i = ft; i <= endOctants; ++i ){
4247  //PACK octants from ft to ft + partition[p] -1
4248  const Class_Octant<2> & octant = octree.octants[i];
4249  x = octant.getX();
4250  y = octant.getY();
4251  l = octant.getLevel();
4252  m = octant.getMarker();
4253  for(int j = 0; j < 12; ++j)
4254  info[j] = octant.info[j];
4255  error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4256  error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4257  error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4258  error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4259  for(int j = 0; j < 12; ++j){
4260  MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4261  }
4262  userData.gather(sendBuffers[p],i);
4263  }
4264  ft += nofElementsFromPreviousToSuccessive;
4265  globalFirstTail += nofElementsFromPreviousToSuccessive;
4266  tailSize -= nofElementsFromPreviousToSuccessive;
4267  if(tailSize == 0)
4268  break;
4269  }
4270  }
4271  }
4272 
4273  //Build receiver sources
4274  vector<Class_Array> recvs(nproc);
4275  recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
4276  recvs[rank].array[0] = rank;
4277  int counter = 1;
4278  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
4279  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4280  recvs[rank].array[counter] = sit->first;
4281  ++counter;
4282  }
4283  int* nofRecvsPerProc = new int[nproc];
4284  error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
4285  int globalRecvsBuffSize = 0;
4286  int* displays = new int[nproc];
4287  for(int pp = 0; pp < nproc; ++pp){
4288  displays[pp] = 0;
4289  globalRecvsBuffSize += nofRecvsPerProc[pp];
4290  for(int ppp = 0; ppp < pp; ++ppp){
4291  displays[pp] += nofRecvsPerProc[ppp];
4292  }
4293  }
4294  int* globalRecvsBuff = new int[globalRecvsBuffSize];
4295  error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
4296 
4297  vector<set<int> > sendersPerProc(nproc);
4298  for(int pin = 0; pin < nproc; ++pin){
4299  for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
4300  sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
4301  }
4302  }
4303 
4304  //Communicate Octants (size)
4305  MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
4306  MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
4307  int nReq = 0;
4308  map<int,int> recvBufferSizePerProc;
4309  set<int>::iterator senditend = sendersPerProc[rank].end();
4310  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
4311  recvBufferSizePerProc[*sendit] = 0;
4312  error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
4313  ++nReq;
4314  }
4315  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
4316  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4317  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
4318  ++nReq;
4319  }
4320  MPI_Waitall(nReq,req,stats);
4321 
4322  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
4323  //recvBuffers structure is declared and each buffer is initialized to the right size
4324  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
4325  uint32_t nofNewHead = 0;
4326  uint32_t nofNewTail = 0;
4327  map<int,Class_Comm_Buffer> recvBuffers;
4328 
4329  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
4330  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
4331  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
4332  }
4333 
4334  nReq = 0;
4335  for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
4336  error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
4337  ++nReq;
4338  }
4339  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4340  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
4341  ++nReq;
4342  }
4343  MPI_Waitall(nReq,req,stats);
4344 
4345  //Unpack number of octants per sender
4346  map<int,uint32_t> nofNewOverProcs;
4347  map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
4348  for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
4349  uint32_t nofNewPerProc;
4350  MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofNewPerProc,1,MPI_UINT32_T,comm);
4351  nofNewOverProcs[rbit->first] = nofNewPerProc;
4352  if(rbit->first < rank)
4353  nofNewHead += nofNewPerProc;
4354  else if(rbit->first > rank)
4355  nofNewTail += nofNewPerProc;
4356  }
4357 
4358  //MOVE RESIDENT TO BEGIN IN OCTANTS
4359  uint32_t resEnd = octree.getNumOctants() - tailOffset;
4360  uint32_t nofResidents = resEnd - headOffset;
4361  uint32_t octCounter = 0;
4362  for(uint32_t i = headOffset; i < resEnd; ++i){
4363  octree.octants[octCounter] = octree.octants[i];
4364  //TODO move data - DONE
4365  userData.move(i,octCounter);
4366  ++octCounter;
4367  }
4368  uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
4369  octree.octants.resize(newCounter);
4370  userData.resize(newCounter);
4371  //MOVE RESIDENTS IN RIGHT POSITION
4372  uint32_t resCounter = nofNewHead + nofResidents - 1;
4373  for(uint32_t k = 0; k < nofResidents ; ++k){
4374  octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
4375  //TODO move data - DON
4376  userData.move(nofResidents - k - 1,resCounter - k);
4377  }
4378 
4379  //UNPACK BUFFERS AND BUILD NEW OCTANTS
4380  newCounter = 0;
4381  bool jumpResident = false;
4382 
4383  for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
4384  //TODO change new octants counting, probably you have to communicate the number of news per proc
4385  uint32_t nofNewPerProc = nofNewOverProcs[rbit->first];
4386  if(rbit->first > rank && !jumpResident){
4387  newCounter += nofResidents ;
4388  jumpResident = true;
4389  }
4390  for(int i = nofNewPerProc - 1; i >= 0; --i){
4391  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&x,1,MPI_UINT32_T,comm);
4392  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&y,1,MPI_UINT32_T,comm);
4393  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&l,1,MPI_UINT8_T,comm);
4394  octree.octants[newCounter] = Class_Octant<2>(l,x,y);
4395  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&m,1,MPI_INT8_T,comm);
4396  octree.octants[newCounter].setMarker(m);
4397  for(int j = 0; j < 12; ++j){
4398  error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&info[j],1,MPI::BOOL,comm);
4399  octree.octants[newCounter].info[j] = info[j];
4400  }
4401  //TODO Unpack data
4402  userData.scatter(rbit->second,newCounter);
4403  ++newCounter;
4404  }
4405  }
4406 #if defined(__INTEL_COMPILER) || defined(__ICC)
4407 #else
4408  octree.octants.shrink_to_fit();
4409 #endif
4410  userData.shrink();
4411 
4412  delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
4413  delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
4414  delete [] displays; displays = NULL;
4415  delete [] req; req = NULL;
4416  delete [] stats; stats = NULL;
4417  delete [] globalRecvsBuff; globalRecvsBuff = NULL;
4418 
4419  //Update and ghosts here
4420  updateLoadBalance();
4421  setPboundGhosts();
4422  uint32_t nofGhosts = getNumGhosts();
4423  userData.resizeGhost(nofGhosts);
4424 
4425  }
4426  delete [] partition;
4427  partition = NULL;
4428 
4429  //Write info of final partition on log
4430  log.writeLog(" ");
4431  log.writeLog(" Final Parallel partition : ");
4432  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
4433  for(int ii=1; ii<nproc; ii++){
4434  log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
4435  }
4436  log.writeLog(" ");
4437  log.writeLog("---------------------------------------------");
4438 
4439 
4440  }
4441 #endif /* NOMPI */
4442  // =============================================================================== //
4443 
4444 private:
4445  void updateAdapt() {
4446 #if NOMPI==0
4447  if(serial)
4448  {
4449 #endif
4450  max_depth = octree.local_max_depth;
4451  global_num_octants = octree.getNumOctants();
4452  for(int p = 0; p < nproc; ++p){
4453  partition_range_globalidx[p] = global_num_octants - 1;
4454  }
4455 #if NOMPI==0
4456  }
4457  else
4458  {
4459  //update max_depth
4460  error_flag = MPI_Allreduce(&octree.local_max_depth,&max_depth,1,MPI_UINT8_T,MPI_MAX,comm);
4461  //update global_num_octants
4462  uint64_t local_num_octants = octree.getNumOctants();
4463  error_flag = MPI_Allreduce(&local_num_octants,&global_num_octants,1,MPI_UINT64_T,MPI_SUM,comm);
4464  //update partition_range_globalidx
4465  uint64_t* rbuff = new uint64_t[nproc];
4466  error_flag = MPI_Allgather(&local_num_octants,1,MPI_UINT64_T,rbuff,1,MPI_UINT64_T,comm);
4467  for(int p = 0; p < nproc; ++p){
4468  partition_range_globalidx[p] = 0;
4469  for(int pp = 0; pp <=p; ++pp)
4470  partition_range_globalidx[p] += rbuff[pp];
4471  --partition_range_globalidx[p];
4472  }
4473  //update partition_range_position
4474  uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
4475  error_flag = MPI_Allgather(&lastDescMorton,1,MPI_UINT64_T,partition_last_desc,1,MPI_UINT64_T,comm);
4476  uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
4477  error_flag = MPI_Allgather(&firstDescMorton,1,MPI_UINT64_T,partition_first_desc,1,MPI_UINT64_T,comm);
4478  delete [] rbuff; rbuff = NULL;
4479  }
4480 #endif
4481  }
4482 
4483  // =============================================================================== //
4484 
4485  void updateAfterCoarse(){
4486 #if NOMPI==0
4487  if(serial){
4488 #endif
4489  updateAdapt();
4490 #if NOMPI==0
4491  }
4492  else{
4493  //Only if parallel
4494  updateAdapt();
4495  uint64_t lastDescMortonPre, firstDescMortonPost;
4496  lastDescMortonPre = (rank!=0) * partition_last_desc[rank-1];
4497  firstDescMortonPost = (rank<nproc-1)*partition_first_desc[rank+1] + (rank==nproc-1)*partition_last_desc[rank];
4498  octree.checkCoarse(lastDescMortonPre, firstDescMortonPost);
4499  updateAdapt();
4500  }
4501 #endif
4502  }
4503 
4504  // =============================================================================== //
4505 
4506  void updateAfterCoarse(u32vector & mapidx){
4507 #if NOMPI==0
4508  if(serial){
4509 #endif
4510  updateAdapt();
4511 #if NOMPI==0
4512  }
4513  else{
4514  //Only if parallel
4515  updateAdapt();
4516  uint64_t lastDescMortonPre, firstDescMortonPost;
4517  lastDescMortonPre = (rank!=0) * partition_last_desc[rank-1];
4518  firstDescMortonPost = (rank<nproc-1)*partition_first_desc[rank+1] + (rank==nproc-1)*partition_last_desc[rank];
4519  octree.checkCoarse(lastDescMortonPre, firstDescMortonPost, mapidx);
4520  updateAdapt();
4521  }
4522 
4523 #endif
4524  }
4525 
4526  //TODO Update after coarse with intersections
4527 
4528  // =============================================================================== //
4529 
#if NOMPI==0
	/** Exchange the refinement marker and the "modified" flag (info[11]) of
	 *  every process-boundary octant with the neighbouring processes, then
	 *  apply the received values to the local ghost octants in order.
	 *  Driven by bordersPerProc (neighbour rank -> local indices of border
	 *  octants); assumes ghosts are stored in the same order the sender
	 *  packs its border octants, so a running ghostCounter suffices.
	 *  Three phases: pack, exchange buffer sizes, exchange buffers + unpack.
	 */
	void commMarker() {
		//PACK (mpi) LEVEL AND MARKER OF BORDER OCTANTS IN CHAR BUFFERS WITH SIZE (map value) TO BE SENT TO THE RIGHT PROCESS (map key)
		//it visits every element in bordersPerProc (one for every neighbor proc)
		//for every element it visits the border octants it contains and pack its marker in a new structure, sendBuffers
		//this map has an entry Class_Comm_Buffer for every proc containing the size in bytes of the buffer and the octants marker
		//to be sent to that proc packed in a char* buffer
		int8_t marker;
		bool mod;
		map<int,Class_Comm_Buffer> sendBuffers;
		map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
		uint32_t pbordersOversize = 0;
		for(map<int,vector<uint32_t> >::iterator bit = bordersPerProc.begin(); bit != bitend; ++bit){
			pbordersOversize += bit->second.size();
			//one (marker + bool) record per border octant, rounded up to bytes
			int buffSize = bit->second.size() * (int)ceil((double)(global2D.markerBytes + global2D.boolBytes) / (double)(CHAR_BIT/8));
			int key = bit->first;
			const vector<uint32_t> & value = bit->second;
			sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
			int pos = 0;
			int nofBorders = value.size();
			for(int i = 0; i < nofBorders; ++i){
				//the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
				const Class_Octant<2> & octant = octree.octants[value[i]];
				marker = octant.getMarker();
				mod = octant.info[11];
				error_flag = MPI_Pack(&marker,1,MPI_INT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
				error_flag = MPI_Pack(&mod,1,MPI::BOOL,sendBuffers[key].commBuffer,buffSize,&pos,comm);
			}
		}

		//COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
		//the size of every borders buffer is communicated to the right process in order to build the receive buffer
		//and stored in the recvBufferSizePerProc structure
		//(communication is symmetric: each neighbour we send to also sends to us,
		//so sendBuffers.size() bounds both send and receive request counts)
		MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
		MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
		int nReq = 0;
		map<int,int> recvBufferSizePerProc;
		map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
		for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
			recvBufferSizePerProc[sit->first] = 0;
			//NOTE(review): receives MPI_UINT32_T into an int slot — works where
			//int is 32-bit, but a uint32_t map value would be type-exact; confirm
			error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
			++nReq;
		}
		//sends posted in reverse key order (receives in forward order) to
		//stagger traffic between neighbour pairs
		map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
		for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
			error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
			++nReq;
		}
		MPI_Waitall(nReq,req,stats);

		//COMMUNICATE THE BUFFERS TO THE RECEIVERS
		//recvBuffers structure is declared and each buffer is initialized to the right size
		//then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
		//at the same time every process compute the size in bytes of all the level and marker of ghost octants
		uint32_t nofBytesOverProc = 0;
		map<int,Class_Comm_Buffer> recvBuffers;
		map<int,int>::iterator ritend = recvBufferSizePerProc.end();
		for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
			recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
		}
		nReq = 0;
		for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
			nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
			error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
			++nReq;
		}
		for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
			error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
			++nReq;
		}
		MPI_Waitall(nReq,req,stats);

		//UNPACK BUFFERS AND BUILD GHOSTS CONTAINER OF CLASS_LOCAL_TREE
		//every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked octant by octant.
		//every ghost octant is built and put in the ghost vector
		//(ghostCounter runs over octree.ghosts in the same order the
		//neighbours packed their border octants)
		uint32_t ghostCounter = 0;
		map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
		for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
			int pos = 0;
			//record count recovered from the byte size of the buffer
			int nofGhostsPerProc = int(rrit->second.commBufferSize / ((uint32_t) (global2D.markerBytes + global2D.boolBytes)));
			for(int i = 0; i < nofGhostsPerProc; ++i){
				error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&marker,1,MPI_INT8_T,comm);
				octree.ghosts[ghostCounter].setMarker(marker);
				error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&mod,1,MPI::BOOL,comm);
				octree.ghosts[ghostCounter].info[11] = mod;
				++ghostCounter;
			}
		}
		recvBuffers.clear();
		sendBuffers.clear();
		recvBufferSizePerProc.clear();
		delete [] req; req = NULL;
		delete [] stats; stats = NULL;

	}
#endif
4626 
4627  //==============================================================
4628 
	/** Enforce the 2:1 balance condition on the refinement markers by
	 *  iterating the local balancing until a fixed point is reached.
	 *  With MPI enabled, every local pass is bracketed by commMarker() so
	 *  border/ghost markers stay consistent, and termination is decided by
	 *  a global MPI_Allreduce (logical OR) of each rank's "something
	 *  changed" flag. Without MPI only the local flag drives the loop.
	 *  \param first true: pre-adapt pass using octree.localBalance (with a
	 *  log trace); false: pass using octree.localBalanceAll (no logging).
	 */
	void balance21(bool const first){
#if NOMPI==0
		bool globalDone = true, localDone = false;
		int iteration = 0;

		commMarker();
		//preBalance21(true): first invocation of the pre-balance step
		octree.preBalance21(true);

		if (first){
			log.writeLog("---------------------------------------------");
			log.writeLog(" 2:1 BALANCE (balancing Marker before Adapt)");
			log.writeLog(" ");
			log.writeLog(" Iterative procedure ");
			log.writeLog(" ");
			log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));

			commMarker();

			//seed iteration: balance only octants marked for refinement
			localDone = octree.localBalance(true);
			commMarker();
			octree.preBalance21(false);
			MPI_Barrier(comm);
			error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);

			//repeat until no rank changed any marker in the last pass
			while(globalDone){
				iteration++;
				log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
				commMarker();
				localDone = octree.localBalance(false);
				commMarker();
				octree.preBalance21(false);
				error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
			}

			//final marker exchange so ghosts carry the converged markers
			commMarker();
			log.writeLog(" Iteration : Finalizing ");
			log.writeLog(" ");
			//localDone = octree.localBalance(false);
			//commMarker();
			//octree.preBalance21(true);
			//commMarker();

			log.writeLog(" 2:1 Balancing reached ");
			log.writeLog(" ");
			log.writeLog("---------------------------------------------");

		}
		else{

			//same fixed-point loop, but balancing ALL octants and without logging
			commMarker();
			MPI_Barrier(comm);
			localDone = octree.localBalanceAll(true);
			commMarker();
			octree.preBalance21(false);
			MPI_Barrier(comm);
			error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);

			while(globalDone){
				iteration++;
				commMarker();
				localDone = octree.localBalanceAll(false);
				commMarker();
				octree.preBalance21(false);
				error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
			}

			commMarker();
//			localDone = octree.localBalance(false);
//			commMarker();
//			octree.preBalance21(false);
//			commMarker();

		}
#else
		//serial build: no marker communication, loop on the local flag only
		bool localDone = false;
		int iteration = 0;

		octree.preBalance21(true);

		if (first){
			log.writeLog("---------------------------------------------");
			log.writeLog(" 2:1 BALANCE (balancing Marker before Adapt)");
			log.writeLog(" ");
			log.writeLog(" Iterative procedure ");
			log.writeLog(" ");
			log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));


			localDone = octree.localBalance(true);
			octree.preBalance21(false);

			while(localDone){
				iteration++;
				log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
				localDone = octree.localBalance(false);
				octree.preBalance21(false);
			}

			log.writeLog(" Iteration : Finalizing ");
			log.writeLog(" ");
//			localDone = octree.localBalance(false);
//			octree.preBalance21(false);

			log.writeLog(" 2:1 Balancing reached ");
			log.writeLog(" ");
			log.writeLog("---------------------------------------------");

		}
		else{

			localDone = octree.localBalanceAll(true);
			octree.preBalance21(false);

			while(localDone){
				iteration++;
				localDone = octree.localBalanceAll(false);
				octree.preBalance21(false);
			}

//			localDone = octree.localBalance(false);
//			octree.preBalance21(false);

		}

#endif /* NOMPI */
	}
4755 
4756  // =============================================================================== //
4757 
4758 public:
4761  bool adapt() {
4762  bool globalDone = false, localDone = false, cDone = false;
4763  uint32_t nocts = octree.getNumOctants();
4764  vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
4765 
4766  for (iter = octree.octants.begin(); iter != iterend; iter++){
4767  iter->info[8] = false;
4768  iter->info[9] = false;
4769  iter->info[11] = false;
4770  }
4771 #if NOMPI==0
4772  if(serial){
4773 #endif
4774  log.writeLog("---------------------------------------------");
4775  log.writeLog(" ADAPT (Refine/Coarse)");
4776  log.writeLog(" ");
4777 
4778  // 2:1 Balance
4779  balance21(true);
4780 
4781  log.writeLog(" ");
4782  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4783 
4784  // Refine
4785  while(octree.refine());
4786 
4787  if (octree.getNumOctants() > nocts)
4788  localDone = true;
4789  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4790  nocts = octree.getNumOctants();
4791  updateAdapt();
4792 
4793  // Coarse
4794  while(octree.coarse());
4795  updateAfterCoarse();
4796 // balance21(false);
4797 // while(octree.refine());
4798 // updateAdapt();
4799  if (octree.getNumOctants() < nocts){
4800  localDone = true;
4801  }
4802  nocts = octree.getNumOctants();
4803 
4804  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
4805 #if NOMPI==0
4806  MPI_Barrier(comm);
4807  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4808 #endif
4809  log.writeLog(" ");
4810  log.writeLog("---------------------------------------------");
4811 #if NOMPI==0
4812  }
4813  else{
4814  log.writeLog("---------------------------------------------");
4815  log.writeLog(" ADAPT (Refine/Coarse)");
4816  log.writeLog(" ");
4817 
4818  // 2:1 Balance
4819  balance21(true);
4820 
4821  log.writeLog(" ");
4822  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4823 
4824  // Refine
4825  while(octree.refine());
4826  if (octree.getNumOctants() > nocts)
4827  localDone = true;
4828  updateAdapt();
4829  setPboundGhosts();
4830  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4831  nocts = octree.getNumOctants();
4832 
4833  // Coarse
4834  while(octree.coarse());
4835  updateAfterCoarse();
4836  setPboundGhosts();
4837 // balance21(false);
4838 // while(octree.refine());
4839 // updateAdapt();
4840 // setPboundGhosts();
4841  if (octree.getNumOctants() < nocts){
4842  localDone = true;
4843  }
4844  nocts = octree.getNumOctants();
4845 
4846  MPI_Barrier(comm);
4847  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4848  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4849  log.writeLog(" ");
4850  log.writeLog("---------------------------------------------");
4851  }
4852  status += globalDone;
4853  return globalDone;
4854 #else
4855  status += localDone;
4856  return localDone;
4857 #endif
4858  }
4859 
4860  // =============================================================================== //
4861 
4862 private:
	/** Adapt the octree mesh refining/coarsening the octants according to the
	 *  user markers, tracking in the member vector mapidx the index of the old
	 *  octant each new octant comes from.
	 *  NOTE(review): unlike adapt(), this method does not update the status
	 *  member; the public wrapper adapt(bool) takes care of that.
	 *  \return True if at least one octant changed (global result with MPI,
	 *  local result when compiled with NOMPI).
	 */
	bool adapt_mapidx() {
		//TODO recoding for adapting with abs(marker) > 1

		bool globalDone = false, localDone = false;
		uint32_t nocts = octree.getNumOctants();
		vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();

		// Reset per-octant adapt info flags (NOTE(review): indices 8/9/11
		// look like the new-refined/new-coarsened markers - confirm against
		// the Class_Octant info layout).
		for (iter = octree.octants.begin(); iter != iterend; iter++){
			iter->info[8] = false;
			iter->info[9] = false;
			iter->info[11] = false;
		}

		// mapidx init: start from the identity mapping old index -> new index
		mapidx.clear();
		mapidx.resize(nocts);
#if defined(__INTEL_COMPILER) || defined(__ICC)
#else
		// shrink_to_fit skipped on Intel compilers (guarded out above)
		mapidx.shrink_to_fit();
#endif
		for (uint32_t i=0; i<nocts; i++){
			mapidx[i] = i;
		}
#if NOMPI==0
		if(serial){
#endif
			log.writeLog("---------------------------------------------");
			log.writeLog(" ADAPT (Refine/Coarse)");
			log.writeLog(" ");

			// 2:1 Balance
			balance21(true);

			log.writeLog(" ");
			log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));

			// Refine: iterate until no octant requests further refinement
			while(octree.refine(mapidx));

			if (octree.getNumOctants() > nocts)
				localDone = true;
			log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
			nocts = octree.getNumOctants();
			updateAdapt();

			// Coarse: iterate until no family of octants can be merged
			while(octree.coarse(mapidx));
			updateAfterCoarse(mapidx);
//			balance21(false);
//			while(octree.refine(mapidx));
//			updateAdapt();
			if (octree.getNumOctants() < nocts){
				localDone = true;
			}
			nocts = octree.getNumOctants();

			log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
#if NOMPI==0
			MPI_Barrier(comm);
			// Combine the local "something changed" flags into a global decision
			error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
#endif
			log.writeLog(" ");
			log.writeLog("---------------------------------------------");
#if NOMPI==0
		}
		else{
			log.writeLog("---------------------------------------------");
			log.writeLog(" ADAPT (Refine/Coarse)");
			log.writeLog(" ");

			// 2:1 Balance
			balance21(true);

			log.writeLog(" ");
			log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));

			// Refine
			while(octree.refine(mapidx));
			if (octree.getNumOctants() > nocts)
				localDone = true;
			updateAdapt();
			// Rebuild process-boundary and ghost information after refinement
			setPboundGhosts();
			log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
			nocts = octree.getNumOctants();


			// Coarse
			while(octree.coarse(mapidx));
			updateAfterCoarse(mapidx);
			setPboundGhosts();
//			balance21(false);
//			while(octree.refine(mapidx));
//			updateAdapt();
//			setPboundGhosts();
			if (octree.getNumOctants() < nocts){
				localDone = true;
			}
			nocts = octree.getNumOctants();

			MPI_Barrier(comm);
			error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
			log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
			log.writeLog(" ");
			log.writeLog("---------------------------------------------");
		}
		return globalDone;
#else
		return localDone;
#endif
	}
4976 
4977  // =============================================================================== //
4978 public:
4983  bool adapt(bool mapper_flag){
4984 
4985  bool done = false;
4986 
4987  if (mapper_flag){
4988  done = adapt_mapidx();
4989  status += done;
4990  return done;
4991  }
4992  else{
4993  done = adapt();
4994  status += done;
4995  return done;
4996  }
4997 
4998  };
4999 
5000  // =============================================================================== //
5001 // TODO TEMPORARY!!!!
5008  bool adapt(u32vector & mapper){
5009 
5010  bool done = false;
5011  done = adapt_mapidx();
5012  status += done;
5013  mapper.clear();
5014  mapper = mapidx;
5015  return done;
5016 
5017  };
5018 
5019  // =============================================================================== //
5020 
5029  void getMapping(uint32_t & idx, u32vector & mapper, vector<bool> & isghost){
5030 
5031  uint32_t i, nocts = getNumOctants();
5032  uint32_t nghbro = octree.last_ghost_bros.size();;
5033 
5034  mapper.clear();
5035  isghost.clear();
5036 
5037  mapper.push_back(mapidx[idx]);
5038  isghost.push_back(false);
5039  if (getIsNewC(idx)){
5040  if (idx < nocts-1 || !nghbro){
5041  for (i=1; i<global2D.nchildren; i++){
5042  mapper.push_back(mapidx[idx]+i);
5043  isghost.push_back(false);
5044  }
5045  }
5046  else if (idx == nocts-1 && nghbro){
5047  for (i=1; i<global2D.nchildren-nghbro; i++){
5048  mapper.push_back(mapidx[idx]+i);
5049  isghost.push_back(false);
5050  }
5051  for (i=0; i<nghbro; i++){
5052  mapper.push_back(octree.last_ghost_bros[i]);
5053  isghost.push_back(true);
5054  }
5055  }
5056  }
5057 
5058  };
5059 
5060  // =============================================================================== //
5061 
5065  bool globalDone = false, localDone = false, cDone = false;
5066  uint32_t nocts = octree.getNumOctants();
5067  vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
5068 
5069  for (iter = octree.octants.begin(); iter != iterend; iter++){
5070  iter->info[8] = false;
5071  iter->info[9] = false;
5072  iter->info[11] = false;
5073  }
5074 #if NOMPI==0
5075  if(serial){
5076 #endif
5077  log.writeLog("---------------------------------------------");
5078  log.writeLog(" ADAPT (GlobalRefine)");
5079  log.writeLog(" ");
5080 
5081  log.writeLog(" ");
5082  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5083 
5084  // Refine
5085  while(octree.globalRefine());
5086 
5087  if (octree.getNumOctants() > nocts)
5088  localDone = true;
5089  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5090  nocts = octree.getNumOctants();
5091  updateAdapt();
5092 
5093 #if NOMPI==0
5094  MPI_Barrier(comm);
5095  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5096 #endif
5097  log.writeLog(" ");
5098  log.writeLog("---------------------------------------------");
5099 #if NOMPI==0
5100  }
5101  else{
5102  log.writeLog("---------------------------------------------");
5103  log.writeLog(" ADAPT (Global Refine)");
5104  log.writeLog(" ");
5105 
5106  log.writeLog(" ");
5107  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5108 
5109  // Refine
5110  while(octree.globalRefine());
5111  if (octree.getNumOctants() > nocts)
5112  localDone = true;
5113  updateAdapt();
5114  setPboundGhosts();
5115  log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5116  nocts = octree.getNumOctants();
5117 
5118  MPI_Barrier(comm);
5119  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5120  log.writeLog(" ");
5121  log.writeLog("---------------------------------------------");
5122  }
5123  return globalDone;
5124 #else
5125  return localDone;
5126 #endif
5127  }
5128 
5129  // =============================================================================== //
5130 
	/** Adapt the octree mesh globally refining all the octants, tracking in
	 *  the given vector the index of the old octant each new octant comes from.
	 *  NOTE(review): the parameter shadows the homonymous class member mapidx.
	 *  \param[out] mapidx Mapping new octant index -> old octant index.
	 *  \return True if at least one octant has been refined (global result
	 *  with MPI, local result when compiled with NOMPI).
	 */
	bool adaptGlobalRefine(u32vector & mapidx) {
		//TODO recoding for adapting with abs(marker) > 1
		bool globalDone = false, localDone = false;
		uint32_t nocts = octree.getNumOctants();
		vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();

		// Reset per-octant adapt info flags (NOTE(review): indices 8/9/11
		// look like the new-refined/new-coarsened markers - confirm against
		// the Class_Octant info layout).
		for (iter = octree.octants.begin(); iter != iterend; iter++){
			iter->info[8] = false;
			iter->info[9] = false;
			iter->info[11] = false;
		}

		// mapidx init: identity mapping before refinement
		mapidx.clear();
		mapidx.resize(nocts);
#if defined(__INTEL_COMPILER) || defined(__ICC)
#else
		// shrink_to_fit skipped on Intel compilers (guarded out above)
		mapidx.shrink_to_fit();
#endif
		for (uint32_t i=0; i<nocts; i++){
			mapidx[i] = i;
		}
#if NOMPI==0
		if(serial){
#endif
			log.writeLog("---------------------------------------------");
			log.writeLog(" ADAPT (Global Refine)");
			log.writeLog(" ");

			log.writeLog(" ");
			log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));

			// Refine: iterate until no octant requests further refinement
			while(octree.globalRefine(mapidx));

			if (octree.getNumOctants() > nocts)
				localDone = true;
			log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
			nocts = octree.getNumOctants();
			updateAdapt();

#if NOMPI==0
			MPI_Barrier(comm);
			// Combine the local "something changed" flags into a global decision
			error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
#endif
			log.writeLog(" ");
			log.writeLog("---------------------------------------------");
#if NOMPI==0
		}
		else{
			log.writeLog("---------------------------------------------");
			log.writeLog(" ADAPT (Global Refine)");
			log.writeLog(" ");

			log.writeLog(" ");
			log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));

			// Refine
			while(octree.globalRefine(mapidx));
			if (octree.getNumOctants() > nocts)
				localDone = true;
			updateAdapt();
			// Rebuild process-boundary and ghost information after refinement
			setPboundGhosts();
			log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
			nocts = octree.getNumOctants();

			MPI_Barrier(comm);
			error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
			log.writeLog(" ");
			log.writeLog("---------------------------------------------");
		}
		return globalDone;
#else
		return localDone;
#endif
	}
5214 
5215  // =============================================================================== //
5216 
5220  bool globalDone = false, localDone = false, cDone = false;
5221  uint32_t nocts = octree.getNumOctants();
5222  vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
5223 
5224  for (iter = octree.octants.begin(); iter != iterend; iter++){
5225  iter->info[8] = false;
5226  iter->info[9] = false;
5227  iter->info[11] = false;
5228  }
5229 #if NOMPI==0
5230  if(serial){
5231 #endif
5232  log.writeLog("---------------------------------------------");
5233  log.writeLog(" ADAPT (Global Coarse)");
5234  log.writeLog(" ");
5235 
5236  // 2:1 Balance
5237  balance21(true);
5238 
5239  log.writeLog(" ");
5240  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5241 
5242  // Coarse
5243  while(octree.globalCoarse());
5244  updateAfterCoarse();
5245  balance21(false);
5246  while(octree.refine());
5247  updateAdapt();
5248  if (octree.getNumOctants() < nocts){
5249  localDone = true;
5250  }
5251  nocts = octree.getNumOctants();
5252 
5253  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5254 #if NOMPI==0
5255  MPI_Barrier(comm);
5256  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5257 #endif
5258  log.writeLog(" ");
5259  log.writeLog("---------------------------------------------");
5260 #if NOMPI==0
5261  }
5262  else{
5263  log.writeLog("---------------------------------------------");
5264  log.writeLog(" ADAPT (Global Coarse)");
5265  log.writeLog(" ");
5266 
5267  // 2:1 Balance
5268  balance21(true);
5269 
5270  log.writeLog(" ");
5271  log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5272 
5273  // Coarse
5274  while(octree.globalCoarse());
5275  updateAfterCoarse();
5276  setPboundGhosts();
5277  balance21(false);
5278  while(octree.refine());
5279  updateAdapt();
5280  setPboundGhosts();
5281  if (octree.getNumOctants() < nocts){
5282  localDone = true;
5283  }
5284  nocts = octree.getNumOctants();
5285 
5286  MPI_Barrier(comm);
5287  error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5288  log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5289  log.writeLog(" ");
5290  log.writeLog("---------------------------------------------");
5291  }
5292  return globalDone;
5293 #else
5294  return localDone;
5295 #endif
5296  }
5297 
5298  // =============================================================================== //
5299 
	/** Adapt the octree mesh globally coarsening the octants, tracking in
	 *  the given vector the index of the old octant each new octant comes from.
	 *  NOTE(review): the parameter shadows the homonymous class member mapidx.
	 *  \param[out] mapidx Mapping new octant index -> old octant index.
	 *  \return True if at least one octant has been coarsened (global result
	 *  with MPI, local result when compiled with NOMPI).
	 */
	bool adaptGlobalCoarse(u32vector & mapidx) {
		//TODO recoding for adapting with abs(marker) > 1
		bool globalDone = false, localDone = false;
		uint32_t nocts = octree.getNumOctants();
		vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();

		// Reset per-octant adapt info flags (NOTE(review): indices 8/9/11
		// look like the new-refined/new-coarsened markers - confirm against
		// the Class_Octant info layout).
		for (iter = octree.octants.begin(); iter != iterend; iter++){
			iter->info[8] = false;
			iter->info[9] = false;
			iter->info[11] = false;
		}

		// mapidx init: identity mapping before coarsening
		mapidx.clear();
		mapidx.resize(nocts);
#if defined(__INTEL_COMPILER) || defined(__ICC)
#else
		// shrink_to_fit skipped on Intel compilers (guarded out above)
		mapidx.shrink_to_fit();
#endif
		for (uint32_t i=0; i<nocts; i++){
			mapidx[i] = i;
		}
#if NOMPI==0
		if(serial){
#endif
			log.writeLog("---------------------------------------------");
			log.writeLog(" ADAPT (Global Coarse)");
			log.writeLog(" ");

			// 2:1 Balance
			balance21(true);

			log.writeLog(" ");
			log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));

			// Coarse, then re-balance and refine to restore the 2:1 condition
			while(octree.globalCoarse(mapidx));
			updateAfterCoarse(mapidx);
			balance21(false);
			while(octree.refine(mapidx));
			updateAdapt();
			if (octree.getNumOctants() < nocts){
				localDone = true;
			}
			nocts = octree.getNumOctants();

			log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
#if NOMPI==0
			MPI_Barrier(comm);
			// Combine the local "something changed" flags into a global decision
			error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
#endif
			log.writeLog(" ");
			log.writeLog("---------------------------------------------");
#if NOMPI==0
		}
		else{
			log.writeLog("---------------------------------------------");
			log.writeLog(" ADAPT (Global Coarse)");
			log.writeLog(" ");

			// 2:1 Balance
			balance21(true);

			log.writeLog(" ");
			log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));

			// Coarse, then re-balance and refine to restore the 2:1 condition;
			// ghost information is rebuilt after each structural change.
			while(octree.globalCoarse(mapidx));
			updateAfterCoarse(mapidx);
			setPboundGhosts();
			balance21(false);
			while(octree.refine(mapidx));
			updateAdapt();
			setPboundGhosts();
			if (octree.getNumOctants() < nocts){
				localDone = true;
			}
			nocts = octree.getNumOctants();

			MPI_Barrier(comm);
			error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
			log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
			log.writeLog(" ");
			log.writeLog("---------------------------------------------");
		}
		return globalDone;
#else
		return localDone;
#endif
	}
5397 
5398 #if NOMPI==0
5399  // =============================================================================== //
5400 
5403  template<class Impl>
5405  //BUILD SEND BUFFERS
5406  map<int,Class_Comm_Buffer> sendBuffers;
5407  size_t fixedDataSize = userData.fixedSize();
5408  map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
5409  map<int,vector<uint32_t> >::iterator bitbegin = bordersPerProc.begin();
5410  for(map<int,vector<uint32_t> >::iterator bit = bitbegin; bit != bitend; ++bit){
5411  const int & key = bit->first;
5412  const vector<uint32_t> & pborders = bit->second;
5413  size_t buffSize = 0;
5414  size_t nofPbordersPerProc = pborders.size();
5415  if(fixedDataSize != 0){
5416  buffSize = fixedDataSize*nofPbordersPerProc;
5417  }
5418  else{
5419  for(size_t i = 0; i < nofPbordersPerProc; ++i){
5420  buffSize += userData.size(pborders[i]);
5421  }
5422  }
5423  //enlarge buffer to store number of pborders from this proc
5424  buffSize += sizeof(int);
5425  //build buffer for this proc
5426  sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
5427  //store number of pborders from this proc at the begining
5428  MPI_Pack(&nofPbordersPerProc,1,MPI_INT,sendBuffers[key].commBuffer,sendBuffers[key].commBufferSize,&sendBuffers[key].pos,comm);
5429 
5430  //WRITE SEND BUFFERS
5431  for(size_t j = 0; j < nofPbordersPerProc; ++j){
5432  userData.gather(sendBuffers[key],pborders[j]);
5433  }
5434  }
5435 
5436  //Communicate Buffers Size
5437  MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5438  MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5439  int nReq = 0;
5440  map<int,int> recvBufferSizePerProc;
5441  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5442  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5443  recvBufferSizePerProc[sit->first] = 0;
5444  error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5445  ++nReq;
5446  }
5447  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5448  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5449  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5450  ++nReq;
5451  }
5452  MPI_Waitall(nReq,req,stats);
5453 
5454  //Communicate Buffers
5455  map<int,Class_Comm_Buffer> recvBuffers;
5456  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5457  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5458  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
5459  }
5460  nReq = 0;
5461  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5462  error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5463  ++nReq;
5464  }
5465  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5466  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5467  ++nReq;
5468  }
5469  MPI_Waitall(nReq,req,stats);
5470 
5471  //READ RECEIVE BUFFERS
5472  int ghostOffset = 0;
5473  map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
5474  map<int,Class_Comm_Buffer>::iterator rbitbegin = recvBuffers.begin();
5475  for(map<int,Class_Comm_Buffer>::iterator rbit = rbitbegin; rbit != rbitend; ++rbit){
5476  int nofGhostFromThisProc = 0;
5477  MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofGhostFromThisProc,1,MPI_INT,comm);
5478  for(int k = 0; k < nofGhostFromThisProc; ++k){
5479  userData.scatter(rbit->second, k+ghostOffset);
5480  }
5481  ghostOffset += nofGhostFromThisProc;
5482  }
5483  delete [] req; req = NULL;
5484  delete [] stats; stats = NULL;
5485 
5486  };
5487 #endif /* NOMPI */
5488  // =============================================================================== //
5489 
5493  octree.computeConnectivity();
5494  }
5495 
5496  // =================================================================================== //
5497 
5501  octree.clearConnectivity();
5502  }
5503 
5504  // =================================================================================== //
5505 
5509  octree.updateConnectivity();
5510  }
5511 
5512  // =================================================================================== //
5513 
5517  octree.computeghostsConnectivity();
5518  }
5519 
5520  // =================================================================================== //
5521 
5525  octree.clearghostsConnectivity();
5526  }
5527 
5528  // =================================================================================== //
5529 
5533  octree.updateghostsConnectivity();
5534  }
5535 
5536  // =================================================================================== //
5537 
5540  uint32_t getNumNodes() {
5541  return octree.nodes.size();
5542  }
5543 
5544  // =============================================================================== //
5545 
5549  const u32vector2D & getConnectivity(){
5550  return octree.connectivity;
5551  }
5552 
5553  // =============================================================================== //
5554 
5558  const u32vector2D & getGhostConnectivity(){
5559  return octree.ghostsconnectivity;
5560  }
5561 
5562  // =============================================================================== //
5567  u32vector getOctantConnectivity(uint32_t idx){
5568  return octree.connectivity[idx];
5569  }
5570 
5571  // =============================================================================== //
5577  return octree.connectivity[getIdx(oct)];
5578  }
5579 
5580  // =============================================================================== //
5581 
5586  u32vector getGhostOctantConnectivity(uint32_t idx){
5587  return octree.ghostsconnectivity[idx];
5588  }
5589 
5590  // =============================================================================== //
5591 
5597  return octree.ghostsconnectivity[getIdx(oct)];
5598  }
5599 
5600  // =============================================================================== //
5601 
5605  const u32vector2D & getNodes(){
5606  return octree.nodes;
5607  }
5608 
5609  // =============================================================================== //
5610 
5615  u32vector getNodeLogicalCoordinates(uint32_t inode){
5616  return octree.nodes[inode];
5617  }
5618 
5619  // =============================================================================== //
5620 
5624  const u32vector2D & getGhostNodes(){
5625  return octree.ghostsnodes;
5626  }
5627 
5628  // =============================================================================== //
5629 
5634  dvector getNodeCoordinates(uint32_t inode){
5635  vector<double> coords(3,0);
5636  coords[0] = trans.mapX(octree.nodes[inode][0]);
5637  coords[1] = trans.mapY(octree.nodes[inode][1]);
5638  coords[2] = trans.mapZ(octree.nodes[inode][2]);
5639  return coords;
5640  }
5641 
5642  // =============================================================================== //
5643 
5648  u32vector getGhostNodeLogicalCoordinates(uint32_t inode){
5649  return octree.ghostsnodes[inode];
5650  }
5651 
5652  // =============================================================================== //
5653 
5658  dvector getGhostNodeCoordinates(uint32_t inode){
5659  vector<double> coords(3,0);
5660  coords[0] = trans.mapX(octree.ghostsnodes[inode][0]);
5661  coords[1] = trans.mapY(octree.ghostsnodes[inode][1]);
5662  coords[2] = trans.mapZ(octree.ghostsnodes[inode][2]);
5663  return coords;
5664  }
5665 
5666  // =============================================================================== //
5667 
5677  vector<pair<pair<uint32_t, uint32_t>, pair<int, int> > > mapPablos(Class_Para_Tree<2> & ptree){
5678  //TODO DO IT WITH ITERATORS
5679  vector<pair<pair<uint32_t, uint32_t>, pair<int, int> > > mapper;
5680  uint64_t morton2 = 0, morton1 = 0, mortonlastdesc = 0, mortonfirstdesc = 0;
5681  uint32_t idx1 = 0, idx2 = 0;
5682  uint32_t nocts = octree.getNumOctants();
5683  uint32_t nocts2 = ptree.octree.getNumOctants();
5684  int owner;
5685  mapper.resize(nocts);
5686 #if NOMPI==0
5687  if (ptree.serial){
5688 #endif
5689  for (uint32_t i=0; i<nocts; i++){
5690  mapper[i].first.first = idx1;
5691  mapper[i].first.second = idx2;
5692  mapper[i].second.first = rank;
5693  mapper[i].second.second = rank;
5694  mortonfirstdesc = octree.octants[i].computeMorton();
5695  mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5696  while(morton1 <= mortonfirstdesc && idx1 < nocts2){
5697  mapper[i].first.first = idx1;
5698  idx1++;
5699  if (idx1 < nocts2)
5700  morton1 = ptree.getOctant(idx1)->computeMorton();
5701  }
5702  if(idx1 > 0){
5703  idx1--;
5704  morton1 = ptree.getOctant(idx1)->computeMorton();
5705  }
5706  while(morton2 <= mortonlastdesc && idx2 < nocts2){
5707  mapper[i].first.second = idx2;
5708  idx2++;
5709  if (idx2 < nocts2)
5710  morton2 = ptree.getOctant(idx2)->computeMorton();
5711  }
5712  if (idx2 > 0){
5713  idx2--;
5714  morton2 = ptree.getOctant(idx2)->computeMorton();
5715  }
5716  }
5717 #if NOMPI==0
5718  }
5719  else{
5720  map<int,vector<uint64_t> > FirstMortonperproc, SecondMortonperproc;
5721  map<int,vector<uint64_t> > FirstMortonReceived, SecondMortonReceived;
5722  map<int,vector<uint32_t> > FirstIndexperproc, SecondIndexperproc;
5723  map<int,vector<uint32_t> > FirstLocalIndex, SecondLocalIndex;
5724  idx1 = 0;
5725  morton1 = 0;
5726  idx2 = 0;
5727  morton2 = 0;
5728  for (uint32_t i=0; i<nocts; i++){
5729  mortonfirstdesc = octree.octants[i].computeMorton();
5730  owner = ptree.findOwner(mortonfirstdesc);
5731  if (rank == owner){
5732  mapper[i].second.first = rank;
5733  while(morton1 <= mortonfirstdesc && idx1 < nocts2){
5734  mapper[i].first.first = idx1;
5735  idx1++;
5736  if (idx1 < nocts2)
5737  morton1 = ptree.getOctant(idx1)->computeMorton();
5738  }
5739  if(idx1 > 0){
5740  idx1--;
5741  morton1 = ptree.getOctant(idx1)->computeMorton();
5742  }
5743  mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5744  owner = ptree.findOwner(mortonlastdesc);
5745  if (rank == owner){
5746  mapper[i].second.second = rank;
5747  mapper[i].first.second = idx2;
5748  while(morton2 <= mortonlastdesc && idx2 < nocts2){
5749  mapper[i].first.second = idx2;
5750  idx2++;
5751  if (idx2 < nocts2)
5752  morton2 = ptree.getOctant(idx2)->computeMorton();
5753  }
5754  if(idx2 > 0){
5755  idx2--;
5756  morton2 = ptree.getOctant(idx2)->computeMorton();
5757  }
5758  }
5759  else{
5760  mapper[i].second.second = owner;
5761  SecondMortonperproc[owner].push_back(mortonfirstdesc);
5762  SecondLocalIndex[owner].push_back(i);
5763  }
5764  }
5765  else{
5766  mapper[i].second.first = owner;
5767  FirstMortonperproc[owner].push_back(mortonfirstdesc);
5768  FirstLocalIndex[owner].push_back(i);
5769  mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5770  owner = ptree.findOwner(mortonlastdesc);
5771  if (rank == owner){
5772  mapper[i].second.second = rank;
5773  mapper[i].first.second = idx2;
5774  while(morton2 <= mortonlastdesc && idx2 < nocts2){
5775  mapper[i].first.second = idx2;
5776  idx2++;
5777  if (idx2 < nocts2)
5778  morton2 = ptree.getOctant(idx2)->computeMorton();
5779  }
5780  if(idx2 > 0){
5781  idx2--;
5782  morton2 = ptree.getOctant(idx2)->computeMorton();
5783  }
5784  }
5785  else{
5786  mapper[i].second.second = owner;
5787  SecondMortonperproc[owner].push_back(mortonfirstdesc);
5788  SecondLocalIndex[owner].push_back(i);
5789  }
5790  }
5791  }
5792 
5793  MPI_Barrier(comm);
5794 
5795 
5796  for(int iproc=0; iproc<nproc; iproc++){
5797  FirstMortonperproc[iproc].push_back(-1);
5798  SecondMortonperproc[iproc].push_back(-1);
5799  }
5800 
5801  {
5802 
5803  //COMM FIRST MORTON PER PROC
5804  map<int,Class_Comm_Buffer> sendBuffers;
5805  map<int,vector<uint64_t> >::iterator bitend = FirstMortonperproc.end();
5806  for(map<int,vector<uint64_t> >::iterator bit = FirstMortonperproc.begin(); bit != bitend; ++bit){
5807  int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint64_t)) / (double)(CHAR_BIT/8));
5808  int key = bit->first;
5809  vector<uint64_t> & value = bit->second;
5810  sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
5811  int pos = 0;
5812  int nofMortons = value.size();
5813  for(int i = 0; i < nofMortons; ++i){
5814  //the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
5815  uint64_t Morton = value[i];
5816  error_flag = MPI_Pack(&Morton,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
5817  }
5818  }
5819 
5820  //COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
5821  //the size of every borders buffer is communicated to the right process in order to build the receive buffer
5822  //and stored in the recvBufferSizePerProc structure
5823  MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5824  MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5825  int nReq = 0;
5826  map<int,int> recvBufferSizePerProc;
5827  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5828  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5829  recvBufferSizePerProc[sit->first] = 0;
5830  error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5831  ++nReq;
5832  }
5833  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5834  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5835  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5836  ++nReq;
5837  }
5838  MPI_Waitall(nReq,req,stats);
5839 
5840  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
5841  //recvBuffers structure is declared and each buffer is initialized to the right size
5842  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
5843  //at the same time every process compute the size in bytes
5844  uint32_t nofBytesOverProc = 0;
5845  map<int,Class_Comm_Buffer> recvBuffers;
5846  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5847  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5848  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
5849  }
5850  nReq = 0;
5851  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5852  nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
5853  error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5854  ++nReq;
5855  }
5856  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5857  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5858  ++nReq;
5859  }
5860  MPI_Waitall(nReq,req,stats);
5861 
5862  //UNPACK BUFFERS AND BUILD CONTAINER OF RECEIVED MORTON
5863  //every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked.
5864  //every Morton is built and put in the MorontReceived vector
5865  uint32_t Mortoncounter = 0;
5866  uint64_t Morton = 0;
5867  map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
5868  for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
5869  int pos = 0;
5870  int nofMortonPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint64_t)));
5871  for(int i = 0; i < nofMortonPerProc-1; ++i){
5872  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Morton,1,MPI_UINT64_T,comm);
5873  FirstMortonReceived[rrit->first].push_back(Morton);
5874  ++Mortoncounter;
5875  }
5876  }
5877 
5878  recvBuffers.clear();
5879  sendBuffers.clear();
5880  recvBufferSizePerProc.clear();
5881  delete [] req; req = NULL;
5882  delete [] stats; stats = NULL;
5883 
5884  }
5885 
5886  {
5887  //COMM SECOND MORTON PER PROC
5888  map<int,Class_Comm_Buffer> sendBuffers;
5889  map<int,vector<uint64_t> >::iterator bitend = SecondMortonperproc.end();
5890  uint32_t pbordersOversize = 0;
5891  for(map<int,vector<uint64_t> >::iterator bit = SecondMortonperproc.begin(); bit != bitend; ++bit){
5892  pbordersOversize += bit->second.size();
5893  int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint64_t)) / (double)(CHAR_BIT/8));
5894  int key = bit->first;
5895  vector<uint64_t> & value = bit->second;
5896  sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
5897  int pos = 0;
5898  int nofMortons = value.size();
5899  for(int i = 0; i < nofMortons; ++i){
5900  //the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
5901  uint64_t Morton = value[i];
5902  error_flag = MPI_Pack(&Morton,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
5903  }
5904  }
5905 
5906  //COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
5907  //the size of every borders buffer is communicated to the right process in order to build the receive buffer
5908  //and stored in the recvBufferSizePerProc structure
5909  MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5910  MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5911  int nReq = 0;
5912  map<int,int> recvBufferSizePerProc;
5913  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5914  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5915  recvBufferSizePerProc[sit->first] = 0;
5916  error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5917  ++nReq;
5918  }
5919  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5920  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5921  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5922  ++nReq;
5923  }
5924  MPI_Waitall(nReq,req,stats);
5925 
5926  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
5927  //recvBuffers structure is declared and each buffer is initialized to the right size
5928  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
5929  //at the same time every process compute the size in bytes
5930  uint32_t nofBytesOverProc = 0;
5931  map<int,Class_Comm_Buffer> recvBuffers;
5932  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5933  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5934  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
5935  }
5936  nReq = 0;
5937  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5938  nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
5939  error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5940  ++nReq;
5941  }
5942  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5943  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5944  ++nReq;
5945  }
5946  MPI_Waitall(nReq,req,stats);
5947 
5948  //UNPACK BUFFERS AND BUILD CONTAINER OF RECEIVED MORTON
5949  //every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked.
5950  //every Morton is built and put in the MorontReceived vector
5951  uint32_t Mortoncounter = 0;
5952  uint64_t Morton = 0;
5953  map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
5954  for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
5955  int pos = 0;
5956  int nofMortonPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint64_t)));
5957  for(int i = 0; i < nofMortonPerProc-1; ++i){
5958  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Morton,1,MPI_UINT64_T,comm);
5959  SecondMortonReceived[rrit->first].push_back(Morton);
5960  ++Mortoncounter;
5961  }
5962  }
5963  recvBuffers.clear();
5964  sendBuffers.clear();
5965  recvBufferSizePerProc.clear();
5966  delete [] req; req = NULL;
5967  delete [] stats; stats = NULL;
5968  }
5969 
5970  //FIND FIRST INDEX FOR FIRST MORTONS IN EACH PROCESS
5971  for (int iproc=0; iproc<nproc; iproc++){
5972  vector<Class_Octant<2> >::iterator oend = octree.octants.end();
5973  vector<Class_Octant<2> >::iterator obegin = octree.octants.begin();
5974  vector<Class_Octant<2> >::iterator it = obegin;
5975  int nmortons = FirstMortonReceived[iproc].size();
5976  FirstIndexperproc[iproc].resize(nmortons);
5977  for (int idx=0; idx<nmortons; idx++){
5978  FirstIndexperproc[iproc][idx] = octree.getNumOctants()-1;
5979  uint32_t idx1 = 0;
5980  mortonfirstdesc = FirstMortonReceived[iproc][idx];
5981  morton1 = it->computeMorton();
5982  while(morton1 <= mortonfirstdesc && it != oend){
5983  idx1++;
5984  FirstIndexperproc[iproc][idx] = idx1;
5985  it++;
5986  if (it != oend)
5987  morton1 = it->computeMorton();
5988  }
5989  if(idx1 > 0){
5990  idx1--;
5991  it--;
5992  morton1 = ptree.getOctant(idx1)->computeMorton();
5993  }
5994  }
5995  }
5996 
5997  //FIND SECOND INDEX FOR SECOND MORTONS IN EACH PROCESS
5998  for (int iproc=0; iproc<nproc; iproc++){
5999  vector<Class_Octant<2> >::iterator oend = octree.octants.end();
6000  vector<Class_Octant<2> >::iterator obegin = octree.octants.begin();
6001  vector<Class_Octant<2> >::iterator it = obegin;
6002  int nmortons = SecondMortonReceived[iproc].size();
6003  SecondIndexperproc[iproc].resize(nmortons);
6004  for (int idx=0; idx<nmortons; idx++){
6005  SecondIndexperproc[iproc][idx] = octree.getNumOctants()-1;
6006  uint32_t idx2 = 0;
6007  mortonlastdesc = SecondMortonReceived[iproc][idx];
6008  morton2 = it->computeMorton();
6009  while(morton2 <= mortonlastdesc && it != oend){
6010  SecondIndexperproc[iproc][idx] = idx2;
6011  idx2++;
6012  it++;
6013  if (it != oend)
6014  morton2 = it->computeMorton();
6015  }
6016  if(idx2 > 0){
6017  idx2--;
6018  it--;
6019  morton2 = ptree.getOctant(idx2)->computeMorton();
6020  }
6021  }
6022  }
6023 
6024  for(int iproc=0; iproc<nproc; iproc++){
6025  FirstIndexperproc[iproc].push_back(-1);
6026  SecondIndexperproc[iproc].push_back(-1);
6027  }
6028 
6029 
6030  {
6031  //COMM BACK FIRST INDEX PER PROC
6032  map<int,Class_Comm_Buffer> sendBuffers;
6033  map<int,vector<uint32_t> >::iterator bitend = FirstIndexperproc.end();
6034  for(map<int,vector<uint32_t> >::iterator bit = FirstIndexperproc.begin(); bit != bitend; ++bit){
6035  int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint32_t)) / (double)(CHAR_BIT/8));
6036  int key = bit->first;
6037  vector<uint32_t> & value = bit->second;
6038  sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
6039  int pos = 0;
6040  int nofIndices = value.size();
6041  for(int i = 0; i < nofIndices; ++i){
6042  //the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
6043  uint32_t Index = value[i];
6044  error_flag = MPI_Pack(&Index,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
6045  }
6046  }
6047 
6048  //COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
6049  //the size of every borders buffer is communicated to the right process in order to build the receive buffer
6050  //and stored in the recvBufferSizePerProc structure
6051  MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
6052  MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
6053  int nReq = 0;
6054  map<int,int> recvBufferSizePerProc;
6055  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
6056  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6057  recvBufferSizePerProc[sit->first] = 0;
6058  error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
6059  ++nReq;
6060  }
6061  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
6062  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6063  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
6064  ++nReq;
6065  }
6066  MPI_Waitall(nReq,req,stats);
6067 
6068  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
6069  //recvBuffers structure is declared and each buffer is initialized to the right size
6070  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
6071  //at the same time every process compute the size in bytes
6072  uint32_t nofBytesOverProc = 0;
6073  map<int,Class_Comm_Buffer> recvBuffers;
6074  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
6075  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
6076  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
6077  }
6078  nReq = 0;
6079  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6080  nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
6081  error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
6082  ++nReq;
6083  }
6084  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6085  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
6086  ++nReq;
6087  }
6088  MPI_Waitall(nReq,req,stats);
6089 
6090  //UNPACK BUFFERS AND BUILD CONTAINER OF RECEIVED MORTON
6091  //every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked.
6092  //every Index is built and put in the mapper
6093  uint32_t Indexcounter = 0;
6094  uint32_t Index = 0;
6095  map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
6096  for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
6097  int pos = 0;
6098  int nofIndexPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint32_t)));
6099  for(int i = 0; i < nofIndexPerProc-1; ++i){
6100  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Index,1,MPI_UINT32_T,comm);
6101  mapper[FirstLocalIndex[rrit->first][i]].first.first = Index;
6102  ++Indexcounter;
6103  }
6104 
6105  }
6106  recvBuffers.clear();
6107  sendBuffers.clear();
6108  recvBufferSizePerProc.clear();
6109  delete [] req; req = NULL;
6110  delete [] stats; stats = NULL;
6111  }
6112 
6113  {
6114  //COMM BACK SECOND INDEX PER PROC
6115  map<int,Class_Comm_Buffer> sendBuffers;
6116  map<int,vector<uint32_t> >::iterator bitend = SecondIndexperproc.end();
6117  uint32_t pbordersOversize = 0;
6118  for(map<int,vector<uint32_t> >::iterator bit = SecondIndexperproc.begin(); bit != bitend; ++bit){
6119  pbordersOversize += bit->second.size();
6120  int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint32_t)) / (double)(CHAR_BIT/8));
6121  int key = bit->first;
6122  vector<uint32_t> & value = bit->second;
6123  sendBuffers[key] = Class_Comm_Buffer(buffSize,'a',comm);
6124  int pos = 0;
6125  int nofIndices = value.size();
6126  for(int i = 0; i < nofIndices; ++i){
6127  //the use of auxiliary variable can be avoided passing to MPI_Pack the members of octant but octant in that case cannot be const
6128  uint64_t Index = value[i];
6129  error_flag = MPI_Pack(&Index,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
6130  }
6131  }
6132 
6133  //COMMUNICATE THE SIZE OF BUFFER TO THE RECEIVERS
6134  //the size of every borders buffer is communicated to the right process in order to build the receive buffer
6135  //and stored in the recvBufferSizePerProc structure
6136  MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
6137  MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
6138  int nReq = 0;
6139  map<int,int> recvBufferSizePerProc;
6140  map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
6141  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6142  recvBufferSizePerProc[sit->first] = 0;
6143  error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
6144  ++nReq;
6145  }
6146  map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
6147  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6148  error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
6149  ++nReq;
6150  }
6151  MPI_Waitall(nReq,req,stats);
6152 
6153  //COMMUNICATE THE BUFFERS TO THE RECEIVERS
6154  //recvBuffers structure is declared and each buffer is initialized to the right size
6155  //then, sendBuffers are communicated by senders and stored in recvBuffers in the receivers
6156  //at the same time every process compute the size in bytes
6157  uint32_t nofBytesOverProc = 0;
6158  map<int,Class_Comm_Buffer> recvBuffers;
6159  map<int,int>::iterator ritend = recvBufferSizePerProc.end();
6160  for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
6161  recvBuffers[rit->first] = Class_Comm_Buffer(rit->second,'a',comm);
6162  }
6163  nReq = 0;
6164  for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6165  nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
6166  error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
6167  ++nReq;
6168  }
6169  for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6170  error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
6171  ++nReq;
6172  }
6173  MPI_Waitall(nReq,req,stats);
6174 
6175  //UNPACK BUFFERS AND BUILD CONTAINER OF RECEIVED MORTON
6176  //every entry in recvBuffers is visited, each buffers from neighbor processes is unpacked.
6177  //every Index is built and put in the mapper
6178  uint32_t Indexcounter = 0;
6179  uint32_t Index = 0;
6180  map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
6181  for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
6182  int pos = 0;
6183  int nofIndexPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint32_t)));
6184  for(int i = 0; i < nofIndexPerProc-1; ++i){
6185  error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Index,1,MPI_UINT32_T,comm);
6186  mapper[SecondLocalIndex[rrit->first][i]].first.second = Index;
6187  ++Indexcounter;
6188  }
6189  }
6190  recvBuffers.clear();
6191  sendBuffers.clear();
6192  recvBufferSizePerProc.clear();
6193  delete [] req; req = NULL;
6194  delete [] stats; stats = NULL;
6195  }
6196  }
6197 #endif /* NOMPI */
6198  //TODO PARALLEL VERSION - (BUGS!!!)
6199  return mapper;
6200  }
6201 
6202  // =============================================================================== //
6203 
6209  void writeLogical(string filename) {
6210 
6211  bool clear = false;
6212  if (octree.connectivity.size() == 0) {
6213  octree.computeConnectivity();
6214  clear = true;
6215  }
6216 
6217  stringstream name;
6218  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6219 
6220  ofstream out(name.str().c_str());
6221  if(!out.is_open()){
6222  stringstream ss;
6223  ss << filename << "*.vtu cannot be opened and it won't be written.";
6224  log.writeLog(ss.str());
6225  return;
6226  }
6227  int nofNodes = octree.nodes.size();
6228  int nofGhostNodes = octree.ghostsnodes.size();
6229  int nofOctants = octree.connectivity.size();
6230  int nofGhosts = octree.ghostsconnectivity.size();
6231  int nofAll = nofGhosts + nofOctants;
6232  out << "<?xml version=\"1.0\"?>" << endl
6233  << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6234  << " <UnstructuredGrid>" << endl
6235  << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6236  out << " <Points>" << endl
6237  << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 <<"\" format=\"ascii\">" << endl
6238  << " " << std::fixed;
6239  for(int i = 0; i < nofNodes; i++)
6240  {
6241  for(int j = 0; j < 3; ++j)
6242  out << std::setprecision(6) << octree.nodes[i][j] << " ";
6243  if((i+1)%4==0 && i!=nofNodes-1)
6244  out << endl << " ";
6245  }
6246  for(int i = 0; i < nofGhostNodes; i++)
6247  {
6248  for(int j = 0; j < 3; ++j)
6249  out << std::setprecision(6) << octree.ghostsnodes[i][j] << " ";
6250  if((i+1)%4==0 && i!=nofNodes-1)
6251  out << endl << " ";
6252  }
6253  out << endl << " </DataArray>" << endl
6254  << " </Points>" << endl
6255  << " <Cells>" << endl
6256  << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6257  << " ";
6258  for(int i = 0; i < nofOctants; i++)
6259  {
6260  for(int j = 0; j < global2D.nnodes; j++)
6261  {
6262  int jj;
6263  if (j<2){
6264  jj = j;
6265  }
6266  else if(j==2){
6267  jj = 3;
6268  }
6269  else if(j==3){
6270  jj = 2;
6271  }
6272  out << octree.connectivity[i][jj] << " ";
6273  }
6274  if((i+1)%3==0 && i!=nofOctants-1)
6275  out << endl << " ";
6276  }
6277  for(int i = 0; i < nofGhosts; i++)
6278  {
6279  for(int j = 0; j < global2D.nnodes; j++)
6280  {
6281  int jj;
6282  if (j<2){
6283  jj = j;
6284  }
6285  else if(j==2){
6286  jj = 3;
6287  }
6288  else if(j==3){
6289  jj = 2;
6290  }
6291  out << octree.ghostsconnectivity[i][jj] + nofNodes << " ";
6292  }
6293  if((i+1)%3==0 && i!=nofGhosts-1)
6294  out << endl << " ";
6295  }
6296  out << endl << " </DataArray>" << endl
6297  << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6298  << " ";
6299  for(int i = 0; i < nofAll; i++)
6300  {
6301  out << (i+1)*global2D.nnodes << " ";
6302  if((i+1)%12==0 && i!=nofAll-1)
6303  out << endl << " ";
6304  }
6305  out << endl << " </DataArray>" << endl
6306  << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6307  << " ";
6308  for(int i = 0; i < nofAll; i++)
6309  {
6310  int type;
6311  type = 9;
6312  out << type << " ";
6313  if((i+1)%12==0 && i!=nofAll-1)
6314  out << endl << " ";
6315  }
6316  out << endl << " </DataArray>" << endl
6317  << " </Cells>" << endl
6318  << " </Piece>" << endl
6319  << " </UnstructuredGrid>" << endl
6320  << "</VTKFile>" << endl;
6321 
6322 
6323  if(rank == 0){
6324  name.str("");
6325  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6326  ofstream pout(name.str().c_str());
6327  if(!pout.is_open()){
6328  stringstream ss;
6329  ss << filename << "*.pvtu cannot be opened and it won't be written.";
6330  log.writeLog(ss.str());
6331  return;
6332  }
6333 
6334  pout << "<?xml version=\"1.0\"?>" << endl
6335  << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6336  << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6337  << " <PPointData>" << endl
6338  << " </PPointData>" << endl
6339  << " <PCellData Scalars=\"\">" << endl;
6340  pout << " </PCellData>" << endl
6341  << " <PPoints>" << endl
6342  << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6343  << " </PPoints>" << endl;
6344  for(int i = 0; i < nproc; i++)
6345  pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6346  pout << " </PUnstructuredGrid>" << endl
6347  << "</VTKFile>";
6348 
6349  pout.close();
6350 
6351  }
6352 #if NOMPI==0
6353  MPI_Barrier(comm);
6354 #endif
6355 
6356  if (clear){
6357  octree.clearConnectivity();
6358  }
6359 
6360 
6361  }
6362 
6363  // ----------------------------------------------------------------------------------- //
6364 
6370  void write(string filename) {
6371 
6372  bool clear = false;
6373  if (octree.connectivity.size() == 0) {
6374  octree.computeConnectivity();
6375  clear = true;
6376  }
6377 
6378  stringstream name;
6379  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6380 
6381  ofstream out(name.str().c_str());
6382  if(!out.is_open()){
6383  stringstream ss;
6384  ss << filename << "*.vtu cannot be opened and it won't be written.";
6385  log.writeLog(ss.str());
6386  return;
6387  }
6388  int nofNodes = octree.nodes.size();
6389  int nofGhostNodes = octree.ghostsnodes.size();
6390  int nofOctants = octree.connectivity.size();
6391  int nofGhosts = octree.ghostsconnectivity.size();
6392  int nofAll = nofGhosts + nofOctants;
6393  out << "<?xml version=\"1.0\"?>" << endl
6394  << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6395  << " <UnstructuredGrid>" << endl
6396  << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6397  out << " <Points>" << endl
6398  << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 <<"\" format=\"ascii\">" << endl
6399  << " " << std::fixed;
6400  for(int i = 0; i < nofNodes; i++)
6401  {
6402  for(int j = 0; j < 3; ++j){
6403  if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6404  if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6405  if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6406  }
6407  if((i+1)%4==0 && i!=nofNodes-1)
6408  out << endl << " ";
6409  }
6410  for(int i = 0; i < nofGhostNodes; i++)
6411  {
6412  for(int j = 0; j < 3; ++j){
6413  if (j==0) out << std::setprecision(6) << trans.mapX(octree.ghostsnodes[i][j]) << " ";
6414  if (j==1) out << std::setprecision(6) << trans.mapY(octree.ghostsnodes[i][j]) << " ";
6415  if (j==2) out << std::setprecision(6) << trans.mapZ(octree.ghostsnodes[i][j]) << " ";
6416  }
6417  if((i+1)%4==0 && i!=nofNodes-1)
6418  out << endl << " ";
6419  }
6420  out << endl << " </DataArray>" << endl
6421  << " </Points>" << endl
6422  << " <Cells>" << endl
6423  << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6424  << " ";
6425  for(int i = 0; i < nofOctants; i++)
6426  {
6427  for(int j = 0; j < global2D.nnodes; j++)
6428  {
6429  int jj;
6430  if (j<2){
6431  jj = j;
6432  }
6433  else if(j==2){
6434  jj = 3;
6435  }
6436  else if(j==3){
6437  jj = 2;
6438  }
6439  out << octree.connectivity[i][jj] << " ";
6440  }
6441  if((i+1)%3==0 && i!=nofOctants-1)
6442  out << endl << " ";
6443  }
6444  for(int i = 0; i < nofGhosts; i++)
6445  {
6446  for(int j = 0; j < global2D.nnodes; j++)
6447  {
6448  int jj;
6449  if (j<2){
6450  jj = j;
6451  }
6452  else if(j==2){
6453  jj = 3;
6454  }
6455  else if(j==3){
6456  jj = 2;
6457  }
6458  out << octree.ghostsconnectivity[i][jj] + nofNodes << " ";
6459  }
6460  if((i+1)%3==0 && i!=nofGhosts-1)
6461  out << endl << " ";
6462  }
6463  out << endl << " </DataArray>" << endl
6464  << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6465  << " ";
6466  for(int i = 0; i < nofAll; i++)
6467  {
6468  out << (i+1)*global2D.nnodes << " ";
6469  if((i+1)%12==0 && i!=nofAll-1)
6470  out << endl << " ";
6471  }
6472  out << endl << " </DataArray>" << endl
6473  << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6474  << " ";
6475  for(int i = 0; i < nofAll; i++)
6476  {
6477  int type;
6478  type = 9;
6479  out << type << " ";
6480  if((i+1)%12==0 && i!=nofAll-1)
6481  out << endl << " ";
6482  }
6483  out << endl << " </DataArray>" << endl
6484  << " </Cells>" << endl
6485  << " </Piece>" << endl
6486  << " </UnstructuredGrid>" << endl
6487  << "</VTKFile>" << endl;
6488 
6489 
6490  if(rank == 0){
6491  name.str("");
6492  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6493  ofstream pout(name.str().c_str());
6494  if(!pout.is_open()){
6495  stringstream ss;
6496  ss << filename << "*.pvtu cannot be opened and it won't be written.";
6497  log.writeLog(ss.str());
6498  return;
6499  }
6500 
6501  pout << "<?xml version=\"1.0\"?>" << endl
6502  << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6503  << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6504  << " <PPointData>" << endl
6505  << " </PPointData>" << endl
6506  << " <PCellData Scalars=\"\">" << endl;
6507  pout << " </PCellData>" << endl
6508  << " <PPoints>" << endl
6509  << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6510  << " </PPoints>" << endl;
6511  for(int i = 0; i < nproc; i++)
6512  pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6513  pout << " </PUnstructuredGrid>" << endl
6514  << "</VTKFile>";
6515 
6516  pout.close();
6517 
6518  }
6519 #if NOMPI==0
6520  MPI_Barrier(comm);
6521 #endif
6522 
6523  }
6524 
6525  // =============================================================================== //
6526 
6532  void writeTest(string filename, vector<double> data) {
6533 
6534  bool clear = false;
6535  if (octree.connectivity.size() == 0) {
6536  octree.computeConnectivity();
6537  clear = true;
6538  }
6539 
6540  stringstream name;
6541  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6542 
6543  ofstream out(name.str().c_str());
6544  if(!out.is_open()){
6545  stringstream ss;
6546  ss << filename << "*.vtu cannot be opened and it won't be written.";
6547  log.writeLog(ss.str());
6548  return;
6549  }
6550  int nofNodes = octree.nodes.size();
6551  int nofOctants = octree.connectivity.size();
6552  int nofAll = nofOctants;
6553  out << "<?xml version=\"1.0\"?>" << endl
6554  << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6555  << " <UnstructuredGrid>" << endl
6556  << " <Piece NumberOfCells=\"" << octree.connectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() << "\">" << endl;
6557  out << " <CellData Scalars=\"Data\">" << endl;
6558  out << " <DataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6559  << " " << std::fixed;
6560  int ndata = octree.connectivity.size();
6561  for(int i = 0; i < ndata; i++)
6562  {
6563  out << std::setprecision(6) << data[i] << " ";
6564  if((i+1)%4==0 && i!=ndata-1)
6565  out << endl << " ";
6566  }
6567  out << endl << " </DataArray>" << endl
6568  << " </CellData>" << endl
6569  << " <Points>" << endl
6570  << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 <<"\" format=\"ascii\">" << endl
6571  << " " << std::fixed;
6572  for(int i = 0; i < nofNodes; i++)
6573  {
6574  for(int j = 0; j < 3; ++j){
6575  if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6576  if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6577  if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6578  }
6579  if((i+1)%4==0 && i!=nofNodes-1)
6580  out << endl << " ";
6581  }
6582  out << endl << " </DataArray>" << endl
6583  << " </Points>" << endl
6584  << " <Cells>" << endl
6585  << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6586  << " ";
6587  for(int i = 0; i < nofOctants; i++)
6588  {
6589  for(int j = 0; j < global2D.nnodes; j++)
6590  {
6591  int jj;
6592  if (j<2){
6593  jj = j;
6594  }
6595  else if(j==2){
6596  jj = 3;
6597  }
6598  else if(j==3){
6599  jj = 2;
6600  }
6601  out << octree.connectivity[i][jj] << " ";
6602  }
6603  if((i+1)%3==0 && i!=nofOctants-1)
6604  out << endl << " ";
6605  }
6606  out << endl << " </DataArray>" << endl
6607  << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6608  << " ";
6609  for(int i = 0; i < nofAll; i++)
6610  {
6611  out << (i+1)*global2D.nnodes << " ";
6612  if((i+1)%12==0 && i!=nofAll-1)
6613  out << endl << " ";
6614  }
6615  out << endl << " </DataArray>" << endl
6616  << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6617  << " ";
6618  for(int i = 0; i < nofAll; i++)
6619  {
6620  int type;
6621  type = 9;
6622  out << type << " ";
6623  if((i+1)%12==0 && i!=nofAll-1)
6624  out << endl << " ";
6625  }
6626  out << endl << " </DataArray>" << endl
6627  << " </Cells>" << endl
6628  << " </Piece>" << endl
6629  << " </UnstructuredGrid>" << endl
6630  << "</VTKFile>" << endl;
6631 
6632 
6633  if(rank == 0){
6634  name.str("");
6635  name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6636  ofstream pout(name.str().c_str());
6637  if(!pout.is_open()){
6638  stringstream ss;
6639  ss << filename << "*.pvtu cannot be opened and it won't be written.";
6640  log.writeLog(ss.str());
6641  return;
6642  }
6643 
6644  pout << "<?xml version=\"1.0\"?>" << endl
6645  << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6646  << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6647  << " <PPointData>" << endl
6648  << " </PPointData>" << endl
6649  << " <PCellData Scalars=\"Data\">" << endl
6650  << " <PDataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\"/>" << endl
6651  << " </PCellData>" << endl
6652  << " <PPoints>" << endl
6653  << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6654  << " </PPoints>" << endl;
6655  for(int i = 0; i < nproc; i++)
6656  pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6657  pout << " </PUnstructuredGrid>" << endl
6658  << "</VTKFile>";
6659 
6660  pout.close();
6661 
6662  }
6663 #if NOMPI==0
6664  MPI_Barrier(comm);
6665 #endif
6666 
6667  }
6668 
6669  // =============================================================================== //
6670 
6671 };
6672 
6673 
6674 
void setBalance(bool balance)
uint32_t getX() const
uint8_t getLevel(Class_Octant< 2 > *oct)
double getZ(Class_Octant< 2 > *oct)
double getArea(uint32_t idx)
void getCenter(uint32_t idx, vector< double > &center)
Class_Local_Tree< 2 > octree
Global variables used in PABLO - 2D specialization.
double getSize(uint32_t idx)
void loadBalance(Class_Data_LB_Interface< Impl > &userData, uint8_t &level)
Bundle char container for communications.
bool getPbound(Class_Intersection< 2 > *inter)
Class_Octant< 2 > * getLogicalPointOwner(dvector &point)
bool getPbound(Class_Octant< 2 > *oct, uint8_t iface)
void setMarker(uint32_t idx, int8_t marker)
int8_t getMarker() const
Parallel Octree Manager Class.
dvector2D getNodes(Class_Intersection< 2 > *inter)
Class_Octant< 2 > * getOctant(uint32_t idx)
void writeTest(string filename, vector< double > data)
void findNeighbours(Class_Octant< 2 > *oct, uint8_t iface, uint8_t codim, u32vector &neighbours, vector< bool > &isghost)
u32vector getGhostNodeLogicalCoordinates(uint32_t inode)
bool getBalance(Class_Octant< 2 > *oct)
void assign(uint32_t stride, uint32_t length)
const Class_Global< 2 > & getGlobal()
Base class for data communications.
Parallel Octree Manager Class - 2D specialization.
Octant class definition - 2D specialization.
uint64_t computeMorton() const
void getFaceCenter(uint32_t idx, uint8_t iface, vector< double > &center)
void findNeighbours(uint32_t idx, uint8_t iface, uint8_t codim, u32vector &neighbours, vector< bool > &isghost)
double getX(Class_Octant< 2 > *oct)
void setMarker(Class_Octant< 2 > *oct, int8_t marker)
u32vector getNodeLogicalCoordinates(uint32_t inode)
bool getIsNewR(uint32_t idx)
bool getBound(Class_Octant< 2 > *oct)
bool adapt(bool mapper_flag)
int8_t getMarker(Class_Octant< 2 > *oct)
bool getIsNewC(Class_Octant< 2 > *oct)
u32vector getOwners(Class_Intersection< 2 > *inter)
uint32_t getSize() const
uint8_t getBalanceCodimension() const
uint32_t getArea() const
dvector getFaceCenter(uint8_t iface)
void findGhostNeighbours(uint32_t idx, uint8_t iface, uint8_t codim, u32vector &neighbours)
bool getPbound(Class_Octant< 2 > *oct)
void setMarker(int8_t marker)
void resizeGhost(uint32_t newSize)
dvector getGhostNodeCoordinates(uint32_t inode)
double X0
Definition: Class_Map.hpp:47
Local octree portion for each process - 2D specialization.
bool getIsGhost(uint32_t idx)
const u32vector2D & getNodes()
double mapSize(uint32_t const &size)
Definition: Class_Map.tpp:63
uint32_t getPointOwnerIdx(u32vector &point)
bool getIsNewC(uint32_t idx)
dvector getNodeCoordinates(uint32_t inode)
void getNodes(u32vector2D &nodes)
uint64_t getVolume() const
void scatter(Buffer &buff, const uint32_t e)
double mapArea(uint64_t const &area)
Definition: Class_Map.tpp:68
bool getBound(uint8_t face) const
void getNormal(Class_Octant< 2 > *oct, uint8_t &iface, dvector &normal)
bool getIsNewC() const
void getNodes(Class_Octant< 2 > *oct, dvector2D &nodes)
uint8_t getLocalMaxDepth() const
void getNormal(uint8_t &iface, vector< int8_t > &normal)
vector< double > getCenter(Class_Intersection< 2 > *inter)
void writeLogical(string filename)
bool getPbound(uint8_t face) const
uint64_t getGlobalIdx(Class_Octant< 2 > *oct)
void getFaceCenter(Class_Octant< 2 > *oct, uint8_t iface, vector< double > &center)
uint32_t getNumOctants() const
bool getBound(Class_Octant< 2 > *oct, uint8_t iface)
uint32_t getLogicalPointOwnerIdx(dvector &point)
dvector getNormal(uint32_t idx, uint8_t &iface)
double getSize(Class_Octant< 2 > *oct)
vector< double > getFaceCenter(Class_Octant< 2 > *oct, uint8_t iface)
void getNormal(uint32_t idx, uint8_t &iface, dvector &normal)
uint32_t getY() const
vector< double > getCenter(Class_Octant< 2 > *oct)
Base class for data communications.
uint8_t getLevel() const
Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
Class_Octant< 2 > * getPointOwner(dvector &point)
uint8_t getFace(Class_Intersection< 2 > *inter)
void mapNodesIntersection(uint32_t(*nodes)[3], vector< vector< double > > &mapnodes)
Definition: Class_Map.tpp:197
Customized array definition.
Definition: Class_Array.hpp:37
vector< double > getFaceCenter(uint32_t idx, uint8_t iface)
bool getBound(Class_Intersection< 2 > *inter)
Class_Para_Tree(string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
size_t size(const uint32_t e) const
void setBalance(uint32_t idx, bool balance)
void setBalanceCodimension(uint8_t b21codim)
bool getBalance(uint32_t idx)
Class_Para_Tree(double X, double Y, double Z, double L, ivector2D &XY, ivector &levels, string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
void scatter(Buffer &buff, const uint32_t e)
void mapCenter(double *&center, vector< double > &mapcenter)
Definition: Class_Map.tpp:78
void mapNodes(uint32_t(*nodes)[3], vector< vector< double > > &mapnodes)
Definition: Class_Map.tpp:122
uint8_t nodeface[4][2]
dvector2D getNodes(uint32_t idx)
uint32_t getNumGhosts() const
uint8_t getLevel(Class_Intersection< 2 > *inter)
vector< double > getCenter(uint32_t idx)
void getMapping(uint32_t &idx, u32vector &mapper, vector< bool > &isghost)
void move(const uint32_t from, const uint32_t to)
uint32_t getIdx(Class_Octant< 2 > *oct)
dvector getNormal(Class_Intersection< 2 > *inter)
u32vector getOctantConnectivity(uint32_t idx)
Intersection class definition - 2D specialization.
void mapNode(vector< uint32_t > &node, vector< double > &mapnode)
Definition: Class_Map.tpp:179
u32vector getGhostOctantConnectivity(uint32_t idx)
double getVolume(uint32_t idx)
Class_Intersection< 2 > * getIntersection(uint32_t idx)
uint64_t * partition_range_globalidx
vector< pair< pair< uint32_t, uint32_t >, pair< int, int > > > mapPablos(Class_Para_Tree< 2 > &ptree)
u32vector getOctantConnectivity(Class_Octant< 2 > *oct)
uint32_t getZ() const
void gather(Buffer &buff, const uint32_t e)
void communicate(Class_Data_Comm_Interface< Impl > &userData)
int8_t getMarker(uint32_t idx)
double getY(Class_Octant< 2 > *oct)
void loadBalance(Class_Data_LB_Interface< Impl > &userData, dvector *weight=NULL)
uint32_t getPointOwnerIdx(dvector &point)
bool getIsNewR(Class_Octant< 2 > *oct)
const u32vector2D & getGhostConnectivity()
double getArea(Class_Octant< 2 > *oct)
void resize(uint32_t newSize)
Local octree portion for each process.
void write(string filename)
void gather(Buffer &buff, const uint32_t e)
Class_Octant< 2 > * getGhostOctant(uint32_t idx)
dvector2D getNodes(Class_Octant< 2 > *oct)
const u32vector2D & getGhostNodes()
bool adapt(u32vector &mapper)
double Y0
Definition: Class_Map.hpp:48
void loadBalance(uint8_t &level)
bool getFiner(Class_Intersection< 2 > *inter)
bool getIsNewR() const
double getY(uint32_t idx)
double getArea(Class_Intersection< 2 > *inter)
double getZ(uint32_t idx)
uint64_t getGhostGlobalIdx(uint32_t idx)
uint64_t getGlobalIdx(uint32_t idx)
const u32vector2D & getConnectivity()
double getSize(Class_Intersection< 2 > *inter)
bool adaptGlobalRefine(u32vector &mapidx)
map< int, vector< uint32_t > > bordersPerProc
size_t size(const uint32_t e) const
bool getIsGhost(Class_Octant< 2 > *oct)
vector< double > getNode(uint32_t idx, uint8_t inode)
bool getNotBalance() const
void setBalance(Class_Octant< 2 > *oct, bool balance)
void getCenter(Class_Octant< 2 > *oct, vector< double > &center)
uint8_t facenode[4][2]
Class_Octant< 2 > * getPointOwner(u32vector &point)
void getNode(uint32_t idx, uint8_t inode, vector< double > &node)
void mapNormals(vector< int8_t > normal, vector< double > &mapnormal)
Definition: Class_Map.tpp:253
double getX(uint32_t idx)
dvector getNormal(Class_Octant< 2 > *oct, uint8_t &iface)
u32vector getGhostOctantConnectivity(Class_Octant< 2 > *oct)
int findOwner(const uint64_t &morton)
double mapX(uint32_t const &X)
Definition: Class_Map.tpp:33
uint8_t getLevel(uint32_t idx)
void getNodes(uint32_t idx, dvector2D &nodes)
double mapZ(uint32_t const &Z)
Definition: Class_Map.tpp:43
double getVolume(Class_Octant< 2 > *oct)
double mapY(uint32_t const &Y)
Definition: Class_Map.tpp:38
bool getIsGhost(Class_Intersection< 2 > *inter)
bool adaptGlobalCoarse(u32vector &mapidx)