32 typedef vector<Class_Octant<3> > OctantsType;
33 typedef vector<uint32_t> u32vector;
34 typedef vector<double> dvector;
35 typedef vector<vector<uint32_t> > u32vector2D;
36 typedef vector<vector<uint64_t> > u64vector2D;
37 typedef vector<double> dvector1D;
38 typedef vector<vector<double> > dvector2D;
39 typedef vector<int> ivector;
40 typedef vector<vector<int> > ivector2D;
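// Convenience typedefs for octant containers and coordinate/weight/index vectors used throughout this class.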
92 Class_Para_Tree(string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD) : log(logfile,comm_),comm(comm_){
99 global_num_octants = octree.getNumOctants();
101 error_flag = MPI_Comm_size(comm,&nproc);
102 error_flag = MPI_Comm_rank(comm,&rank);
107 partition_first_desc = new uint64_t[nproc];
108 partition_last_desc = new uint64_t[nproc];
109 partition_range_globalidx = new uint64_t[nproc];
110 uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
111 uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
112 for(int p = 0; p < nproc; ++p){
113 partition_range_globalidx[p] = 0;
114 partition_last_desc[p] = lastDescMorton;
115 partition_first_desc[p] = firstDescMorton;
118 log.writeLog("---------------------------------------------");
119 log.writeLog("- PABLO PArallel Balanced Linear Octree -");
120 log.writeLog("---------------------------------------------");
122 log.writeLog("---------------------------------------------");
123 log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
124 log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(3)));
125 log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_3D)));
126 log.writeLog("---------------------------------------------");
144 Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
146 Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
151 global_num_octants = octree.getNumOctants();
153 error_flag = MPI_Comm_size(comm,&nproc);
154 error_flag = MPI_Comm_rank(comm,&rank);
159 partition_first_desc = new uint64_t[nproc];
160 partition_last_desc = new uint64_t[nproc];
161 partition_range_globalidx = new uint64_t[nproc];
162 uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
163 uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
164 for(int p = 0; p < nproc; ++p){
165 partition_range_globalidx[p] = 0;
166 partition_last_desc[p] = lastDescMorton;
167 partition_first_desc[p] = firstDescMorton;
170 log.writeLog("---------------------------------------------");
171 log.writeLog("- PABLO PArallel Balanced Linear Octree -");
172 log.writeLog("---------------------------------------------");
174 log.writeLog("---------------------------------------------");
175 log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
176 log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(3)));
177 log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_3D)));
178 log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
179 log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
180 log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
181 log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
182 log.writeLog("---------------------------------------------");
202 Class_Para_Tree(double & X, double & Y, double & Z, double & L, ivector2D & XYZ, ivector & levels, string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
204 Class_Para_Tree(double & X, double & Y, double & Z, double & L, ivector2D & XYZ, ivector & levels, string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
208 uint32_t NumOctants = XYZ.size();
209 octree.octants.resize(NumOctants);
210 for (uint32_t i=0; i<NumOctants; i++){
211 lev = uint8_t(levels[i]);
212 x0 = uint32_t(XYZ[i][0]);
213 y0 = uint32_t(XYZ[i][1]);
214 z0 = uint32_t(XYZ[i][2]);
241 octree.octants[i] = oct;
246 error_flag = MPI_Comm_size(comm,&nproc);
247 error_flag = MPI_Comm_rank(comm,&rank);
249 if (nproc > 1 ) serial = false;
255 partition_first_desc = new uint64_t[nproc];
256 partition_last_desc = new uint64_t[nproc];
257 partition_range_globalidx = new uint64_t[nproc];
261 octree.updateLocalMaxDepth();
267 log.writeLog("---------------------------------------------");
268 log.writeLog("- PABLO PArallel Balanced Linear Octree -");
269 log.writeLog("---------------------------------------------");
271 log.writeLog("---------------------------------------------");
272 log.writeLog("- PABLO restart -");
273 log.writeLog("---------------------------------------------");
274 log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
275 log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(3)));
276 log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_3D)));
277 log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
278 log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
279 log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
280 log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
281 log.writeLog(" Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
282 log.writeLog("---------------------------------------------");
302 Class_Para_Tree(double X, double Y, double Z, double L, ivector2D & XYZ, ivector & levels, string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
304 Class_Para_Tree(double X, double Y, double Z, double L, ivector2D & XYZ, ivector & levels, string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
308 uint32_t NumOctants = XYZ.size();
309 octree.octants.resize(NumOctants);
310 for (uint32_t i=0; i<NumOctants; i++){
311 lev = uint8_t(levels[i]);
312 x0 = uint32_t(XYZ[i][0]);
313 y0 = uint32_t(XYZ[i][1]);
314 z0 = uint32_t(XYZ[i][2]);
341 octree.octants[i] = oct;
346 error_flag = MPI_Comm_size(comm,&nproc);
347 error_flag = MPI_Comm_rank(comm,&rank);
349 if (nproc > 1 ) serial = false;
355 partition_first_desc = new uint64_t[nproc];
356 partition_last_desc = new uint64_t[nproc];
357 partition_range_globalidx = new uint64_t[nproc];
361 octree.updateLocalMaxDepth();
367 log.writeLog("---------------------------------------------");
368 log.writeLog("- PABLO PArallel Balanced Linear Octree -");
369 log.writeLog("---------------------------------------------");
371 log.writeLog("---------------------------------------------");
372 log.writeLog("- PABLO restart -");
373 log.writeLog("---------------------------------------------");
374 log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
375 log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(3)));
376 log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_3D)));
377 log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
378 log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
379 log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
380 log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
381 log.writeLog(" Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
382 log.writeLog("---------------------------------------------");
392 log.writeLog("---------------------------------------------");
393 log.writeLog("--------------- R.I.P. PABLO ----------------");
394 log.writeLog("---------------------------------------------");
395 log.writeLog("---------------------------------------------");
474 vector<double> center;
486 vector<double> center;
508 vector<double> center;
529 vector<double> getNode(uint32_t idx, uint8_t inode) {
531 u32vector node_ = octree.octants[idx].getNode(inode);
541 void getNode(uint32_t idx, uint8_t inode, vector<double>& node) {
542 u32vector node_ = octree.octants[idx].getNode(inode);
577 vector<int8_t> normal_;
591 vector<int8_t> normal_;
637 for(int i = 0; i < global3D.nfaces; ++i)
648 for(int i = 0; i < global3D.nfaces; ++i)
695 if (getIsGhost(oct)){
697 return octree.globalidx_ghosts[idx];
702 return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
704 return uint64_t(idx);
713 if (getIsGhost(oct)){
793 vector<double>& center) {
803 vector<double> center;
840 vector<int8_t> normal_;
853 vector<int8_t> normal_;
899 if (getIsGhost(oct)){
901 return octree.globalidx_ghosts[idx];
907 return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
910 return uint64_t(idx);
915 return global_num_octants;
924 if (getIsGhost(oct)){
933 return octree.getNumOctants();
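// Coordinate and metric accessors: logical (integer) octant data is mapped to physical coordinates, sizes, areas and volumes through the trans member.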
962 return trans.mapX(octree.octants[idx].getX());
970 return trans.mapY(octree.octants[idx].getY());
978 return trans.mapZ(octree.octants[idx].getZ());
986 return trans.mapSize(octree.octants[idx].getSize());
994 return trans.mapArea(octree.octants[idx].getArea());
1002 return trans.mapVolume(octree.octants[idx].getVolume());
1010 vector<double>& center) {
1011 dvector center_ = octree.octants[idx].getCenter();
1020 vector<double> center;
1021 dvector center_ = octree.octants[idx].getCenter();
1032 vector<double> center;
1033 vector<double> center_ = octree.octants[idx].getFaceCenter(iface);
1044 vector<double> center_ = octree.octants[idx].getFaceCenter(iface);
1054 vector<double> center;
1055 vector<double> center_ = octree.octants[idx].getEdgeCenter(iedge);
1066 vector<double> center_ = octree.octants[idx].getEdgeCenter(iedge);
1075 dvector2D & nodes) {
1077 octree.octants[idx].getNodes(nodes_);
1088 octree.octants[idx].getNodes(nodes_);
1101 vector<int8_t> normal_;
1102 octree.octants[idx].getNormal(iface, normal_);
1114 vector<int8_t> normal_;
1115 octree.octants[idx].getNormal(iface, normal_);
1125 return octree.getMarker(idx);
1133 return octree.getLevel(idx);
1141 return !octree.getBalance(idx);
1150 return (findOwner(octree.octants[idx].computeMorton()) != rank);
1159 return octree.octants[idx].getIsNewR();
1167 return octree.octants[idx].getIsNewC();
1176 return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
1179 return uint64_t(idx);
1181 return global_num_octants;
1189 if (idx<octree.size_ghosts){
1190 return octree.globalidx_ghosts[idx];
1192 return uint64_t(octree.size_ghosts);
1200 octree.setMarker(idx, marker);
1208 octree.setBalance(idx, !balance);
1225 return octree.getNumOctants();
1232 return octree.getSizeGhost();
1239 return octree.getLocalMaxDepth();
1246 return octree.getBalanceCodim();
1255 octree.setBalanceCodim(b21codim);
1261 return octree.getFirstDesc();
1265 return octree.getLastDesc();
1268 uint64_t getLastDescMorton(uint32_t idx) {
1269 return octree.octants[idx].buildLastDesc().computeMorton();
1274 void setFirstDesc(){
1275 octree.setFirstDesc();
1279 octree.setLastDesc();
1285 return octree.extractOctant(idx) ;
1295 if (idx < octree.getNumOctants()){
1296 return &octree.octants[idx] ;
1306 if (idx < octree.getSizeGhost()){
1307 return &octree.ghosts[idx] ;
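// findOwner (below): bisection over partition_last_desc to locate the rank whose Morton interval contains the requested key.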
1328 if(morton <= partition_last_desc[seed]){
1330 if(morton > partition_last_desc[seed-1])
1335 if(morton <= partition_last_desc[seed+1])
1339 seed = beg + length/2;
1357 u32vector & neighbours,
1358 vector<bool> & isghost){
1361 octree.findNeighbours(idx, iface, neighbours, isghost);
1363 else if (codim == 2){
1364 octree.findEdgeNeighbours(idx, iface, neighbours, isghost);
1366 else if (codim == 3){
1367 octree.findNodeNeighbours(idx, iface, neighbours, isghost);
1387 u32vector & neighbours,
1388 vector<bool> & isghost){
1391 octree.findNeighbours(oct, iface, neighbours, isghost);
1393 else if (codim == 2){
1394 octree.findEdgeNeighbours(oct, iface, neighbours, isghost);
1396 else if (codim == 3){
1397 octree.findNodeNeighbours(oct, iface, neighbours, isghost);
1416 u32vector & neighbours){
1419 octree.findGhostNeighbours(idx, iface, neighbours);
1421 else if (codim == 2){
1422 octree.findGhostEdgeNeighbours(idx, iface, neighbours);
1424 else if (codim == 3){
1425 octree.findGhostNodeNeighbours(idx, iface, neighbours);
1445 u32vector & neighbours,
1446 vector<bool> & isghost){
1449 octree.findNeighbours(&oct, iface, neighbours, isghost);
1451 else if (codim == 2){
1452 octree.findEdgeNeighbours(&oct, iface, neighbours, isghost);
1454 else if (codim == 3){
1455 octree.findNodeNeighbours(&oct, iface, neighbours, isghost);
1471 return octree.intersections.size();
1479 if (idx < octree.intersections.size()){
1480 return &octree.intersections[idx];
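// Intersection accessors: each intersection stores its two owner octants plus the finer/isghost/iface flags used by the getters below.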
1490 if(inter->finer && inter->isghost)
1491 return octree.extractGhostOctant(inter->owners[inter->finer]).getLevel();
1493 return octree.extractOctant(inter->owners[inter->finer]).getLevel();
1501 return inter->finer;
1509 return inter->getBound();
1517 return inter->getIsGhost();
1526 return inter->getPbound();
1533 return inter->iface;
1541 u32vector owners(2);
1542 owners[0] = inter->owners[0];
1543 owners[1] = inter->owners[1];
1553 if(inter->finer && inter->isghost)
1554 Size = octree.extractGhostOctant(inter->owners[inter->finer]).getSize();
1556 Size = octree.extractOctant(inter->owners[inter->finer]).getSize();
1566 if(inter->finer && inter->isghost)
1567 Area = octree.extractGhostOctant(inter->owners[inter->finer]).getArea();
1569 Area = octree.extractOctant(inter->owners[inter->finer]).getArea();
1578 vector<double> center;
1580 if(inter->finer && inter->isghost)
1581 oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1583 oct = octree.extractOctant(inter->owners[inter->finer]);
1585 int sign = ( int(2*((inter->iface)%2)) - 1);
1586 double deplace = double (sign * int(oct.getSize())) / 2;
1587 center_[inter->iface/2] = uint32_t(int(center_[inter->iface/2]) + deplace);
1599 if(inter->finer && inter->isghost)
1600 oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1602 oct = octree.extractOctant(inter->owners[inter->finer]);
1603 uint8_t iface = inter->iface;
1604 u32vector2D nodes_all;
1608 for (int j=0; j<3; j++){
1609 nodes_[i][j] = nodes_all[global3D.facenode[iface][i]][j];
1623 if(inter->finer && inter->isghost)
1624 oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1626 oct = octree.extractOctant(inter->owners[inter->finer]);
1627 uint8_t iface = inter->iface;
1628 vector<int8_t> normal_;
1640 if(inter.finer && inter.isghost)
1641 Size = octree.extractGhostOctant(inter.owners[inter.finer]).getSize();
1643 Size = octree.extractOctant(inter.owners[inter.finer]).getSize();
1649 if(inter.finer && inter.isghost)
1650 Area = octree.extractGhostOctant(inter.owners[inter.finer]).getArea();
1652 Area = octree.extractOctant(inter.owners[inter.finer]).getArea();
1657 vector<double> center){
1659 if(inter.finer && inter.isghost)
1660 oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1662 oct = octree.extractOctant(inter.owners[inter.finer]);
1664 int sign = ( int(2*((inter.iface)%2)) - 1);
1665 double deplace = double (sign * int(oct.getSize())) / 2;
1666 center_[inter.iface/2] = uint32_t(int(center_[inter.iface/2]) + deplace);
1671 dvector2D & nodes) {
1673 if(inter.finer && inter.isghost)
1674 oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1676 oct = octree.extractOctant(inter.owners[inter.finer]);
1677 uint8_t iface = inter.iface;
1678 u32vector2D nodes_all;
1682 for (int j=0; j<3; j++){
1683 nodes_[i][j] = nodes_all[global3D.facenode[iface][i]][j];
1692 if(inter.finer && inter.isghost)
1693 oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1695 oct = octree.extractOctant(inter.owners[inter.finer]);
1696 uint8_t iface = inter.iface;
1697 vector<int8_t> normal_;
1708 octree.computeIntersections();
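// getPointOwner: map the physical point to logical coordinates, encode its Morton key, and search the local octant array for the octant that contains it.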
1718 uint32_t noctants = octree.octants.size();
1719 uint32_t idxtry = noctants/2;
1721 uint64_t morton, mortontry;
1724 x = trans.mapX(point[0]);
1725 y = trans.mapY(point[1]);
1726 z = trans.mapZ(point[2]);
1733 morton = mortonEncode_magicbits(x,y,z);
1736 if (!serial) powner = findOwner(morton);
1740 if ((powner!=rank) && (!serial))
1744 int32_t jump = idxtry;
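// Bisection-like jump search: halve the signed jump and step idxtry toward the target Morton key until the jump vanishes, then fine-tune linearly.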
1745 while(abs(jump) > 0){
1746 mortontry = octree.octants[idxtry].computeMorton();
1747 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1749 if (idxtry > noctants-1){
1751 idxtry = noctants - 1;
1760 if(octree.octants[idxtry].computeMorton() == morton){
1761 return &octree.octants[idxtry];
1766 while(octree.octants[idxtry].computeMorton() < morton){
1768 if(idxtry > noctants-1){
1769 idxtry = noctants-1;
1773 while(octree.octants[idxtry].computeMorton() > morton){
1775 if(idxtry > noctants-1){
1781 return &octree.octants[idxtry];
1793 uint32_t noctants = octree.octants.size();
1794 uint32_t idxtry = noctants/2;
1796 uint64_t morton, mortontry;
1799 x = trans.mapX(point[0]);
1800 y = trans.mapY(point[1]);
1801 z = trans.mapZ(point[2]);
1804 || (point[0] < trans.X0) || (point[1] < trans.Y0) || (point[2] < trans.Z0)){
1811 morton = mortonEncode_magicbits(x,y,z);
1815 if(!serial) powner = findOwner(morton);
1819 if ((powner!=rank) && (!serial))
1823 int32_t jump = idxtry;
1824 while(abs(jump) > 0){
1826 mortontry = octree.octants[idxtry].computeMorton();
1827 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1829 if (idxtry > noctants-1){
1831 idxtry = noctants - 1;
1840 if(octree.octants[idxtry].computeMorton() == morton){
1846 while(octree.octants[idxtry].computeMorton() < morton){
1848 if(idxtry > noctants-1){
1849 idxtry = noctants-1;
1853 while(octree.octants[idxtry].computeMorton() > morton){
1855 if(idxtry > noctants-1){
1872 uint32_t noctants = octree.octants.size();
1873 uint32_t idxtry = noctants/2;
1875 uint64_t morton, mortontry;
1882 || (point[0] < trans.X0) || (point[1] < trans.Y0) || (point[2] < trans.Z0))
1888 morton = mortonEncode_magicbits(x,y,z);
1891 if(!serial) powner = findOwner(morton);
1895 if ((powner!=rank) && (!serial))
1899 int32_t jump = idxtry;
1900 while(abs(jump) > 0){
1901 mortontry = octree.octants[idxtry].computeMorton();
1902 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1904 if (idxtry > noctants-1){
1906 idxtry = noctants - 1;
1915 if(octree.octants[idxtry].computeMorton() == morton){
1916 return &octree.octants[idxtry];
1921 while(octree.octants[idxtry].computeMorton() < morton){
1923 if(idxtry > noctants-1){
1924 idxtry = noctants-1;
1928 while(octree.octants[idxtry].computeMorton() > morton){
1930 if(idxtry > noctants-1){
1936 return &octree.octants[idxtry];
1948 uint32_t noctants = octree.octants.size();
1949 uint32_t idxtry = noctants/2;
1951 uint64_t morton, mortontry;
1964 morton = mortonEncode_magicbits(x,y,z);
1967 if (!serial) powner = findOwner(morton);
1971 if ((powner!=rank) && (!serial))
1974 int32_t jump = idxtry;
1975 while(abs(jump) > 0){
1976 mortontry = octree.octants[idxtry].computeMorton();
1977 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1979 if (idxtry > noctants-1){
1981 idxtry = noctants - 1;
1990 if(octree.octants[idxtry].computeMorton() == morton){
1996 while(octree.octants[idxtry].computeMorton() < morton){
1998 if(idxtry > noctants-1){
1999 idxtry = noctants-1;
2003 while(octree.octants[idxtry].computeMorton() > morton){
2005 if(idxtry > noctants-1){
2023 uint32_t noctants = octree.octants.size();
2024 uint32_t idxtry = noctants/2;
2026 uint64_t morton, mortontry;
2029 x = uint32_t(point[0]);
2030 y = uint32_t(point[1]);
2031 z = uint32_t(point[2]);
2032 if ((point[0] < 0) || (point[0] > double(global3D.max_length)) || (point[1] < 0) || (point[1] > double(global3D.max_length)) || (point[2] < 0) || (point[2] > double(global3D.max_length)))
2038 morton = mortonEncode_magicbits(x,y,z);
2041 if (!serial) powner = findOwner(morton);
2045 if ((powner!=rank) && (!serial))
2049 int32_t jump = idxtry;
2050 while(abs(jump) > 0){
2051 mortontry = octree.octants[idxtry].computeMorton();
2052 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
2054 if (idxtry > noctants-1){
2056 idxtry = noctants - 1;
2065 if(octree.octants[idxtry].computeMorton() == morton){
2066 return &octree.octants[idxtry];
2071 while(octree.octants[idxtry].computeMorton() < morton){
2073 if(idxtry > noctants-1){
2074 idxtry = noctants-1;
2078 while(octree.octants[idxtry].computeMorton() > morton){
2080 if(idxtry > noctants-1){
2086 return &octree.octants[idxtry];
2098 uint32_t noctants = octree.octants.size();
2099 uint32_t idxtry = noctants/2;
2101 uint64_t morton, mortontry;
2104 x = uint32_t(point[0]);
2105 y = uint32_t(point[1]);
2106 z = uint32_t(point[2]);
2107 if ((point[0] < 0) || (point[0] > double(global3D.max_length)) || (point[1] < 0) || (point[1] > double(global3D.max_length)) || (point[2] < 0) || (point[2] > double(global3D.max_length)))
2113 morton = mortonEncode_magicbits(x,y,z);
2116 if (!serial) powner = findOwner(morton);
2120 if ((powner!=rank) && (!serial))
2124 int32_t jump = idxtry;
2125 while(abs(jump) > 0){
2126 mortontry = octree.octants[idxtry].computeMorton();
2127 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
2129 if (idxtry > noctants-1){
2131 idxtry = noctants - 1;
2140 if(octree.octants[idxtry].computeMorton() == morton){
2146 while(octree.octants[idxtry].computeMorton() < morton){
2148 if(idxtry > noctants-1){
2149 idxtry = noctants-1;
2153 while(octree.octants[idxtry].computeMorton() > morton){
2155 if(idxtry > noctants-1){
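// computePartition (uniform): give every rank global_num_octants/nproc octants; the first 'remind' ranks receive one extra octant.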
2171 void computePartition(uint32_t* partition){
2172 uint32_t division_result = 0;
2173 uint32_t remind = 0;
2174 division_result = uint32_t(global_num_octants/(uint64_t)nproc);
2175 remind = (uint32_t)(global_num_octants%(uint64_t)nproc);
2176 for(uint32_t i = 0; i < (uint32_t)nproc; ++i)
2178 partition[i] = division_result + 1;
2180 partition[i] = division_result;
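// computePartition (weighted): split the octants so that every rank carries roughly global_weight/nproc of the user-supplied weight.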
2185 void computePartition(uint32_t* partition, dvector* weight){
2190 double division_result = 0;
2191 double global_weight = 0.0;
2192 for (int i=0; i<weight->size(); i++){
2193 global_weight += (*weight)[i];
2196 division_result = global_weight/(double)nproc;
2201 uint32_t i = 0, tot = 0;
2203 while (iproc < nproc-1){
2204 double partial_weight = 0.0;
2205 partition[iproc] = 0;
2206 while(partial_weight < division_result){
2207 partial_weight += (*weight)[i];
2214 partition[nproc-1] = weight->size() - tot;
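// Parallel weighted branch: gather every rank's local weight, then iteratively shift excess weight towards neighbouring ranks (forward pass, then backward pass) to even the load.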
2219 double division_result = 0;
2221 dvector local_weight(nproc,0.0);
2222 dvector temp_local_weight(nproc,0.0);
2223 dvector2D sending_weight(nproc, dvector(nproc,0.0));
2224 double* rbuff = new double[nproc];
2225 double global_weight = 0.0;
2226 for (int i=0; i<weight->size(); i++){
2227 local_weight[rank] += (*weight)[i];
2229 error_flag = MPI_Allgather(&local_weight[rank],1,MPI_DOUBLE,rbuff,1,MPI_DOUBLE,comm);
2230 for (int i=0; i<nproc; i++){
2231 local_weight[i] = rbuff[i];
2232 global_weight += rbuff[i];
2234 delete [] rbuff; rbuff = NULL;
2235 division_result = global_weight/(double)nproc;
2239 temp_local_weight = local_weight;
2242 for (int iter = 0; iter < 1; iter++){
2244 vector<double> delta(nproc);
2245 for (int i=0; i<nproc; i++){
2246 delta[i] = temp_local_weight[i] - division_result;
2249 for (int i=0; i<nproc-1; i++){
2251 double post_weight = 0.0;
2252 for (int j=i+1; j<nproc; j++){
2253 post_weight += temp_local_weight[j];
2256 if (temp_local_weight[i] > division_result){
2258 delta[i] = temp_local_weight[i] - division_result;
2259 if (post_weight < division_result*(nproc-i-1)){
2261 double post_delta = division_result*(nproc-i-1) - post_weight;
2262 double delta_sending = min(local_weight[i], min(delta[i], post_delta));
2265 while (delta_sending > 0 && jproc<nproc){
2266 sending = min(division_result, delta_sending);
2267 sending = min(sending, (division_result-temp_local_weight[jproc]));
2268 sending = max(sending, 0.0);
2269 sending_weight[i][jproc] += sending;
2270 temp_local_weight[jproc] += sending;
2271 temp_local_weight[i] -= sending;
2272 delta_sending -= sending;
2273 delta[i] -= delta_sending;
2281 for (int i = nproc-1; i>0; i--){
2283 double pre_weight = 0.0;
2284 for (int j=i-1; j>=0; j--){
2285 pre_weight += temp_local_weight[j];
2287 if (temp_local_weight[i] > division_result){
2289 delta[i] = temp_local_weight[i] - division_result;
2290 if (pre_weight < division_result*(i)){
2292 double pre_delta = division_result*(i) - pre_weight;
2293 double delta_sending = min(local_weight[i], min(temp_local_weight[i], min(delta[i], pre_delta)));
2296 while (delta_sending > 0 && jproc >=0){
2297 sending = min(division_result, delta_sending);
2298 sending = min(sending, (division_result-temp_local_weight[jproc]));
2299 sending = max(sending, 0.0);
2300 sending_weight[i][jproc] += sending;
2301 temp_local_weight[jproc] += sending;
2302 temp_local_weight[i] -= sending;
2303 delta_sending -= sending;
2304 delta[i] -= delta_sending;
2314 u32vector sending_cell(nproc,0);
2316 int i = getNumOctants();
2317 for (int jproc=nproc-1; jproc>rank; jproc--){
2318 double pack_weight = 0.0;
2319 while(pack_weight < sending_weight[rank][jproc] && i > 0){
2321 pack_weight += (*weight)[i];
2322 sending_cell[jproc]++;
2325 partition[rank] = i;
2327 for (int jproc=0; jproc<rank; jproc++){
2328 double pack_weight = 0.0;
2330 while(pack_weight < sending_weight[rank][jproc] && i < getNumOctants()-1){
2332 pack_weight += (*weight)[i];
2333 sending_cell[jproc]++;
2336 partition[rank] -= i;
2339 u32vector rec_cell(nproc,0);
2340 MPI_Request* req = new MPI_Request[nproc*10];
2341 MPI_Status* stats = new MPI_Status[nproc*10];
2343 for (int iproc=0; iproc<nproc; iproc++){
2344 error_flag = MPI_Irecv(&rec_cell[iproc],1,MPI_UINT32_T,iproc,rank,comm,&req[nReq]);
2347 for (int iproc=0; iproc<nproc; iproc++){
2348 error_flag = MPI_Isend(&sending_cell[iproc],1,MPI_UINT32_T,iproc,iproc,comm,&req[nReq]);
2351 MPI_Waitall(nReq,req,stats);
2353 delete [] req; req = NULL;
2354 delete [] stats; stats = NULL;
2357 for (int jproc=0; jproc<nproc; jproc++){
2358 i+= rec_cell[jproc];
2360 partition[rank] += i;
2362 uint32_t part = partition[rank];
2363 error_flag = MPI_Allgather(&part,1,MPI_UINT32_T,partition,1,MPI_UINT32_T,comm);
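// computePartition (level-aware): shift the uniform partition boundaries so that octant families that are complete up to the requested level are not split between ranks.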
2370 void computePartition(uint32_t* partition,
2372 uint8_t level = uint8_t(min(int(max(int(max_depth) - int(level_), int(1))) , MAX_LEVEL_3D));
2373 uint32_t* partition_temp = new uint32_t[nproc];
2375 uint8_t* boundary_proc = new uint8_t[nproc-1];
2376 uint8_t dimcomm, indcomm;
2377 uint8_t* glbdimcomm = new uint8_t[nproc];
2378 uint8_t* glbindcomm = new uint8_t[nproc];
2380 uint32_t division_result = 0;
2381 uint32_t remind = 0;
2382 uint32_t Dh = uint32_t(pow(double(2), double(MAX_LEVEL_3D-level)));
2383 uint32_t istart, nocts, rest, forw, backw;
2384 uint32_t i = 0, iproc, j;
2386 int32_t* pointercomm;
2387 int32_t* deplace = new int32_t[nproc-1];
2388 division_result = uint32_t(global_num_octants/(uint64_t)nproc);
2389 remind = (uint32_t)(global_num_octants%(uint64_t)nproc);
2390 for(uint32_t i = 0; i < uint32_t(nproc); ++i)
2392 partition_temp[i] = division_result + 1;
2394 partition_temp[i] = division_result;
2398 for (iproc=0; iproc<uint32_t(nproc-1); iproc++){
2399 sum += partition_temp[iproc];
2400 while(sum > partition_range_globalidx[j]){
2403 boundary_proc[iproc] = j;
2405 nocts = octree.octants.size();
2409 for (iproc=0; iproc<uint32_t(nproc-1); iproc++){
2411 sum += partition_temp[iproc];
2412 if (boundary_proc[iproc] == rank){
2418 istart = sum - partition_range_globalidx[rank-1] - 1;
2423 rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh + octree.octants[i].getZ()%Dh;
2430 rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh + octree.octants[i].getZ()%Dh;
2434 rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh + octree.octants[i].getZ()%Dh;
2441 rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh + octree.octants[i].getZ()%Dh;
2445 deplace[iproc] = forw;
2447 deplace[iproc] = -(int32_t)backw;
2451 error_flag = MPI_Allgather(&dimcomm,1,MPI_UINT8_T,glbdimcomm,1,MPI_UINT8_T,comm);
2452 error_flag = MPI_Allgather(&indcomm,1,MPI_UINT8_T,glbindcomm,1,MPI_UINT8_T,comm);
2453 for (iproc=0; iproc<uint32_t(nproc); iproc++){
2454 pointercomm = &deplace[glbindcomm[iproc]];
2455 error_flag = MPI_Bcast(pointercomm, glbdimcomm[iproc], MPI_INT32_T, iproc, comm);
2458 for (iproc=0; iproc<uint32_t(nproc); iproc++){
2459 if (iproc < uint32_t(nproc-1))
2460 partition[iproc] = partition_temp[iproc] + deplace[iproc];
2462 partition[iproc] = partition_temp[iproc];
2464 partition[iproc] = partition[iproc] - deplace[iproc-1];
2467 delete [] partition_temp; partition_temp = NULL;
2468 delete [] boundary_proc; boundary_proc = NULL;
2469 delete [] glbdimcomm; glbdimcomm = NULL;
2470 delete [] glbindcomm; glbindcomm = NULL;
2471 delete [] deplace; deplace = NULL;
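// updateLoadBalance: after octants have moved, recompute each rank's global index range and gather the new first/last descendant Morton keys of every process.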
2477 void updateLoadBalance(){
2478 octree.updateLocalMaxDepth();
2480 uint64_t* rbuff = new uint64_t[nproc];
2481 uint64_t local_num_octants = octree.getNumOctants();
2482 error_flag = MPI_Allgather(&local_num_octants,1,MPI_UINT64_T,rbuff,1,MPI_UINT64_T,comm);
2483 for(int p = 0; p < nproc; ++p){
2484 partition_range_globalidx[p] = 0;
2485 for(int pp = 0; pp <=p; ++pp)
2486 partition_range_globalidx[p] += rbuff[pp];
2487 --partition_range_globalidx[p];
2490 octree.setFirstDesc();
2491 octree.setLastDesc();
2493 uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
2494 error_flag = MPI_Allgather(&lastDescMorton,1,MPI_UINT64_T,partition_last_desc,1,MPI_UINT64_T,comm);
2495 uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
2496 error_flag = MPI_Allgather(&firstDescMorton,1,MPI_UINT64_T,partition_first_desc,1,MPI_UINT64_T,comm);
2498 delete [] rbuff; rbuff = NULL;
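// setPboundGhosts: flag the faces that touch another process, record which local octants border which ranks, and exchange those octants to build the ghost layer.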
2503 void setPboundGhosts(){
2512 bordersPerProc.clear();
2516 for(uint8_t i = 0; i < global3D.nfaces; ++i){
2517 if(it->getBound(i) == false){
2518 uint32_t virtualNeighborsSize = 0;
2519 uint8_t nvirtualneigh=0;
2520 vector<uint64_t> virtualNeighbors = it->computeVirtualMorton(i,max_depth,virtualNeighborsSize);
2521 uint32_t maxDelta = virtualNeighborsSize/2;
2522 for(uint32_t j = 0; j <= maxDelta; ++j){
2523 int pBegin = findOwner(virtualNeighbors[j]);
2524 int pEnd = findOwner(virtualNeighbors[virtualNeighborsSize - 1 - j]);
2525 procs.insert(pBegin);
2527 if(pBegin != rank || pEnd != rank){
2535 if (nvirtualneigh!=0){
2536 it->setPbound(i, true);
2539 it->setPbound(i, false);
2544 for(uint8_t e = 0; e < global3D.nedges; ++e){
2545 uint32_t virtualEdgeNeighborSize = 0;
2546 vector<uint64_t> virtualEdgeNeighbors = it->computeEdgeVirtualMorton(e,max_depth,virtualEdgeNeighborSize,octree.balance_codim);
2547 uint32_t maxDelta = virtualEdgeNeighborSize/2;
2548 if(virtualEdgeNeighborSize){
2549 for(uint32_t ee = 0; ee <= maxDelta; ++ee){
2550 int pBegin = findOwner(virtualEdgeNeighbors[ee]);
2551 int pEnd = findOwner(virtualEdgeNeighbors[virtualEdgeNeighborSize - 1- ee]);
2552 procs.insert(pBegin);
2558 for(uint8_t c = 0; c < global3D.nnodes; ++c){
2559 uint32_t virtualCornerNeighborSize = 0;
2560 uint64_t virtualCornerNeighbor = it ->computeNodeVirtualMorton(c,max_depth,virtualCornerNeighborSize);
2561 if(virtualCornerNeighborSize){
2562 int proc = findOwner(virtualCornerNeighbor);
2567 set<int>::iterator pitend = procs.end();
2568 for(set<int>::iterator pit = procs.begin(); pit != pitend; ++pit){
2572 bordersPerProc[p].push_back(distance(begin,it));
2573 vector<uint32_t> & bordersSingleProc = bordersPerProc[p];
2574 if(bordersSingleProc.capacity() - bordersSingleProc.size() < 2)
2575 bordersSingleProc.reserve(2*bordersSingleProc.size());
2587 uint64_t global_index;
2592 map<int,Class_Comm_Buffer> sendBuffers;
2593 map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
2594 uint32_t pbordersOversize = 0;
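// Pack every border octant (coordinates, level, marker, info flags, global index) into the communication buffer of the destination rank.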
2595 for(map<int,vector<uint32_t> >::iterator bit = bordersPerProc.begin(); bit != bitend; ++bit){
2596 pbordersOversize += bit->second.size();
2598 int key = bit->first;
2599 const vector<uint32_t> & value = bit->second;
2602 int nofBorders = value.size();
2603 for(int i = 0; i < nofBorders; ++i){
2611 global_index = getGlobalIdx(value[i]);
2612 for(int i = 0; i < 16; ++i)
2613 info[i] = octant.info[i];
2614 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2615 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2616 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2617 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2618 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2619 for(int j = 0; j < 16; ++j){
2620 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2622 error_flag = MPI_Pack(&global_index,1,MPI_INT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2629 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
2630 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
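// Two-phase exchange: first communicate the buffer sizes, then the packed octant data, using nonblocking send/receive pairs completed by MPI_Waitall.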
2632 map<int,int> recvBufferSizePerProc;
2633 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
2634 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2635 recvBufferSizePerProc[sit->first] = 0;
2636 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
2639 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
2640 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2641 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
2644 MPI_Waitall(nReq,req,stats);
2650 uint32_t nofBytesOverProc = 0;
2651 map<int,Class_Comm_Buffer> recvBuffers;
2652 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
2653 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
2657 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2658 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
2659 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
2662 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2663 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
2666 MPI_Waitall(nReq,req,stats);
2672 octree.size_ghosts = nofGhosts;
2673 octree.ghosts.clear();
2674 octree.ghosts.resize(nofGhosts);
2675 octree.globalidx_ghosts.resize(nofGhosts);
2680 uint32_t ghostCounter = 0;
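// Unpack the received octants into the ghost container, restoring marker, info flags and global index for each ghost.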
2681 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
2682 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
2685 for(int i = 0; i < nofGhostsPerProc; ++i){
2686 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
2687 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
2688 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&z,1,MPI_UINT32_T,comm);
2689 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
2691 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
2692 octree.ghosts[ghostCounter].setMarker(m);
2693 for(int j = 0; j < 16; ++j){
2694 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
2695 octree.ghosts[ghostCounter].info[j] = info[j];
2697 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&global_index,1,MPI_INT64_T,comm);
2698 octree.globalidx_ghosts[ghostCounter] = global_index;
2702 recvBuffers.clear();
2703 sendBuffers.clear();
2704 recvBufferSizePerProc.clear();
2706 delete [] req; req = NULL;
2707 delete [] stats; stats = NULL;
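// loadBalance: compute the target partition, then migrate the head of the local octant array towards lower ranks and the tail towards higher ranks so every process ends up with its new contiguous Morton range.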
2721 log.writeLog("---------------------------------------------");
2722 log.writeLog(" LOAD BALANCE ");
2724 uint32_t* partition = new uint32_t [nproc];
2725 computePartition(partition);
2729 log.writeLog(" Initial Serial distribution : ");
2730 for(int ii=0; ii<nproc; ii++){
2731 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
2734 uint32_t stride = 0;
2735 for(int i = 0; i < rank; ++i)
2736 stride += partition[i];
2740 octree.octants.assign(first, last);
2741 #if defined(__INTEL_COMPILER) || defined(__ICC)
2743 octree.octants.shrink_to_fit();
2745 first = octantsCopy.end();
2746 last = octantsCopy.end();
2749 updateLoadBalance();
2756 log.writeLog(" Initial Parallel partition : ");
2757 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
2758 for(int ii=1; ii<nproc; ii++){
2759 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
2763 octree.ghosts.clear();
2764 octree.size_ghosts = 0;
2766 uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
2767 for(int p = 0; p < nproc; ++p){
2768 newPartitionRangeGlobalidx[p] = 0;
2769 for(int pp = 0; pp <= p; ++pp)
2770 newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
2771 --newPartitionRangeGlobalidx[p];
2779 lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
2783 else if(lh > (int32_t)(octree.octants.size() - 1))
2784 lh = octree.octants.size() - 1;
2786 if(rank == nproc - 1)
2787 ft = octree.octants.size();
2789 ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
2791 ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
2793 if(ft > (int32_t)(octree.octants.size() - 1))
2794 ft = octree.octants.size();
2799 uint32_t headSize = (uint32_t)(lh + 1);
2800 uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
2801 uint32_t headOffset = headSize;
2802 uint32_t tailOffset = tailSize;
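// headSize octants at the front of the local array must move to predecessor ranks, tailSize octants at the back to successor ranks.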
2805 map<int,Class_Comm_Buffer> sendBuffers;
2808 int64_t firstOctantGlobalIdx = 0;
2809 int64_t globalLastHead = (int64_t) lh;
2810 int64_t globalFirstTail = (int64_t) ft;
2811 int firstPredecessor = -1;
2812 int firstSuccessor = nproc;
2814 firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
2815 globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
2816 globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
2817 for(int pre = rank - 1; pre >=0; --pre){
2818 if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
2819 firstPredecessor = pre;
2821 for(int post = rank + 1; post < nproc; ++post){
2822 if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
2823 firstSuccessor = post;
2838 uint32_t nofElementsFromSuccessiveToPrevious = 0;
2840 for(int p = firstPredecessor; p >= 0; --p){
2841 if(headSize < partition[p]){
2842 intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
2843 intBuffer = abs(intBuffer);
2844 nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
2845 if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
2846 nofElementsFromSuccessiveToPrevious = headSize;
2848 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
2851 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
2859 for(uint32_t ii = 0; ii < 16; ++ii)
2860 info[ii] = octant.info[ii];
2861 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2862 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2863 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2864 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2865 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2866 for(int j = 0; j < 16; ++j){
2867 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2870 if(nofElementsFromSuccessiveToPrevious == headSize)
2873 lh -= nofElementsFromSuccessiveToPrevious;
2874 globalLastHead -= nofElementsFromSuccessiveToPrevious;
2879 nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
2880 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
2883 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
2891 for(int ii = 0; ii < 16; ++ii)
2892 info[ii] = octant.info[ii];
2893 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2894 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2895 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2896 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2897 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2898 for(int j = 0; j < 16; ++j){
2899 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2902 lh -= nofElementsFromSuccessiveToPrevious;
2903 globalLastHead -= nofElementsFromSuccessiveToPrevious;
2911 uint32_t nofElementsFromPreviousToSuccessive = 0;
2915 for(int p = firstSuccessor; p < nproc; ++p){
2916 if(tailSize < partition[p]){
2917 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
2918 if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
2919 nofElementsFromPreviousToSuccessive = tailSize;
2921 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
2924 uint32_t octantsSize = (uint32_t)octree.octants.size();
2925 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
2933 for(int ii = 0; ii < 16; ++ii)
2934 info[ii] = octant.info[ii];
2935 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2936 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2937 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2938 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2939 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2940 for(int j = 0; j < 16; ++j){
2941 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2944 if(nofElementsFromPreviousToSuccessive == tailSize)
2946 ft += nofElementsFromPreviousToSuccessive;
2947 globalFirstTail += nofElementsFromPreviousToSuccessive;
2948 tailSize -= nofElementsFromPreviousToSuccessive;
2952 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
2953 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
2955 uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
2957 for(uint32_t i = ft; i <= endOctants; ++i ){
2965 for(int ii = 0; ii < 16; ++ii)
2966 info[ii] = octant.info[ii];
2967 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2968 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2969 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2970 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2971 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2972 for(int j = 0; j < 16; ++j){
2973 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2976 ft += nofElementsFromPreviousToSuccessive;
2977 globalFirstTail += nofElementsFromPreviousToSuccessive;
2978 tailSize -= nofElementsFromPreviousToSuccessive;
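// Build the communication pattern: every rank advertises the set of ranks it sends to, so each process can derive the set of ranks it will receive from.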
2986 vector<Class_Array> recvs(nproc);
2987 recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
2988 recvs[rank].array[0] = rank;
2990 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
2991 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2992 recvs[rank].array[counter] = sit->first;
2995 int* nofRecvsPerProc = new int[nproc];
2996 error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
2997 int globalRecvsBuffSize = 0;
2998 int* displays = new int[nproc];
2999 for(int pp = 0; pp < nproc; ++pp){
3001 globalRecvsBuffSize += nofRecvsPerProc[pp];
3002 for(int ppp = 0; ppp < pp; ++ppp){
3003 displays[pp] += nofRecvsPerProc[ppp];
3006 int* globalRecvsBuff = new int[globalRecvsBuffSize];
3007 error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3009 vector<set<int> > sendersPerProc(nproc);
3010 for(int pin = 0; pin < nproc; ++pin){
3011 for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3012 sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3017 MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3018 MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3020 map<int,int> recvBufferSizePerProc;
3021 set<int>::iterator senditend = sendersPerProc[rank].end();
3022 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3023 recvBufferSizePerProc[*sendit] = 0;
3024 error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3027 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3028 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3029 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3032 MPI_Waitall(nReq,req,stats);
3037 uint32_t nofNewHead = 0;
3038 uint32_t nofNewTail = 0;
3039 map<int,Class_Comm_Buffer> recvBuffers;
3040 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3041 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3043 uint32_t nofNewPerProc = (uint32_t)(rit->second / (uint32_t)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8)));
3044 if(rit->first < rank)
3045 nofNewHead += nofNewPerProc;
3046 else if(rit->first > rank)
3047 nofNewTail += nofNewPerProc;
3050 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3052 error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3055 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3056 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3059 MPI_Waitall(nReq,req,stats);
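// Rearrange the local array: keep the resident octants, then splice the newly received head and tail octants around them.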
3062 uint32_t resEnd = octree.getNumOctants() - tailOffset;
3063 uint32_t nofResidents = resEnd - headOffset;
3065 for(uint32_t i = headOffset; i < resEnd; ++i){
3066 octree.octants[octCounter] = octree.octants[i];
3069 uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3070 octree.octants.resize(newCounter);
3072 uint32_t resCounter = nofNewHead + nofResidents - 1;
3073 for(uint32_t k = 0; k < nofResidents ; ++k){
3074 octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3079 bool jumpResident = false;
3080 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3081 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3082 uint32_t nofNewPerProc = (uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8)));
3084 if(rbit->first > rank && !jumpResident){
3085 newCounter += nofResidents ;
3086 jumpResident = true;
3088 for(int i = nofNewPerProc - 1; i >= 0; --i){
3089 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
3090 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
3091 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&z,1,MPI_UINT32_T,comm);
3092 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
3094 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
3095 octree.octants[newCounter].setMarker(m);
3096 for(int j = 0; j < 16; ++j){
3097 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
3098 octree.octants[newCounter].info[j] = info[j];
3103 #if defined(__INTEL_COMPILER) || defined(__ICC)
3105 octree.octants.shrink_to_fit();
3107 delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3108 delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3109 delete [] displays; displays = NULL;
3110 delete [] req; req = NULL;
3111 delete [] stats; stats = NULL;
3112 delete [] globalRecvsBuff; globalRecvsBuff = NULL;
3114 updateLoadBalance();
3118 delete [] partition; partition = NULL;
3122 log.writeLog(" Final Parallel partition : ");
3123 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3124 for(int ii=1; ii<nproc; ii++){
3125 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3128 log.writeLog("---------------------------------------------");
3142 log.writeLog("---------------------------------------------");
3143 log.writeLog(" LOAD BALANCE ");
3145 uint32_t* partition = new uint32_t [nproc];
3146 computePartition(partition, level);
3150 log.writeLog(" Initial Serial distribution : ");
3151 for(int ii=0; ii<nproc; ii++){
3152 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3155 uint32_t stride = 0;
3156 for(int i = 0; i < rank; ++i)
3157 stride += partition[i];
3161 octree.octants.assign(first, last);
3162 #if defined(__INTEL_COMPILER) || defined(__ICC)
3164 octree.octants.shrink_to_fit();
3166 first = octantsCopy.end();
3167 last = octantsCopy.end();
3170 updateLoadBalance();
3177 log.writeLog(" Initial Parallel partition : ");
3178 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3179 for(int ii=1; ii<nproc; ii++){
3180 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3184 octree.ghosts.clear();
3185 octree.size_ghosts = 0;
3187 uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
3188 for(int p = 0; p < nproc; ++p){
3189 newPartitionRangeGlobalidx[p] = 0;
3190 for(int pp = 0; pp <= p; ++pp)
3191 newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
3192 --newPartitionRangeGlobalidx[p];
3200 lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
3204 else if(lh > (int32_t)(octree.octants.size() - 1))
3205 lh = octree.octants.size() - 1;
3207 if(rank == nproc - 1)
3208 ft = octree.octants.size();
3210 ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
3212 ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
3214 if(ft > (int32_t)(octree.octants.size() - 1))
3215 ft = octree.octants.size();
3220 uint32_t headSize = (uint32_t)(lh + 1);
3221 uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
3222 uint32_t headOffset = headSize;
3223 uint32_t tailOffset = tailSize;
3226 map<int,Class_Comm_Buffer> sendBuffers;
3229 int64_t firstOctantGlobalIdx = 0;
3230 int64_t globalLastHead = (int64_t) lh;
3231 int64_t globalFirstTail = (int64_t) ft;
3232 int firstPredecessor = -1;
3233 int firstSuccessor = nproc;
3235 firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
3236 globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
3237 globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
3238 for(int pre = rank - 1; pre >=0; --pre){
3239 if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
3240 firstPredecessor = pre;
3242 for(int post = rank + 1; post < nproc; ++post){
3243 if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
3244 firstSuccessor = post;
3259 uint32_t nofElementsFromSuccessiveToPrevious = 0;
3261 for(int p = firstPredecessor; p >= 0; --p){
3262 if(headSize < partition[p]){
3263 intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
3264 intBuffer = abs(intBuffer);
3265 nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
3266 if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
3267 nofElementsFromSuccessiveToPrevious = headSize;
3269 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3272 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3280 for(int ii = 0; ii < 16; ++ii)
3281 info[ii] = octant.info[ii];
3282 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3283 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3284 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3285 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3286 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3287 for(int j = 0; j < 16; ++j){
3288 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3291 if(nofElementsFromSuccessiveToPrevious == headSize)
3294 lh -= nofElementsFromSuccessiveToPrevious;
3295 globalLastHead -= nofElementsFromSuccessiveToPrevious;
3300 nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
3301 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3304 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3312 for(int ii = 0; ii < 16; ++ii)
3313 info[ii] = octant.info[ii];
3314 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3315 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3316 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3317 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3318 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3319 for(int j = 0; j < 16; ++j){
3320 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3323 lh -= nofElementsFromSuccessiveToPrevious;
3324 globalLastHead -= nofElementsFromSuccessiveToPrevious;
3332 uint32_t nofElementsFromPreviousToSuccessive = 0;
3336 for(int p = firstSuccessor; p < nproc; ++p){
3337 if(tailSize < partition[p]){
3338 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3339 if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
3340 nofElementsFromPreviousToSuccessive = tailSize;
3342 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3345 uint32_t octantsSize = (uint32_t)octree.octants.size();
3346 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3354 for(int ii = 0; ii < 16; ++ii)
3355 info[ii] = octant.info[ii];
3356 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3357 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3358 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3359 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3360 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3361 for(int j = 0; j < 16; ++j){
3362 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3365 if(nofElementsFromPreviousToSuccessive == tailSize)
3367 ft += nofElementsFromPreviousToSuccessive;
3368 globalFirstTail += nofElementsFromPreviousToSuccessive;
3369 tailSize -= nofElementsFromPreviousToSuccessive;
3373 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3374 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3376 uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
3378 for(uint32_t i = ft; i <= endOctants; ++i ){
3386 for(int ii = 0; ii < 16; ++ii)
3387 info[ii] = octant.info[ii];
3388 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3389 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3390 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3391 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3392 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3393 for(int j = 0; j < 16; ++j){
3394 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3397 ft += nofElementsFromPreviousToSuccessive;
3398 globalFirstTail += nofElementsFromPreviousToSuccessive;
3399 tailSize -= nofElementsFromPreviousToSuccessive;
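// Communication-topology discovery: every rank advertises the list of ranks it will send to
// via MPI_Allgather/MPI_Allgatherv, so each rank can derive the set of senders it must post
// receives for (sendersPerProc).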
3407 vector<Class_Array> recvs(nproc);
3408 recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
3409 recvs[rank].array[0] = rank;
3411 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
3412 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
3413 recvs[rank].array[counter] = sit->first;
3416 int* nofRecvsPerProc = new int[nproc];
3417 error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
3418 int globalRecvsBuffSize = 0;
3419 int* displays = new int[nproc];
3420 for(int pp = 0; pp < nproc; ++pp){
3422 globalRecvsBuffSize += nofRecvsPerProc[pp];
3423 for(int ppp = 0; ppp < pp; ++ppp){
3424 displays[pp] += nofRecvsPerProc[ppp];
3427 int* globalRecvsBuff = new int[globalRecvsBuffSize];
3428 error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3430 vector<set<int> > sendersPerProc(nproc);
3431 for(int pin = 0; pin < nproc; ++pin){
3432 for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3433 sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3438 MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3439 MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3441 map<int,int> recvBufferSizePerProc;
3442 set<int>::iterator senditend = sendersPerProc[rank].end();
3443 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3444 recvBufferSizePerProc[*sendit] = 0;
3445 error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3448 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3449 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3450 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3453 MPI_Waitall(nReq,req,stats);
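// Buffer sizes have been exchanged above; the second round of MPI_Irecv/MPI_Isend below
// moves the packed (MPI_PACKED) octant payloads themselves.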
3458 uint32_t nofNewHead = 0;
3459 uint32_t nofNewTail = 0;
3460 map<int,Class_Comm_Buffer> recvBuffers;
3461 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3462 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3464 uint32_t nofNewPerProc = (uint32_t)(rit->second / (uint32_t)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8)));
3465 if(rit->first < rank)
3466 nofNewHead += nofNewPerProc;
3467 else if(rit->first > rank)
3468 nofNewTail += nofNewPerProc;
3471 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3473 error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3476 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3477 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3480 MPI_Waitall(nReq,req,stats);
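// Rebuild the local octant vector: compact the resident octants to the front, resize to
// residents + received head + received tail, shift the residents behind the new head
// octants, then unpack the received octants into their final positions below.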
3483 uint32_t resEnd = octree.getNumOctants() - tailOffset;
3484 uint32_t nofResidents = resEnd - headOffset;
3486 for(uint32_t i = headOffset; i < resEnd; ++i){
3487 octree.octants[octCounter] = octree.octants[i];
3490 uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3491 octree.octants.resize(newCounter);
3493 uint32_t resCounter = nofNewHead + nofResidents - 1;
3494 for(uint32_t k = 0; k < nofResidents ; ++k){
3495 octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3500 bool jumpResident = false;
3501 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3502 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3503 uint32_t nofNewPerProc = (uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8)));
3505 if(rbit->first > rank && !jumpResident){
3506 newCounter += nofResidents ;
3507 jumpResident = true;
3509 for(int i = nofNewPerProc - 1; i >= 0; --i){
3510 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
3511 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
3512 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&z,1,MPI_UINT32_T,comm);
3513 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
3515 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
3516 octree.octants[newCounter].setMarker(m);
3517 for(int j = 0; j < 16; ++j){
3518 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
3519 octree.octants[newCounter].info[j] = info[j];
3524 #if defined(__INTEL_COMPILER) || defined(__ICC)
3526 octree.octants.shrink_to_fit();
3528 delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3529 delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3530 delete [] displays; displays = NULL;
3531 delete [] req; req = NULL;
3532 delete [] stats; stats = NULL;
3533 delete [] globalRecvsBuff; globalRecvsBuff = NULL;
3536 updateLoadBalance();
3540 delete [] partition;
3545 log.writeLog(" Final Parallel partition : ");
3546 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3547 for(int ii=1; ii<nproc; ii++){
3548 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3551 log.writeLog("---------------------------------------------");
3563 template<class Impl>
3566 log.writeLog("---------------------------------------------");
3567 log.writeLog(" LOAD BALANCE ");
3569 uint32_t* partition = new uint32_t [nproc];
3571 computePartition(partition);
3573 computePartition(partition, weight);
3580 log.writeLog(" Initial Serial distribution : ");
3581 for(int ii=0; ii<nproc; ii++){
3582 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3585 uint32_t stride = 0;
3586 for(int i = 0; i < rank; ++i)
3587 stride += partition[i];
3591 octree.octants.assign(first, last);
3592 #if defined(__INTEL_COMPILER) || defined(__ICC)
3594 octree.octants.shrink_to_fit();
3596 first = octantsCopy.end();
3597 last = octantsCopy.end();
3600 userData.assign(stride,partition[rank]);
3604 updateLoadBalance();
3610 log.writeLog(" Initial Parallel partition : ");
3611 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3612 for(int ii=1; ii<nproc; ii++){
3613 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3617 octree.ghosts.clear();
3618 octree.size_ghosts = 0;
3620 uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
3621 for(int p = 0; p < nproc; ++p){
3622 newPartitionRangeGlobalidx[p] = 0;
3623 for(int pp = 0; pp <= p; ++pp)
3624 newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
3625 --newPartitionRangeGlobalidx[p];
3633 lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
3637 else if(lh > octree.octants.size() - 1)
3638 lh = octree.octants.size() - 1;
3640 if(rank == nproc - 1)
3641 ft = octree.octants.size();
3643 ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
3645 ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
3647 if(ft > (int32_t)(octree.octants.size() - 1))
3648 ft = octree.octants.size();
3653 uint32_t headSize = (uint32_t)(lh + 1);
3654 uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
3655 uint32_t headOffset = headSize;
3656 uint32_t tailOffset = tailSize;
3659 map<int,Class_Comm_Buffer> sendBuffers;
3662 int64_t firstOctantGlobalIdx = 0;
3663 int64_t globalLastHead = (int64_t) lh;
3664 int64_t globalFirstTail = (int64_t) ft;
3665 int firstPredecessor = -1;
3666 int firstSuccessor = nproc;
3668 firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
3669 globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
3670 globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
3671 for(int pre = rank - 1; pre >=0; --pre){
3672 if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
3673 firstPredecessor = pre;
3675 for(int post = rank + 1; post < nproc; ++post){
3676 if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
3677 firstSuccessor = post;
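// Same head/tail redistribution as in the plain load balance above, but every packed octant
// is followed by the attached user data, collected through the Class_Data_LB_Interface
// (userData.fixedSize(), userData.size(i), userData.gather()); each buffer also carries a
// leading uint32 element count, hence the extra sizeof(int) in buffSize.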
3692 uint32_t nofElementsFromSuccessiveToPrevious = 0;
3694 for(int p = firstPredecessor; p >= 0; --p){
3695 if(headSize < partition[p]){
3696 intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
3697 intBuffer = abs(intBuffer);
3698 nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
3699 if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
3700 nofElementsFromSuccessiveToPrevious = headSize;
3702 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3705 buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
3708 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3709 buffSize += userData.size(i);
3713 buffSize += sizeof(int);
3716 MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3717 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3725 for(int j = 0; j < 16; ++j)
3726 info[j] = octant.info[j];
3727 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3728 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3729 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3730 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3731 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3732 for(int j = 0; j < 16; ++j){
3733 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3736 userData.gather(sendBuffers[p],i);
3738 if(nofElementsFromSuccessiveToPrevious == headSize)
3741 lh -= nofElementsFromSuccessiveToPrevious;
3742 globalLastHead -= nofElementsFromSuccessiveToPrevious;
3747 nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
3748 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3751 buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
3754 for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
3755 buffSize += userData.size(i);
3759 buffSize += sizeof(int);
3762 MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3763 for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
3771 for(int j = 0; j < 16; ++j)
3772 info[j] = octant.info[j];
3773 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3774 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3775 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3776 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3777 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3778 for(int j = 0; j < 16; ++j){
3779 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3781 userData.gather(sendBuffers[p],i);
3783 lh -= nofElementsFromSuccessiveToPrevious;
3784 globalLastHead -= nofElementsFromSuccessiveToPrevious;
3791 uint32_t nofElementsFromPreviousToSuccessive = 0;
3795 for(int p = firstSuccessor; p < nproc; ++p){
3796 if(tailSize < partition[p]){
3797 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3798 if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
3799 nofElementsFromPreviousToSuccessive = tailSize;
3801 uint32_t octantsSize = (uint32_t)octree.octants.size();
3802 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3805 buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
3808 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3809 buffSize += userData.size(i);
3813 buffSize += sizeof(int);
3816 MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3817 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3825 for(int j = 0; j < 16; ++j)
3826 info[j] = octant.info[j];
3827 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3828 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3829 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3830 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3831 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3832 for(int j = 0; j < 16; ++j){
3833 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3835 userData.gather(sendBuffers[p],i);
3837 if(nofElementsFromPreviousToSuccessive == tailSize)
3839 ft += nofElementsFromPreviousToSuccessive;
3840 globalFirstTail += nofElementsFromPreviousToSuccessive;
3841 tailSize -= nofElementsFromPreviousToSuccessive;
3845 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3846 uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
3847 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
3850 buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
3853 for(uint32_t i = ft; i <= endOctants; ++i){
3854 buffSize += userData.size(i);
3858 buffSize += sizeof(int);
3861 MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3862 for(uint32_t i = ft; i <= endOctants; ++i ){
3870 for(int j = 0; j < 16; ++j)
3871 info[j] = octant.info[j];
3872 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3873 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3874 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3875 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3876 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3877 for(int j = 0; j < 16; ++j){
3878 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3880 userData.gather(sendBuffers[p],i);
3882 ft += nofElementsFromPreviousToSuccessive;
3883 globalFirstTail += nofElementsFromPreviousToSuccessive;
3884 tailSize -= nofElementsFromPreviousToSuccessive;
3892 vector<Class_Array> recvs(nproc);
3893 recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
3894 recvs[rank].array[0] = rank;
3896 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
3897 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
3898 recvs[rank].array[counter] = sit->first;
3901 int* nofRecvsPerProc = new int[nproc];
3902 error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
3903 int globalRecvsBuffSize = 0;
3904 int* displays = new int[nproc];
3905 for(int pp = 0; pp < nproc; ++pp){
3907 globalRecvsBuffSize += nofRecvsPerProc[pp];
3908 for(int ppp = 0; ppp < pp; ++ppp){
3909 displays[pp] += nofRecvsPerProc[ppp];
3912 int* globalRecvsBuff = new int[globalRecvsBuffSize];
3913 error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3915 vector<set<int> > sendersPerProc(nproc);
3916 for(int pin = 0; pin < nproc; ++pin){
3917 for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3918 sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3923 MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3924 MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3926 map<int,int> recvBufferSizePerProc;
3927 set<int>::iterator senditend = sendersPerProc[rank].end();
3928 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3929 recvBufferSizePerProc[*sendit] = 0;
3930 error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3933 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3934 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3935 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3938 MPI_Waitall(nReq,req,stats);
3943 uint32_t nofNewHead = 0;
3944 uint32_t nofNewTail = 0;
3945 map<int,Class_Comm_Buffer> recvBuffers;
3947 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3948 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3953 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3954 error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3957 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3958 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3961 MPI_Waitall(nReq,req,stats);
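// Each payload starts with the octant count packed by the sender, so it is unpacked first
// (nofNewPerProc) and used both to split the incoming octants into head/tail counts and to
// drive the per-octant unpack and userData.scatter() below.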
3964 map<int,uint32_t> nofNewOverProcs;
3965 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3966 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3967 uint32_t nofNewPerProc;
3968 MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofNewPerProc,1,MPI_UINT32_T,comm);
3969 nofNewOverProcs[rbit->first] = nofNewPerProc;
3970 if(rbit->first < rank)
3971 nofNewHead += nofNewPerProc;
3972 else if(rbit->first > rank)
3973 nofNewTail += nofNewPerProc;
3977 uint32_t resEnd = octree.getNumOctants() - tailOffset;
3978 uint32_t nofResidents = resEnd - headOffset;
3979 uint32_t octCounter = 0;
3980 for(uint32_t i = headOffset; i < resEnd; ++i){
3981 octree.octants[octCounter] = octree.octants[i];
3982 userData.move(i,octCounter);
3985 uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3986 octree.octants.resize(newCounter);
3987 userData.resize(newCounter);
3989 uint32_t resCounter = nofNewHead + nofResidents - 1;
3990 for(uint32_t k = 0; k < nofResidents ; ++k){
3991 octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3993 userData.move(nofResidents - k - 1,resCounter - k);
3998 bool jumpResident = false;
4000 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
4002 uint32_t nofNewPerProc = nofNewOverProcs[rbit->first];
4003 if(rbit->first > rank && !jumpResident){
4004 newCounter += nofResidents ;
4005 jumpResident = true;
4007 for(int i = nofNewPerProc - 1; i >= 0; --i){
4008 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&x,1,MPI_UINT32_T,comm);
4009 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&y,1,MPI_UINT32_T,comm);
4010 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&z,1,MPI_UINT32_T,comm);
4011 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&l,1,MPI_UINT8_T,comm);
4013 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&m,1,MPI_INT8_T,comm);
4014 octree.octants[newCounter].setMarker(m);
4015 for(int j = 0; j < 16; ++j){
4016 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&info[j],1,MPI::BOOL,comm);
4017 octree.octants[newCounter].info[j] = info[j];
4020 userData.scatter(rbit->second,newCounter);
4024 #if defined(__INTEL_COMPILER) || defined(__ICC)
4026 octree.octants.shrink_to_fit();
4030 delete [] newPartitionRangeGlobalidx;
4031 newPartitionRangeGlobalidx = NULL;
4032 delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
4033 delete [] displays; displays = NULL;
4034 delete [] req; req = NULL;
4035 delete [] stats; stats = NULL;
delete [] globalRecvsBuff; globalRecvsBuff = NULL; // matches the heap allocation above
4038 updateLoadBalance();
4040 uint32_t nofGhosts = getNumGhosts();
4043 delete [] partition;
4048 log.writeLog(" Final Parallel partition : ");
4049 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
4050 for(int ii=1; ii<nproc; ii++){
4051 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
4054 log.writeLog("---------------------------------------------");
4065 template<class Impl>
4068 log.writeLog("---------------------------------------------");
4069 log.writeLog(" LOAD BALANCE ");
4071 uint32_t* partition = new uint32_t [nproc];
4072 computePartition(partition,level);
4076 log.writeLog(" Initial Serial distribution : ");
4077 for(int ii=0; ii<nproc; ii++){
4078 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
4081 uint32_t stride = 0;
4082 for(int i = 0; i < rank; ++i)
4083 stride += partition[i];
4087 octree.octants.assign(first, last);
4088 #if defined(__INTEL_COMPILER) || defined(__ICC)
4090 octree.octants.shrink_to_fit();
4092 first = octantsCopy.end();
4093 last = octantsCopy.end();
4095 userData.assign(stride,partition[rank]);
4098 updateLoadBalance();
4104 log.writeLog(" Initial Parallel partition : ");
4105 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
4106 for(int ii=1; ii<nproc; ii++){
4107 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
4111 octree.ghosts.clear();
4112 octree.size_ghosts = 0;
4114 uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
4115 for(int p = 0; p < nproc; ++p){
4116 newPartitionRangeGlobalidx[p] = 0;
4117 for(int pp = 0; pp <= p; ++pp)
4118 newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
4119 --newPartitionRangeGlobalidx[p];
4127 lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
4131 else if(lh > octree.octants.size() - 1)
4132 lh = octree.octants.size() - 1;
4134 if(rank == nproc - 1)
4135 ft = octree.octants.size();
4137 ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
4139 ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
4141 if(ft > (int32_t)(octree.octants.size() - 1))
4142 ft = octree.octants.size();
4147 uint32_t headSize = (uint32_t)(lh + 1);
4148 uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
4149 uint32_t headOffset = headSize;
4150 uint32_t tailOffset = tailSize;
4153 map<int,Class_Comm_Buffer> sendBuffers;
4156 int64_t firstOctantGlobalIdx = 0;
4157 int64_t globalLastHead = (int64_t) lh;
4158 int64_t globalFirstTail = (int64_t) ft;
4159 int firstPredecessor = -1;
4160 int firstSuccessor = nproc;
4162 firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
4163 globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
4164 globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
4165 for(int pre = rank - 1; pre >=0; --pre){
4166 if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
4167 firstPredecessor = pre;
4169 for(int post = rank + 1; post < nproc; ++post){
4170 if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
4171 firstSuccessor = post;
4186 uint32_t nofElementsFromSuccessiveToPrevious = 0;
4188 for(int p = firstPredecessor; p >= 0; --p){
4189 if(headSize < partition[p]){
4190 intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
4191 intBuffer = abs(intBuffer);
4192 nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
4193 if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
4194 nofElementsFromSuccessiveToPrevious = headSize;
4196 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
4199 buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
4202 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
4203 buffSize += userData.size(i);
4207 buffSize += sizeof(int);
4210 MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4211 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
4219 for(int j = 0; j < 16; ++j)
4220 info[j] = octant.info[j];
4221 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4222 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4223 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4224 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4225 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4226 for(int j = 0; j < 16; ++j){
4227 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4230 userData.gather(sendBuffers[p],i);
4232 if(nofElementsFromSuccessiveToPrevious == headSize)
4235 lh -= nofElementsFromSuccessiveToPrevious;
4236 globalLastHead -= nofElementsFromSuccessiveToPrevious;
4241 nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
4242 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
4245 buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
4248 for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
4249 buffSize += userData.size(i);
4253 buffSize += sizeof(int);
4256 MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4257 for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
4265 for(int j = 0; j < 16; ++j)
4266 info[j] = octant.info[j];
4267 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4268 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4269 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4270 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4271 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4272 for(int j = 0; j < 16; ++j){
4273 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4275 userData.gather(sendBuffers[p],i);
4277 lh -= nofElementsFromSuccessiveToPrevious;
4278 globalLastHead -= nofElementsFromSuccessiveToPrevious;
4287 uint32_t nofElementsFromPreviousToSuccessive = 0;
4290 for(int p = firstSuccessor; p < nproc; ++p){
4291 if(tailSize < partition[p]){
4292 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
4293 if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
4294 nofElementsFromPreviousToSuccessive = tailSize;
4296 uint32_t octantsSize = (uint32_t)octree.octants.size();
4297 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
4300 buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
4303 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
4304 buffSize += userData.size(i);
4308 buffSize += sizeof(int);
4311 MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4312 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
4320 for(int j = 0; j < 16; ++j)
4321 info[j] = octant.info[j];
4322 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4323 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4324 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4325 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4326 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4327 for(int j = 0; j < 16; ++j){
4328 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4330 userData.gather(sendBuffers[p],i);
4332 if(nofElementsFromPreviousToSuccessive == tailSize)
4334 ft += nofElementsFromPreviousToSuccessive;
4335 globalFirstTail += nofElementsFromPreviousToSuccessive;
4336 tailSize -= nofElementsFromPreviousToSuccessive;
4340 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
4341 uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
4342 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global3D.octantBytes / (double)(CHAR_BIT/8));
4345 buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
4348 for(uint32_t i = ft; i <= endOctants; ++i){
4349 buffSize += userData.size(i);
4353 buffSize += sizeof(int);
4356 MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4357 for(uint32_t i = ft; i <= endOctants; ++i ){
4365 for(int j = 0; j < 16; ++j)
4366 info[j] = octant.info[j];
4367 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4368 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4369 error_flag = MPI_Pack(&z,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4370 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4371 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4372 for(int j = 0; j < 16; ++j){
4373 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4375 userData.gather(sendBuffers[p],i);
4377 ft += nofElementsFromPreviousToSuccessive;
4378 globalFirstTail += nofElementsFromPreviousToSuccessive;
4379 tailSize -= nofElementsFromPreviousToSuccessive;
4387 vector<Class_Array> recvs(nproc);
4388 recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
4389 recvs[rank].array[0] = rank;
4391 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
4392 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4393 recvs[rank].array[counter] = sit->first;
4396 int* nofRecvsPerProc = new int[nproc];
4397 error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
4398 int globalRecvsBuffSize = 0;
4399 int* displays = new int[nproc];
4400 for(int pp = 0; pp < nproc; ++pp){
4402 globalRecvsBuffSize += nofRecvsPerProc[pp];
4403 for(int ppp = 0; ppp < pp; ++ppp){
4404 displays[pp] += nofRecvsPerProc[ppp];
4407 int* globalRecvsBuff = new int[globalRecvsBuffSize];
4408 error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
4410 vector<set<int> > sendersPerProc(nproc);
4411 for(int pin = 0; pin < nproc; ++pin){
4412 for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
4413 sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
4418 MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
4419 MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
4421 map<int,int> recvBufferSizePerProc;
4422 set<int>::iterator senditend = sendersPerProc[rank].end();
4423 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
4424 recvBufferSizePerProc[*sendit] = 0;
4425 error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
4428 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
4429 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4430 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
4433 MPI_Waitall(nReq,req,stats);
4438 uint32_t nofNewHead = 0;
4439 uint32_t nofNewTail = 0;
4440 map<int,Class_Comm_Buffer> recvBuffers;
4442 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
4443 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
4448 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
4450 error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
4453 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4454 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
4457 MPI_Waitall(nReq,req,stats);
4460 map<int,uint32_t> nofNewOverProcs;
4461 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
4462 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
4463 uint32_t nofNewPerProc;
4464 MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofNewPerProc,1,MPI_UINT32_T,comm);
4465 nofNewOverProcs[rbit->first] = nofNewPerProc;
4466 if(rbit->first < rank)
4467 nofNewHead += nofNewPerProc;
4468 else if(rbit->first > rank)
4469 nofNewTail += nofNewPerProc;
4473 uint32_t resEnd = octree.getNumOctants() - tailOffset;
4474 uint32_t nofResidents = resEnd - headOffset;
4475 uint32_t octCounter = 0;
4476 for(uint32_t i = headOffset; i < resEnd; ++i){
4477 octree.octants[octCounter] = octree.octants[i];
4478 userData.move(i,octCounter);
4481 uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
4482 octree.octants.resize(newCounter);
4483 userData.resize(newCounter);
4485 uint32_t resCounter = nofNewHead + nofResidents - 1;
4486 for(uint32_t k = 0; k < nofResidents ; ++k){
4487 octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
4489 userData.move(nofResidents - k - 1,resCounter - k);
4494 bool jumpResident = false;
4496 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
4498 uint32_t nofNewPerProc = nofNewOverProcs[rbit->first];
4499 if(rbit->first > rank && !jumpResident){
4500 newCounter += nofResidents ;
4501 jumpResident = true;
4503 for(int i = nofNewPerProc - 1; i >= 0; --i){
4504 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&x,1,MPI_UINT32_T,comm);
4505 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&y,1,MPI_UINT32_T,comm);
4506 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&z,1,MPI_UINT32_T,comm);
4507 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&l,1,MPI_UINT8_T,comm);
4509 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&m,1,MPI_INT8_T,comm);
4510 octree.octants[newCounter].setMarker(m);
4511 for(int j = 0; j < 16; ++j){
4512 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&info[j],1,MPI::BOOL,comm);
4513 octree.octants[newCounter].info[j] = info[j];
4516 userData.scatter(rbit->second,newCounter);
4520 #if defined(__INTEL_COMPILER) || defined(__ICC)
4522 octree.octants.shrink_to_fit();
4526 delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
4527 delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
4528 delete [] displays; displays = NULL;
4529 delete [] req; req = NULL;
4530 delete [] stats; stats = NULL;
4531 delete [] globalRecvsBuff; globalRecvsBuff = NULL;
4534 updateLoadBalance();
4536 uint32_t nofGhosts = getNumGhosts();
4540 delete [] partition;
4545 log.writeLog(" Final Parallel partition : ");
4546 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
4547 for(int ii=1; ii<nproc; ii++){
4548 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
4551 log.writeLog("---------------------------------------------");
4563 max_depth = octree.local_max_depth;
4564 global_num_octants = octree.getNumOctants();
4565 for(int p = 0; p < nproc; ++p){
4566 partition_range_globalidx[p] = global_num_octants - 1;
4573 error_flag = MPI_Allreduce(&octree.local_max_depth,&max_depth,1,MPI_UINT8_T,MPI_MAX,comm);
4575 uint64_t local_num_octants = (uint64_t) octree.getNumOctants();
4576 error_flag = MPI_Allreduce(&local_num_octants,&global_num_octants,1,MPI_UINT64_T,MPI_SUM,comm);
4578 uint64_t* rbuff = new uint64_t[nproc];
4579 error_flag = MPI_Allgather(&local_num_octants,1,MPI_UINT64_T,rbuff,1,MPI_UINT64_T,comm);
4580 for(int p = 0; p < nproc; ++p){
4581 partition_range_globalidx[p] = 0;
4582 for(int pp = 0; pp <=p; ++pp)
4583 partition_range_globalidx[p] += rbuff[pp];
4584 --partition_range_globalidx[p];
4587 uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
4588 error_flag = MPI_Allgather(&lastDescMorton,1,MPI_UINT64_T,partition_last_desc,1,MPI_UINT64_T,comm);
4589 uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
4590 error_flag = MPI_Allgather(&firstDescMorton,1,MPI_UINT64_T,partition_first_desc,1,MPI_UINT64_T,comm);
4591 delete [] rbuff; rbuff = NULL;
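// updateLoadBalance() above refreshes max_depth (MPI_MAX reduction), global_num_octants
// (MPI_SUM reduction), the cumulative per-rank global index ranges and the first/last
// descendant Mortons of every partition after a repartitioning.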
4598 void updateAfterCoarse(){
4608 uint64_t lastDescMortonPre, firstDescMortonPost;
4609 lastDescMortonPre = (rank!=0) * partition_last_desc[rank-1];
4610 firstDescMortonPost = (rank<nproc-1)*partition_first_desc[rank+1] + (rank==nproc-1)*partition_last_desc[rank];
4611 octree.checkCoarse(lastDescMortonPre, firstDescMortonPost);
4619 void updateAfterCoarse(u32vector & mapidx){
4629 uint64_t lastDescMortonPre, firstDescMortonPost;
4630 lastDescMortonPre = (rank!=0) * partition_last_desc[rank-1];
4631 firstDescMortonPost = (rank<nproc-1)*partition_first_desc[rank+1] + (rank==nproc-1)*partition_last_desc[rank];
4632 octree.checkCoarse(lastDescMortonPre, firstDescMortonPost, mapidx);
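// The updateAfterCoarse() overloads above pass the previous rank's last-descendant Morton
// and the next rank's first-descendant Morton to octree.checkCoarse(), so coarsening of a
// family split across a partition boundary stays consistent between neighbouring ranks.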
4651 map<int,Class_Comm_Buffer> sendBuffers;
4652 map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
4653 uint32_t pbordersOversize = 0;
4654 for(map<int,vector<uint32_t> >::iterator bit = bordersPerProc.begin(); bit != bitend; ++bit){
4655 pbordersOversize += bit->second.size();
4656 int buffSize = bit->second.size() * (int)ceil((double)(global3D.markerBytes + global3D.boolBytes) / (double)(CHAR_BIT/8));
4657 int key = bit->first;
4658 const vector<uint32_t> & value = bit->second;
4661 int nofBorders = value.size();
4662 for(int i = 0; i < nofBorders; ++i){
4666 mod = octant.info[15];
4667 error_flag = MPI_Pack(&marker,1,MPI_INT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
4668 error_flag = MPI_Pack(&mod,1,MPI::BOOL,sendBuffers[key].commBuffer,buffSize,&pos,comm);
4675 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
4676 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
4678 map<int,int> recvBufferSizePerProc;
4679 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
4680 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4681 recvBufferSizePerProc[sit->first] = 0;
4682 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
4685 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
4686 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4687 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
4690 MPI_Waitall(nReq,req,stats);
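// Marker-exchange payload phase: the ghost buffer sizes are now known, so the packed
// (marker, info[15]) pairs of the process-border octants are received and sent below,
// then unpacked into octree.ghosts.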
4696 uint32_t nofBytesOverProc = 0;
4697 map<int,Class_Comm_Buffer> recvBuffers;
4698 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
4699 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
4703 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4704 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
4705 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
4708 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4709 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
4712 MPI_Waitall(nReq,req,stats);
4717 uint32_t ghostCounter = 0;
4718 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
4719 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
4721 int nofGhostsPerProc = int(rrit->second.commBufferSize / ((uint32_t) (global3D.markerBytes + global3D.boolBytes)));
4722 for(int i = 0; i < nofGhostsPerProc; ++i){
4723 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&marker,1,MPI_INT8_T,comm);
4724 octree.ghosts[ghostCounter].setMarker(marker);
4725 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&mod,1,MPI::BOOL,comm);
4726 octree.ghosts[ghostCounter].info[15] = mod;
4730 recvBuffers.clear();
4731 sendBuffers.clear();
4732 recvBufferSizePerProc.clear();
4733 delete [] req; req = NULL;
4734 delete [] stats; stats = NULL;
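// balance21() below iterates local 2:1 balancing (localBalance/localBalanceAll preceded by
// preBalance21), reducing the local "changed" flag with MPI_Allreduce(MPI_LOR) until no
// rank reports further changes; each iteration is traced in the log.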
4740 void balance21(bool const first){
4742 bool globalDone = true, localDone = false;
4746 octree.preBalance21(true);
4749 log.writeLog("---------------------------------------------");
4750 log.writeLog(" 2:1 BALANCE (balancing Marker before Adapt)");
4752 log.writeLog(" Iterative procedure ");
4754 log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4758 localDone = octree.localBalance(true);
4760 octree.preBalance21(false);
4762 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4766 log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4768 localDone = octree.localBalance(false);
4770 octree.preBalance21(false);
4771 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4775 log.writeLog(" Iteration : Finalizing ");
4782 log.writeLog(" 2:1 Balancing reached ");
4784 log.writeLog("---------------------------------------------");
4791 localDone = octree.localBalanceAll(true);
4793 octree.preBalance21(false);
4795 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4800 localDone = octree.localBalanceAll(false);
4802 octree.preBalance21(false);
4803 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4814 bool localDone = false;
4817 octree.preBalance21(true);
4820 log.writeLog("---------------------------------------------");
4821 log.writeLog(" 2:1 BALANCE (balancing Marker before Adapt)");
4823 log.writeLog(" Iterative procedure ");
4825 log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4828 localDone = octree.localBalance(true);
4829 octree.preBalance21(false);
4833 log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4834 localDone = octree.localBalance(false);
4835 octree.preBalance21(false);
4838 log.writeLog(" Iteration : Finalizing ");
4843 log.writeLog(" 2:1 Balancing reached ");
4845 log.writeLog("---------------------------------------------");
4850 localDone = octree.localBalanceAll(true);
4851 octree.preBalance21(false);
4855 localDone = octree.localBalanceAll(false);
4856 octree.preBalance21(false);
4873 bool globalDone = false, localDone = false;
4874 uint32_t nocts = octree.getNumOctants();
4875 vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
4877 for (iter = octree.octants.begin(); iter != iterend; iter++){
4878 iter->info[12] = false;
4879 iter->info[13] = false;
4880 iter->info[15] = false;
4885 log.writeLog("---------------------------------------------");
4886 log.writeLog(" ADAPT (Refine/Coarse)");
4893 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4896 while(octree.refine());
4898 if (octree.getNumOctants() > nocts)
4900 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4901 nocts = octree.getNumOctants();
4905 while(octree.coarse());
4906 updateAfterCoarse();
4910 if (octree.getNumOctants() < nocts){
4913 nocts = octree.getNumOctants();
4915 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
4918 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4921 log.writeLog("---------------------------------------------");
4925 log.writeLog("---------------------------------------------");
4926 log.writeLog(" ADAPT (Refine/Coarse)");
4933 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4936 while(octree.refine());
4937 if (octree.getNumOctants() > nocts){
4942 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4943 nocts = octree.getNumOctants();
4946 while(octree.coarse());
4947 updateAfterCoarse();
4953 if (octree.getNumOctants() < nocts){
4956 nocts = octree.getNumOctants();
4959 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4960 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4962 log.writeLog("---------------------------------------------");
4964 status += globalDone;
4967 status += localDone;
4978 bool adapt_mapidx(){
4980 bool globalDone = false, localDone = false;
4981 uint32_t nocts = octree.getNumOctants();
4982 vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
4984 for (iter = octree.octants.begin(); iter != iterend; iter++){
4985 iter->info[12] = false;
4986 iter->info[13] = false;
4987 iter->info[15] = false;
4992 mapidx.resize(nocts);
4993 #if defined(__INTEL_COMPILER) || defined(__ICC)
4995 mapidx.shrink_to_fit();
4997 for (uint32_t i=0; i<nocts; i++){
5003 log.writeLog("---------------------------------------------");
5004 log.writeLog(" ADAPT (Refine/Coarse)");
5011 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5014 while(octree.refine(mapidx));
5016 if (octree.getNumOctants() > nocts)
5018 nocts = octree.getNumOctants();
5019 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(nocts)));
5023 while(octree.coarse(mapidx));
5024 updateAfterCoarse(mapidx);
5028 if (octree.getNumOctants() < nocts){
5031 nocts = octree.getNumOctants();
5033 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5036 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5039 log.writeLog("---------------------------------------------");
5043 log.writeLog("---------------------------------------------");
5044 log.writeLog(" ADAPT (Refine/Coarse)");
5051 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5054 while(octree.refine(mapidx));
5055 if (octree.getNumOctants() > nocts)
5059 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5060 nocts = octree.getNumOctants();
5063 while(octree.coarse(mapidx));
5064 updateAfterCoarse(mapidx);
5070 if (octree.getNumOctants() < nocts){
5073 nocts = octree.getNumOctants();
5076 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5077 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5079 log.writeLog("---------------------------------------------");
5099 done = adapt_mapidx();
5123 done = adapt_mapidx();
5141 void getMapping(uint32_t & idx, u32vector & mapper, vector<bool> & isghost){
5143 uint32_t i, nocts = getNumOctants();
5144 uint32_t nghbro = octree.last_ghost_bros.size();
5149 mapper.push_back(mapidx[idx]);
5150 isghost.push_back(false);
5151 if (getIsNewC(idx)){
5152 if (idx < nocts-1 || !nghbro){
5154 mapper.push_back(mapidx[idx]+i);
5155 isghost.push_back(false);
5158 else if (idx == nocts-1 && nghbro){
5159 for (i=1; i<global3D.nchildren-nghbro; i++){
5160 mapper.push_back(mapidx[idx]+i);
5161 isghost.push_back(false);
5163 for (i=0; i<nghbro; i++){
5164 mapper.push_back(octree.last_ghost_bros[i]);
5165 isghost.push_back(true);
5177 bool globalDone = false, localDone = false, cDone = false;
5178 uint32_t nocts = octree.getNumOctants();
5179 vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
5181 for (iter = octree.octants.begin(); iter != iterend; iter++){
5182 iter->info[12] = false;
5183 iter->info[13] = false;
5184 iter->info[15] = false;
5189 log.writeLog("---------------------------------------------");
5190 log.writeLog(" ADAPT (GlobalRefine)");
5194 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5197 while(octree.globalRefine());
5199 if (octree.getNumOctants() > nocts)
5201 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5202 nocts = octree.getNumOctants();
5207 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5210 log.writeLog("---------------------------------------------");
5214 log.writeLog("---------------------------------------------");
5215 log.writeLog(" ADAPT (Global Refine)");
5219 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5222 while(octree.globalRefine());
5223 if (octree.getNumOctants() > nocts)
5227 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5228 nocts = octree.getNumOctants();
5231 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5233 log.writeLog("---------------------------------------------");
5252 bool globalDone = false, localDone = false;
5253 uint32_t nocts = octree.getNumOctants();
5254 vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
5256 for (iter = octree.octants.begin(); iter != iterend; iter++){
5257 iter->info[12] = false;
5258 iter->info[13] = false;
5259 iter->info[15] = false;
5264 mapidx.resize(nocts);
5265 #if defined(__INTEL_COMPILER) || defined(__ICC)
5267 mapidx.shrink_to_fit();
5269 for (uint32_t i=0; i<nocts; i++){
5275 log.writeLog("---------------------------------------------");
5276 log.writeLog(" ADAPT (Global Refine)");
5280 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5283 while(octree.globalRefine(mapidx));
5285 if (octree.getNumOctants() > nocts)
5287 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5288 nocts = octree.getNumOctants();
5293 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5296 log.writeLog("---------------------------------------------");
5300 log.writeLog("---------------------------------------------");
5301 log.writeLog(" ADAPT (Global Refine)");
5305 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5308 while(octree.globalRefine(mapidx));
5309 if (octree.getNumOctants() > nocts)
5313 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5314 nocts = octree.getNumOctants();
5317 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5319 log.writeLog("---------------------------------------------");
5332 bool globalDone = false, localDone = false, cDone = false;
5333 uint32_t nocts = octree.getNumOctants();
5334 vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
5336 for (iter = octree.octants.begin(); iter != iterend; iter++){
5337 iter->info[12] = false;
5338 iter->info[13] = false;
5339 iter->info[15] = false;
5344 log.writeLog("---------------------------------------------");
5345 log.writeLog(" ADAPT (Global Coarse)");
5352 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5355 while(octree.globalCoarse());
5356 updateAfterCoarse();
5358 while(octree.refine());
5360 if (octree.getNumOctants() < nocts){
5363 nocts = octree.getNumOctants();
5365 log.writeLog(
" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5368 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5371 log.writeLog(
"---------------------------------------------");
5375 log.writeLog(
"---------------------------------------------");
5376 log.writeLog(
" ADAPT (Global Coarse)");
5383 log.writeLog(
" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5386 while(octree.globalCoarse());
5387 updateAfterCoarse();
5390 while(octree.refine());
5393 if (octree.getNumOctants() < nocts){
5396 nocts = octree.getNumOctants();
5400 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5401 log.writeLog(
" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5403 log.writeLog(
"---------------------------------------------");
5422 bool globalDone = false, localDone = false;
5423 uint32_t nocts = octree.getNumOctants();
5424 vector<Class_Octant<3> >::iterator iter, iterend = octree.octants.end();
5426 for (iter = octree.octants.begin(); iter != iterend; iter++){
5427 iter->info[12] = false;
5428 iter->info[13] = false;
5429 iter->info[15] = false;
5434 mapidx.resize(nocts);
5435 #if defined(__INTEL_COMPILER) || defined(__ICC)
5437 mapidx.shrink_to_fit();
5439 for (uint32_t i=0; i<nocts; i++){
5445 log.writeLog("---------------------------------------------");
5446 log.writeLog(" ADAPT (Global Coarse)");
5453 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5456 while(octree.globalCoarse(mapidx));
5457 updateAfterCoarse(mapidx);
5459 while(octree.refine(mapidx));
5461 if (octree.getNumOctants() < nocts){
5464 nocts = octree.getNumOctants();
5466 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5469 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5472 log.writeLog("---------------------------------------------");
5476 log.writeLog("---------------------------------------------");
5477 log.writeLog(" ADAPT (Global Coarse)");
5484 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5487 while(octree.globalCoarse(mapidx));
5488 updateAfterCoarse(mapidx);
5491 while(octree.refine(mapidx));
5494 if (octree.getNumOctants() < nocts){
5497 nocts = octree.getNumOctants();
5501 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5502 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5504 log.writeLog("---------------------------------------------");
5516 template<class Impl>
5520 map<int,Class_Comm_Buffer> sendBuffers;
5521 size_t fixedDataSize = userData.fixedSize();
5522 map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
5523 map<int,vector<uint32_t> >::iterator bitbegin = bordersPerProc.begin();
5524 for(map<int,vector<uint32_t> >::iterator bit = bitbegin; bit != bitend; ++bit){
5525 const int & key = bit->first;
5526 const vector<uint32_t> & pborders = bit->second;
5527 size_t buffSize = 0;
5528 size_t nofPbordersPerProc = pborders.size();
5529 if(fixedDataSize != 0){
5530 buffSize = fixedDataSize*nofPbordersPerProc;
5533 for(size_t i = 0; i < nofPbordersPerProc; ++i){
5534 buffSize += userData.size(pborders[i]);
5538 buffSize += sizeof(int);
5542 MPI_Pack(&nofPbordersPerProc,1,MPI_INT,sendBuffers[key].commBuffer,sendBuffers[key].commBufferSize,&sendBuffers[key].pos,comm);
5545 for(size_t j = 0; j < nofPbordersPerProc; ++j){
5546 userData.gather(sendBuffers[key],pborders[j]);
5551 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5552 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5554 map<int,int> recvBufferSizePerProc;
5555 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5556 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5557 recvBufferSizePerProc[sit->first] = 0;
5558 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5561 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5562 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5563 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5566 MPI_Waitall(nReq,req,stats);
5570 map<int,Class_Comm_Buffer> recvBuffers;
5571 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5572 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5576 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5578 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5581 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5582 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5585 MPI_Waitall(nReq,req,stats);
5588 int ghostOffset = 0;
5589 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
5590 map<int,Class_Comm_Buffer>::iterator rbitbegin = recvBuffers.begin();
5591 for(map<int,Class_Comm_Buffer>::iterator rbit = rbitbegin; rbit != rbitend; ++rbit){
5592 int nofGhostFromThisProc = 0;
5593 MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofGhostFromThisProc,1,MPI_INT,comm);
5594 for(int k = 0; k < nofGhostFromThisProc; ++k){
5595 userData.scatter(rbit->second, k+ghostOffset);
5597 ghostOffset += nofGhostFromThisProc;
5600 delete [] req; req = NULL;
5601 delete [] stats; stats = NULL;
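communicate() above packs, for every process listed in bordersPerProc, the values of the octants on the shared border (userData.gather), exchanges buffer sizes and then the packed buffers with non-blocking sends/receives, and finally unpacks the incoming values into the ghost layer (userData.scatter). A minimal sketch of a compatible user data class follows; it assumes the CRTP base Class_Data_Comm_Interface forwards to these member functions and that the buffer argument exposes templated write()/read() helpers (an assumption, not shown in this listing):

class MyGhostData : public Class_Data_Comm_Interface<MyGhostData> {
public:
    vector<double>& data;        // one value per interior octant
    vector<double>& ghostData;   // one value per ghost octant, filled by scatter()

    MyGhostData(vector<double>& d, vector<double>& g) : data(d), ghostData(g) {}

    size_t fixedSize() const { return sizeof(double); }            // every element packs to the same size
    size_t size(const uint32_t e) const { return sizeof(double); }

    template<class Buffer> void gather(Buffer& buff, const uint32_t e)  { buff.write(data[e]); }
    template<class Buffer> void scatter(Buffer& buff, const uint32_t e) { buff.read(ghostData[e]); }
};

// usage sketch:
//   vector<double> ghosts(pablo.getNumGhosts());
//   MyGhostData exchanger(field, ghosts);
//   pablo.communicate(exchanger);   // fills `ghosts` with the owners' values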
5611 octree.computeConnectivity();
5619 octree.clearConnectivity();
5627 octree.updateConnectivity();
5635 octree.computeGhostsConnectivity();
5643 octree.clearGhostsConnectivity();
5651 octree.updateGhostsConnectivity();
5659 return octree.nodes.size();
5668 return octree.connectivity;
5677 return octree.ghostsconnectivity;
5686 return octree.connectivity[idx];
5695 return octree.connectivity[getIdx(oct)];
5705 return octree.ghostsconnectivity[idx];
5715 return octree.ghostsconnectivity[getIdx(oct)];
5724 return octree.nodes;
5734 return octree.nodes[inode];
5743 return octree.ghostsnodes;
5753 vector<double> coords(3,0);
5754 coords[0] = trans.mapX(octree.nodes[inode][0]);
5755 coords[1] = trans.mapY(octree.nodes[inode][1]);
5756 coords[2] = trans.mapZ(octree.nodes[inode][2]);
5767 return octree.ghostsnodes[inode];
5777 vector<double> coords(3,0);
5778 coords[0] = trans.mapX(octree.ghostsnodes[inode][0]);
5779 coords[1] = trans.mapY(octree.ghostsnodes[inode][1]);
5780 coords[2] = trans.mapZ(octree.ghostsnodes[inode][2]);
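getNodeCoordinates()/getGhostNodeCoordinates() above map a logical node of the connectivity through trans into physical x, y, z. A small sketch of walking the local connectivity with them (variable names are illustrative):

const u32vector2D& conn = pablo.getConnectivity();           // one row of node indices per octant
for (uint32_t i = 0; i < conn.size(); ++i) {
    for (size_t k = 0; k < conn[i].size(); ++k) {
        dvector xyz = pablo.getNodeCoordinates(conn[i][k]);  // {x, y, z} in physical coordinates
        // ... use xyz[0], xyz[1], xyz[2] ...
    }
}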
5797 vector<pair<pair<uint32_t, uint32_t>, pair<int, int> > > mapper;
5798 uint64_t morton2 = 0, morton1 = 0, mortonlastdesc = 0, mortonfirstdesc = 0;
5799 uint32_t idx1 = 0, idx2 = 0;
5800 uint32_t nocts = octree.getNumOctants();
5801 uint32_t nocts2 = ptree.octree.getNumOctants();
5803 mapper.resize(nocts);
5807 for (uint32_t i=0; i<nocts; i++){
5808 mapper[i].first.first = idx1;
5809 mapper[i].first.second = idx2;
5810 mapper[i].second.first = rank;
5811 mapper[i].second.second = rank;
5812 mortonfirstdesc = octree.octants[i].computeMorton();
5813 mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5814 while(morton1 <= mortonfirstdesc && idx1 < nocts2){
5815 mapper[i].first.first = idx1;
5824 while(morton2 <= mortonlastdesc && idx2 < nocts2){
5825 mapper[i].first.second = idx2;
5838 map<int,vector<uint64_t> > FirstMortonperproc, SecondMortonperproc;
5839 map<int,vector<uint64_t> > FirstMortonReceived, SecondMortonReceived;
5840 map<int,vector<uint32_t> > FirstIndexperproc, SecondIndexperproc;
5841 map<int,vector<uint32_t> > FirstLocalIndex, SecondLocalIndex;
5846 for (uint32_t i=0; i<nocts; i++){
5847 mortonfirstdesc = octree.octants[i].computeMorton();
5848 owner = ptree.findOwner(mortonfirstdesc);
5850 mapper[i].second.first = rank;
5851 while(morton1 <= mortonfirstdesc && idx1 < nocts2){
5852 mapper[i].first.first = idx1;
5861 mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5862 owner = ptree.findOwner(mortonlastdesc);
5864 mapper[i].second.second = rank;
5865 mapper[i].first.second = idx2;
5866 while(morton2 <= mortonlastdesc && idx2 < nocts2){
5867 mapper[i].first.second = idx2;
5878 mapper[i].second.second = owner;
5879 SecondMortonperproc[owner].push_back(mortonfirstdesc);
5880 SecondLocalIndex[owner].push_back(i);
5884 mapper[i].second.first = owner;
5885 FirstMortonperproc[owner].push_back(mortonfirstdesc);
5886 FirstLocalIndex[owner].push_back(i);
5887 mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5888 owner = ptree.findOwner(mortonlastdesc);
5890 mapper[i].second.second = rank;
5891 mapper[i].first.second = idx2;
5892 while(morton2 <= mortonlastdesc && idx2 < nocts2){
5893 mapper[i].first.second = idx2;
5904 mapper[i].second.second = owner;
5905 SecondMortonperproc[owner].push_back(mortonfirstdesc);
5906 SecondLocalIndex[owner].push_back(i);
5914 for(int iproc=0; iproc<nproc; iproc++){
5915 FirstMortonperproc[iproc].push_back(-1);
5916 SecondMortonperproc[iproc].push_back(-1);
5922 map<int,Class_Comm_Buffer> sendBuffers;
5923 map<int,vector<uint64_t> >::iterator bitend = FirstMortonperproc.end();
5924 for(map<int,vector<uint64_t> >::iterator bit = FirstMortonperproc.begin(); bit != bitend; ++bit){
5925 int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint64_t)) / (double)(CHAR_BIT/8));
5926 int key = bit->first;
5927 vector<uint64_t> & value = bit->second;
5930 int nofMortons = value.size();
5931 for(int i = 0; i < nofMortons; ++i){
5933 uint64_t Morton = value[i];
5934 error_flag = MPI_Pack(&Morton,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
5941 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5942 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5944 map<int,int> recvBufferSizePerProc;
5945 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5946 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5947 recvBufferSizePerProc[sit->first] = 0;
5948 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5951 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5952 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5953 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5956 MPI_Waitall(nReq,req,stats);
5962 uint32_t nofBytesOverProc = 0;
5963 map<int,Class_Comm_Buffer> recvBuffers;
5964 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5965 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5969 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5970 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
5971 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5974 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5975 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5978 MPI_Waitall(nReq,req,stats);
5983 uint32_t Mortoncounter = 0;
5984 uint64_t Morton = 0;
5985 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
5986 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
5988 int nofMortonPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint64_t)));
5989 for(int i = 0; i < nofMortonPerProc-1; ++i){
5990 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Morton,1,MPI_UINT64_T,comm);
5991 FirstMortonReceived[rrit->first].push_back(Morton);
5996 recvBuffers.clear();
5997 sendBuffers.clear();
5998 recvBufferSizePerProc.clear();
5999 delete [] req; req = NULL;
6000 delete [] stats; stats = NULL;
6006 map<int,Class_Comm_Buffer> sendBuffers;
6007 map<int,vector<uint64_t> >::iterator bitend = SecondMortonperproc.end();
6008 uint32_t pbordersOversize = 0;
6009 for(map<int,vector<uint64_t> >::iterator bit = SecondMortonperproc.begin(); bit != bitend; ++bit){
6010 pbordersOversize += bit->second.size();
6011 int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint64_t)) / (double)(CHAR_BIT/8));
6012 int key = bit->first;
6013 vector<uint64_t> & value = bit->second;
6016 int nofMortons = value.size();
6017 for(int i = 0; i < nofMortons; ++i){
6019 uint64_t Morton = value[i];
6020 error_flag = MPI_Pack(&Morton,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
6027 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
6028 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
6030 map<int,int> recvBufferSizePerProc;
6031 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
6032 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6033 recvBufferSizePerProc[sit->first] = 0;
6034 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
6037 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
6038 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6039 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
6042 MPI_Waitall(nReq,req,stats);
6048 uint32_t nofBytesOverProc = 0;
6049 map<int,Class_Comm_Buffer> recvBuffers;
6050 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
6051 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
6055 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6056 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
6057 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
6060 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6061 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
6064 MPI_Waitall(nReq,req,stats);
6069 uint32_t Mortoncounter = 0;
6070 uint64_t Morton = 0;
6071 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
6072 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
6074 int nofMortonPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint64_t)));
6075 for(int i = 0; i < nofMortonPerProc-1; ++i){
6076 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Morton,1,MPI_UINT64_T,comm);
6077 SecondMortonReceived[rrit->first].push_back(Morton);
6081 recvBuffers.clear();
6082 sendBuffers.clear();
6083 recvBufferSizePerProc.clear();
6084 delete [] req; req = NULL;
6085 delete [] stats; stats = NULL;
6089 for (int iproc=0; iproc<nproc; iproc++){
6090 vector<Class_Octant<3> >::iterator oend = octree.octants.end();
6091 vector<Class_Octant<3> >::iterator obegin = octree.octants.begin();
6092 vector<Class_Octant<3> >::iterator it = obegin;
6093 int nmortons = FirstMortonReceived[iproc].size();
6094 FirstIndexperproc[iproc].resize(nmortons);
6095 for (int idx=0; idx<nmortons; idx++){
6096 FirstIndexperproc[iproc][idx] = octree.getNumOctants()-1;
6098 mortonfirstdesc = FirstMortonReceived[iproc][idx];
6099 morton1 = it->computeMorton();
6100 while(morton1 <= mortonfirstdesc && it != oend){
6102 FirstIndexperproc[iproc][idx] = idx1;
6105 morton1 = it->computeMorton();
6116 for (int iproc=0; iproc<nproc; iproc++){
6117 vector<Class_Octant<3> >::iterator oend = octree.octants.end();
6118 vector<Class_Octant<3> >::iterator obegin = octree.octants.begin();
6119 vector<Class_Octant<3> >::iterator it = obegin;
6120 int nmortons = SecondMortonReceived[iproc].size();
6121 SecondIndexperproc[iproc].resize(nmortons);
6122 for (int idx=0; idx<nmortons; idx++){
6123 SecondIndexperproc[iproc][idx] = octree.getNumOctants()-1;
6125 mortonlastdesc = SecondMortonReceived[iproc][idx];
6126 morton2 = it->computeMorton();
6127 while(morton2 <= mortonlastdesc && it != oend){
6128 SecondIndexperproc[iproc][idx] = idx2;
6132 morton2 = it->computeMorton();
6142 for(int iproc=0; iproc<nproc; iproc++){
6143 FirstIndexperproc[iproc].push_back(-1);
6144 SecondIndexperproc[iproc].push_back(-1);
6150 map<int,Class_Comm_Buffer> sendBuffers;
6151 map<int,vector<uint32_t> >::iterator bitend = FirstIndexperproc.end();
6152 for(map<int,vector<uint32_t> >::iterator bit = FirstIndexperproc.begin(); bit != bitend; ++bit){
6153 int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint32_t)) / (double)(CHAR_BIT/8));
6154 int key = bit->first;
6155 vector<uint32_t> & value = bit->second;
6158 int nofIndices = value.size();
6159 for(int i = 0; i < nofIndices; ++i){
6161 uint32_t Index = value[i];
6162 error_flag = MPI_Pack(&Index,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
6169 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
6170 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
6172 map<int,int> recvBufferSizePerProc;
6173 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
6174 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6175 recvBufferSizePerProc[sit->first] = 0;
6176 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
6179 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
6180 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6181 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
6184 MPI_Waitall(nReq,req,stats);
6190 uint32_t nofBytesOverProc = 0;
6191 map<int,Class_Comm_Buffer> recvBuffers;
6192 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
6193 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
6197 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6198 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
6199 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
6202 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6203 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
6206 MPI_Waitall(nReq,req,stats);
6211 uint32_t Indexcounter = 0;
6213 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
6214 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
6216 int nofIndexPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint32_t)));
6217 for(int i = 0; i < nofIndexPerProc-1; ++i){
6218 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Index,1,MPI_UINT32_T,comm);
6219 mapper[FirstLocalIndex[rrit->first][i]].first.first = Index;
6224 recvBuffers.clear();
6225 sendBuffers.clear();
6226 recvBufferSizePerProc.clear();
6227 delete [] req; req = NULL;
6228 delete [] stats; stats = NULL;
6233 map<int,Class_Comm_Buffer> sendBuffers;
6234 map<int,vector<uint32_t> >::iterator bitend = SecondIndexperproc.end();
6235 uint32_t pbordersOversize = 0;
6236 for(map<int,vector<uint32_t> >::iterator bit = SecondIndexperproc.begin(); bit != bitend; ++bit){
6237 pbordersOversize += bit->second.size();
6238 int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint32_t)) / (double)(CHAR_BIT/8));
6239 int key = bit->first;
6240 vector<uint32_t> & value = bit->second;
6243 int nofIndices = value.size();
6244 for(int i = 0; i < nofIndices; ++i){
6246 uint32_t Index = value[i];
6247 error_flag = MPI_Pack(&Index,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
6254 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
6255 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
6257 map<int,int> recvBufferSizePerProc;
6258 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
6259 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6260 recvBufferSizePerProc[sit->first] = 0;
6261 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
6264 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
6265 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6266 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
6269 MPI_Waitall(nReq,req,stats);
6275 uint32_t nofBytesOverProc = 0;
6276 map<int,Class_Comm_Buffer> recvBuffers;
6277 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
6278 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
6282 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6283 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
6284 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
6287 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6288 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
6291 MPI_Waitall(nReq,req,stats);
6296 uint32_t Indexcounter = 0;
6298 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
6299 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
6301 int nofIndexPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint32_t)));
6302 for(int i = 0; i < nofIndexPerProc-1; ++i){
6303 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Index,1,MPI_UINT32_T,comm);
6304 mapper[SecondLocalIndex[rrit->first][i]].first.second = Index;
6308 recvBuffers.clear();
6309 sendBuffers.clear();
6310 recvBufferSizePerProc.clear();
6311 delete [] req; req = NULL;
6312 delete [] stats; stats = NULL;
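mapPablos() above matches the local octants of this tree against a second tree covering the same domain: for octant i it records the first and last overlapping octant indices in the other tree (found by comparing first/last descendant Mortons) together with the ranks owning them, exchanging Mortons and indices when the overlap lives on another process. A minimal, hedged reading of the returned pair-of-pairs (tree names pabloA/pabloB are illustrative):

vector<pair<pair<uint32_t, uint32_t>, pair<int, int> > > mapper = pabloA.mapPablos(pabloB);
for (uint32_t i = 0; i < pabloA.getNumOctants(); ++i) {
    uint32_t firstIdx = mapper[i].first.first;    // first overlapping octant of pabloB
    uint32_t lastIdx  = mapper[i].first.second;   // last overlapping octant of pabloB
    int firstRank     = mapper[i].second.first;   // rank owning the first overlap
    int lastRank      = mapper[i].second.second;  // rank owning the last overlap
    // ... transfer data from pabloB octants [firstIdx, lastIdx] onto octant i ...
}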
6330 if (octree.connectivity.size() == 0) {
6331 octree.computeConnectivity();
6336 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6338 ofstream out(name.str().c_str());
6341 ss << filename << "*.vtu cannot be opened and it won't be written.";
6342 log.writeLog(ss.str());
6345 int nofNodes = octree.nodes.size();
6346 int nofGhostNodes = octree.ghostsnodes.size();
6347 int nofOctants = octree.connectivity.size();
6348 int nofGhosts = octree.ghostsconnectivity.size();
6349 int nofAll = nofGhosts + nofOctants;
6350 out << "<?xml version=\"1.0\"?>" << endl
6351 << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6352 << " <UnstructuredGrid>" << endl
6353 << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6354 out << " <Points>" << endl
6355 << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 << "\" format=\"ascii\">" << endl
6356 << " " << std::fixed;
6357 for(int i = 0; i < nofNodes; i++)
6359 for(int j = 0; j < 3; ++j)
6360 out << std::setprecision(6) << octree.nodes[i][j] << " ";
6361 if((i+1)%4==0 && i!=nofNodes-1)
6364 for(int i = 0; i < nofGhostNodes; i++)
6366 for(int j = 0; j < 3; ++j)
6367 out << std::setprecision(6) << octree.ghostsnodes[i][j] << " ";
6368 if((i+1)%4==0 && i!=nofNodes-1)
6371 out << endl << " </DataArray>" << endl
6372 << " </Points>" << endl
6373 << " <Cells>" << endl
6374 << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6376 for(int i = 0; i < nofOctants; i++)
6378 for(int j = 0; j < global3D.nnodes; j++)
6380 out << octree.connectivity[i][j] << " ";
6382 if((i+1)%3==0 && i!=nofOctants-1)
6385 for(int i = 0; i < nofGhosts; i++)
6387 for(int j = 0; j < global3D.nnodes; j++)
6389 out << octree.ghostsconnectivity[i][j] + nofNodes << " ";
6391 if((i+1)%3==0 && i!=nofGhosts-1)
6394 out << endl << " </DataArray>" << endl
6395 << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6397 for(int i = 0; i < nofAll; i++)
6399 out << (i+1)*global3D.nnodes << " ";
6400 if((i+1)%12==0 && i!=nofAll-1)
6403 out << endl << " </DataArray>" << endl
6404 << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6406 for(int i = 0; i < nofAll; i++)
6411 if((i+1)%12==0 && i!=nofAll-1)
6414 out << endl << " </DataArray>" << endl
6415 << " </Cells>" << endl
6416 << " </Piece>" << endl
6417 << " </UnstructuredGrid>" << endl
6418 << "</VTKFile>" << endl;
6422 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6423 ofstream pout(name.str().c_str());
6424 if(!pout.is_open()){
6426 ss << filename << "*.pvtu cannot be opened and it won't be written.";
6427 log.writeLog(ss.str());
6431 pout << "<?xml version=\"1.0\"?>" << endl
6432 << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6433 << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6434 << " <PPointData>" << endl
6435 << " </PPointData>" << endl
6436 << " <PCellData Scalars=\"\">" << endl;
6437 pout << " </PCellData>" << endl
6438 << " <PPoints>" << endl
6439 << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6440 << " </PPoints>" << endl;
6441 for(int i = 0; i < nproc; i++)
6442 pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6443 pout << " </PUnstructuredGrid>" << endl
6453 octree.clearConnectivity();
6467 if (octree.connectivity.size() == 0) {
6468 octree.computeConnectivity();
6473 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6475 ofstream out(name.str().c_str());
6478 ss << filename << "*.vtu cannot be opened and it won't be written.";
6479 log.writeLog(ss.str());
6482 int nofNodes = octree.nodes.size();
6483 int nofGhostNodes = octree.ghostsnodes.size();
6484 int nofOctants = octree.connectivity.size();
6485 int nofGhosts = octree.ghostsconnectivity.size();
6486 int nofAll = nofGhosts + nofOctants;
6487 out << "<?xml version=\"1.0\"?>" << endl
6488 << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6489 << " <UnstructuredGrid>" << endl
6490 << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6491 out << " <Points>" << endl
6492 << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 << "\" format=\"ascii\">" << endl
6493 << " " << std::fixed;
6494 for(int i = 0; i < nofNodes; i++)
6496 for(int j = 0; j < 3; ++j){
6497 if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6498 if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6499 if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6501 if((i+1)%4==0 && i!=nofNodes-1)
6504 for(int i = 0; i < nofGhostNodes; i++)
6506 for(int j = 0; j < 3; ++j){
6507 if (j==0) out << std::setprecision(6) << trans.mapX(octree.ghostsnodes[i][j]) << " ";
6508 if (j==1) out << std::setprecision(6) << trans.mapY(octree.ghostsnodes[i][j]) << " ";
6509 if (j==2) out << std::setprecision(6) << trans.mapZ(octree.ghostsnodes[i][j]) << " ";
6511 if((i+1)%4==0 && i!=nofNodes-1)
6514 out << endl << " </DataArray>" << endl
6515 << " </Points>" << endl
6516 << " <Cells>" << endl
6517 << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6519 for(int i = 0; i < nofOctants; i++)
6521 for(int j = 0; j < global3D.nnodes; j++)
6523 out << octree.connectivity[i][j] << " ";
6525 if((i+1)%3==0 && i!=nofOctants-1)
6528 for(int i = 0; i < nofGhosts; i++)
6530 for(int j = 0; j < global3D.nnodes; j++)
6532 out << octree.ghostsconnectivity[i][j] + nofNodes << " ";
6534 if((i+1)%3==0 && i!=nofGhosts-1)
6537 out << endl << " </DataArray>" << endl
6538 << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6540 for(int i = 0; i < nofAll; i++)
6542 out << (i+1)*global3D.nnodes << " ";
6543 if((i+1)%12==0 && i!=nofAll-1)
6546 out << endl << " </DataArray>" << endl
6547 << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6549 for(int i = 0; i < nofAll; i++)
6554 if((i+1)%12==0 && i!=nofAll-1)
6557 out << endl << " </DataArray>" << endl
6558 << " </Cells>" << endl
6559 << " </Piece>" << endl
6560 << " </UnstructuredGrid>" << endl
6561 << "</VTKFile>" << endl;
6566 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6567 ofstream pout(name.str().c_str());
6568 if(!pout.is_open()){
6570 ss << filename << "*.pvtu cannot be opened and it won't be written.";
6571 log.writeLog(ss.str());
6575 pout << "<?xml version=\"1.0\"?>" << endl
6576 << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6577 << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6578 << " <PPointData>" << endl
6579 << " </PPointData>" << endl
6580 << " <PCellData Scalars=\"\">" << endl;
6581 pout << " </PCellData>" << endl
6582 << " <PPoints>" << endl
6583 << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6584 << " </PPoints>" << endl;
6585 for(int i = 0; i < nproc; i++)
6586 pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6587 pout << " </PUnstructuredGrid>" << endl
6597 octree.clearConnectivity();
6612 if (octree.connectivity.size() == 0) {
6613 octree.computeConnectivity();
6618 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6620 ofstream out(name.str().c_str());
6623 ss << filename << "*.vtu cannot be opened and it won't be written.";
6624 log.writeLog(ss.str());
6627 int nofNodes = octree.nodes.size();
6628 int nofOctants = octree.connectivity.size();
6629 int nofAll = nofOctants;
6630 out << "<?xml version=\"1.0\"?>" << endl
6631 << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6632 << " <UnstructuredGrid>" << endl
6633 << " <Piece NumberOfCells=\"" << octree.connectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() << "\">" << endl;
6634 out << " <CellData Scalars=\"Data\">" << endl;
6635 out << " <DataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6636 << " " << std::fixed;
6637 int ndata = octree.connectivity.size();
6638 for(int i = 0; i < ndata; i++)
6640 out << std::setprecision(6) << data[i] << " ";
6641 if((i+1)%4==0 && i!=ndata-1)
6644 out << endl << " </DataArray>" << endl
6645 << " </CellData>" << endl
6646 << " <Points>" << endl
6647 << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 << "\" format=\"ascii\">" << endl
6648 << " " << std::fixed;
6649 for(int i = 0; i < nofNodes; i++)
6651 for(int j = 0; j < 3; ++j){
6652 if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6653 if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6654 if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6656 if((i+1)%4==0 && i!=nofNodes-1)
6659 out << endl << " </DataArray>" << endl
6660 << " </Points>" << endl
6661 << " <Cells>" << endl
6662 << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6664 for(int i = 0; i < nofOctants; i++)
6666 for(int j = 0; j < global3D.nnodes; j++)
6668 out << octree.connectivity[i][j] << " ";
6670 if((i+1)%3==0 && i!=nofOctants-1)
6673 out << endl << " </DataArray>" << endl
6674 << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6676 for(int i = 0; i < nofAll; i++)
6678 out << (i+1)*global3D.nnodes << " ";
6679 if((i+1)%12==0 && i!=nofAll-1)
6682 out << endl << " </DataArray>" << endl
6683 << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6685 for(int i = 0; i < nofAll; i++)
6690 if((i+1)%12==0 && i!=nofAll-1)
6693 out << endl << " </DataArray>" << endl
6694 << " </Cells>" << endl
6695 << " </Piece>" << endl
6696 << " </UnstructuredGrid>" << endl
6697 << "</VTKFile>" << endl;
6702 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6703 ofstream pout(name.str().c_str());
6704 if(!pout.is_open()){
6706 ss << filename << "*.pvtu cannot be opened and it won't be written.";
6707 log.writeLog(ss.str());
6711 pout << "<?xml version=\"1.0\"?>" << endl
6712 << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6713 << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6714 << " <PPointData>" << endl
6715 << " </PPointData>" << endl
6716 << " <PCellData Scalars=\"Data\">" << endl
6717 << " <PDataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\"/>" << endl
6718 << " </PCellData>" << endl
6719 << " <PPoints>" << endl
6720 << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6721 << " </PPoints>" << endl;
6722 for(int i = 0; i < nproc; i++)
6723 pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6724 pout << " </PUnstructuredGrid>" << endl
6734 octree.clearConnectivity();
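writeTest() above writes one Float64 value per local octant as VTK CellData next to the mapped mesh; the overload that follows also expects one value per ghost octant. A short usage sketch (the names pablo, field and the output basename are illustrative):

vector<double> field(pablo.getNumOctants());
for (uint32_t i = 0; i < pablo.getNumOctants(); ++i) {
    vector<double> c = pablo.getCenter(i);   // physical centre of octant i
    field[i] = c[0] + c[1] + c[2];           // any cell-centred quantity
}
pablo.writeTest("myfield", field);           // writes s####-p####-myfield.vtu plus the .pvtu index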
6745 void writeTest(string filename, vector<double> data, vector<double> ghostdata) {
6748 if (octree.connectivity.size() == 0) {
6749 octree.computeConnectivity();
6750 octree.computeGhostsConnectivity();
6755 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6757 ofstream out(name.str().c_str());
6760 ss << filename << "*.vtu cannot be opened and it won't be written.";
6761 log.writeLog(ss.str());
6764 int nofNodes = octree.nodes.size();
6765 int nofOctants = octree.connectivity.size();
6766 int nofGhostNodes = octree.ghostsnodes.size();
6767 int nofGhostOctants = octree.ghostsconnectivity.size();
6768 int nofAll = nofOctants + nofGhostOctants;
6769 out << "<?xml version=\"1.0\"?>" << endl
6770 << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6771 << " <UnstructuredGrid>" << endl
6772 << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6773 out << " <CellData Scalars=\"Data\">" << endl;
6774 out << " <DataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6775 << " " << std::fixed;
6776 int ndata = octree.connectivity.size();
6777 for(int i = 0; i < ndata; i++)
6779 out << std::setprecision(6) << data[i] << " ";
6780 if((i+1)%4==0 && i!=ndata-1)
6783 int nghostdata = octree.ghostsconnectivity.size();
6784 for(int i = 0; i < nghostdata; i++)
6786 out << std::setprecision(6) << ghostdata[i] << " ";
6787 if((i+1)%4==0 && i!=nghostdata-1)
6790 out << endl << " </DataArray>" << endl
6791 << " </CellData>" << endl
6792 << " <Points>" << endl
6793 << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 << "\" format=\"ascii\">" << endl
6794 << " " << std::fixed;
6795 for(int i = 0; i < nofNodes; i++)
6797 for(int j = 0; j < 3; ++j){
6798 if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6799 if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6800 if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6802 if((i+1)%4==0 && i!=nofNodes-1)
6805 for(int i = 0; i < nofGhostNodes; i++)
6807 for(int j = 0; j < 3; ++j){
6808 if (j==0) out << std::setprecision(6) << trans.mapX(octree.ghostsnodes[i][j]) << " ";
6809 if (j==1) out << std::setprecision(6) << trans.mapY(octree.ghostsnodes[i][j]) << " ";
6810 if (j==2) out << std::setprecision(6) << trans.mapZ(octree.ghostsnodes[i][j]) << " ";
6812 if((i+1)%4==0 && i!=nofGhostNodes-1)
6815 out << endl << " </DataArray>" << endl
6816 << " </Points>" << endl
6817 << " <Cells>" << endl
6818 << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6820 for(int i = 0; i < nofOctants; i++)
6822 for(int j = 0; j < global3D.nnodes; j++)
6824 out << octree.connectivity[i][j] << " ";
6826 if((i+1)%3==0 && i!=nofOctants-1)
6829 for(int i = 0; i < nofGhostOctants; i++)
6831 for(int j = 0; j < global3D.nnodes; j++)
6833 out << octree.ghostsconnectivity[i][j] + nofNodes << " ";
6835 if((i+1)%3==0 && i!=nofGhostOctants-1)
6838 out << endl << " </DataArray>" << endl
6839 << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6841 for(int i = 0; i < nofAll; i++)
6843 out << (i+1)*global3D.nnodes << " ";
6844 if((i+1)%12==0 && i!=nofAll-1)
6847 out << endl << " </DataArray>" << endl
6848 << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6850 for(int i = 0; i < nofAll; i++)
6855 if((i+1)%12==0 && i!=nofAll-1)
6858 out << endl << " </DataArray>" << endl
6859 << " </Cells>" << endl
6860 << " </Piece>" << endl
6861 << " </UnstructuredGrid>" << endl
6862 << "</VTKFile>" << endl;
6867 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6868 ofstream pout(name.str().c_str());
6869 if(!pout.is_open()){
6871 ss << filename << "*.pvtu cannot be opened and it won't be written.";
6872 log.writeLog(ss.str());
6876 pout << "<?xml version=\"1.0\"?>" << endl
6877 << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6878 << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6879 << " <PPointData>" << endl
6880 << " </PPointData>" << endl
6881 << " <PCellData Scalars=\"Data\">" << endl
6882 << " <PDataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\"/>" << endl
6883 << " </PCellData>" << endl
6884 << " <PPoints>" << endl
6885 << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6886 << " </PPoints>" << endl;
6887 for(int i = 0; i < nproc; i++)
6888 pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6889 pout << " </PUnstructuredGrid>" << endl
6899 octree.clearConnectivity();
6900 octree.clearGhostsConnectivity();
uint64_t computeMorton() const
void getFaceCenter(uint32_t idx, uint8_t iface, vector< double > ¢er)
Class_Para_Tree(double &X, double &Y, double &Z, double &L, ivector2D &XYZ, ivector &levels, string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
Class_Octant< 3 > * getOctant(uint32_t idx)
const u32vector2D & getNodes()
void setBalance(uint32_t idx, bool balance)
bool getBalance(uint32_t idx)
dvector getEdgeCenter(uint8_t iedge)
Bundle char container for communications.
uint64_t * partition_first_desc
void getFaceCenter(Class_Octant< 3 > *oct, uint8_t iface, vector< double > ¢er)
vector< double > getEdgeCenter(uint32_t idx, uint8_t iedge)
Parallel Octree Manager Class.
u32vector getOwners(Class_Intersection< 3 > *inter)
void findNeighbours(uint32_t idx, uint8_t iface, uint8_t codim, u32vector &neighbours, vector< bool > &isghost)
Class_Local_Tree< 3 > octree
Parallel Octree Manager Class - 3D specialization.
void findNeighbours(Class_Octant< 3 > *oct, uint8_t iface, uint8_t codim, u32vector &neighbours, vector< bool > &isghost)
uint64_t * partition_last_desc
void assign(uint32_t stride, uint32_t length)
Base class for data communications.
u32vector getOctantConnectivity(uint32_t idx)
void write(string filename)
double getZ(Class_Octant< 3 > *const oct)
uint8_t getBalanceCodimension() const
bool getPbound(uint8_t face) const
void findGhostNeighbours(uint32_t idx, uint8_t iface, uint8_t codim, u32vector &neighbours)
int8_t getMarker(uint32_t idx)
void getNormal(Class_Octant< 3 > *oct, uint8_t &iface, dvector &normal)
double getSize(Class_Octant< 3 > *const oct)
double getSize(uint32_t idx)
uint32_t getPointOwnerIdx(u32vector &point)
uint8_t getLevel(Class_Octant< 3 > *oct)
void setBalanceCodimension(uint8_t b21codim)
Class_Octant< 3 > * getPointOwner(dvector &point)
void getNormal(uint8_t &iface, vector< int8_t > &normal)
bool getBound(Class_Octant< 3 > *oct)
dvector getNormal(uint32_t idx, uint8_t &iface)
void getCenter(Class_Octant< 3 > *oct, dvector ¢er)
bool getIsNewC(Class_Octant< 3 > *oct)
vector< double > getFaceCenter(Class_Octant< 3 > *oct, uint8_t iface)
void getNodes(Class_Octant< 3 > *oct, dvector2D &nodes)
double getVolume(uint32_t idx)
bool adaptGlobalCoarse(u32vector &mapidx)
void loadBalance(Class_Data_LB_Interface< Impl > &userData, uint8_t &level)
uint8_t getLocalMaxDepth() const
void loadBalance(uint8_t &level)
void resizeGhost(uint32_t newSize)
dvector getNodeCoordinates(uint32_t inode)
void setMarker(uint32_t idx, int8_t marker)
u32vector getOctantConnectivity(Class_Octant< 3 > *oct)
bool getNotBalance() const
bool adapt(bool mapper_flag)
double getX(Class_Octant< 3 > *const oct)
bool getPbound(Class_Octant< 3 > *oct)
vector< double > getNode(uint32_t idx, uint8_t inode)
void computeIntersections()
double mapSize(uint32_t const &size)
dvector getNormal(Class_Intersection< 3 > *inter)
dvector2D getNodes(uint32_t idx)
uint32_t getNumOctants() const
uint32_t getLogicalPointOwnerIdx(dvector &point)
void scatter(Buffer &buff, const uint32_t e)
uint8_t getLevel(uint32_t idx)
double mapArea(uint64_t const &area)
bool getPbound(Class_Intersection< 3 > *inter)
void getNode(uint32_t idx, uint8_t inode, vector< double > &node)
bool adaptGlobalRefine(u32vector &mapidx)
void writeTest(string filename, vector< double > data, vector< double > ghostdata)
double getArea(uint32_t idx)
void writeLogical(string filename)
u32vector getGhostNodeLogicalCoordinates(uint32_t inode)
Class_Para_Tree(string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
map< int, vector< uint32_t > > bordersPerProc
bool getIsGhost(uint32_t idx)
double getY(uint32_t idx)
void computeGhostsConnectivity()
void communicate(Class_Data_Comm_Interface< Impl > &userData)
int8_t getMarker(Class_Octant< 3 > *oct)
bool getBound(Class_Intersection< 3 > *inter)
Base class for data communications.
void setMarker(Class_Octant< 3 > *oct, int8_t marker)
void getMapping(uint32_t &idx, u32vector &mapper, vector< bool > &isghost)
void mapNodesIntersection(uint32_t(*nodes)[3], vector< vector< double > > &mapnodes)
uint64_t getGlobalIdx(uint32_t idx)
Customized array definition.
Local octree portion for each process - 3D specialization.
bool getPbound(Class_Octant< 3 > *oct, uint8_t iface)
uint64_t getGhostGlobalIdx(uint32_t idx)
uint8_t getFace(Class_Intersection< 3 > *inter)
double getSize(Class_Intersection< 3 > *inter)
size_t size(const uint32_t e) const
dvector2D getNodes(Class_Octant< 3 > *oct)
void getEdgeCenter(uint32_t idx, uint8_t iedge, vector< double > ¢er)
void getCenter(uint32_t idx, vector< double > ¢er)
const u32vector2D & getGhostNodes()
bool getIsGhost(Class_Octant< 3 > *oct)
Octant class definition - 3D specialization.
Intersection class definition - 3D specialization.
void scatter(Buffer &buff, const uint32_t e)
void mapCenter(double *¢er, vector< double > &mapcenter)
void mapNodes(uint32_t(*nodes)[3], vector< vector< double > > &mapnodes)
uint8_t getLevel(Class_Intersection< 3 > *inter)
uint64_t getGlobalIdx(Class_Octant< 3 > *oct)
dvector2D getNodes(Class_Intersection< 3 > *inter)
void move(const uint32_t from, const uint32_t to)
Class_Octant< 3 > * getGhostOctant(uint32_t idx)
void setBalance(bool balance)
bool getIsGhost(Class_Intersection< 3 > *inter)
void mapNode(vector< uint32_t > &node, vector< double > &mapnode)
uint32_t getIdx(Class_Octant< 3 > *oct)
uint64_t getVolume() const
uint32_t getPointOwnerIdx(dvector &point)
vector< double > getCenter(uint32_t idx)
void computeConnectivity()
double getY(Class_Octant< 3 > *const oct)
vector< double > getFaceCenter(uint32_t idx, uint8_t iface)
void gather(Buffer &buff, const uint32_t e)
bool adapt(u32vector &mapper)
double getVolume(Class_Octant< 3 > *const oct)
Class_Para_Tree(double X, double Y, double Z, double L, ivector2D &XYZ, ivector &levels, string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
int findOwner(const uint64_t &morton)
double getArea(Class_Intersection< 3 > *inter)
void getNodes(uint32_t idx, dvector2D &nodes)
uint64_t global_num_octants
vector< double > getCenter(Class_Intersection< 3 > *inter)
dvector getFaceCenter(uint8_t iface)
const u32vector2D & getConnectivity()
void resize(uint32_t newSize)
double getZ(uint32_t idx)
void loadBalance(Class_Data_LB_Interface< Impl > &userData, dvector *weight=NULL)
void writeTest(string filename, vector< double > data)
Local octree portion for each process.
Class_Octant< 3 > * getPointOwner(u32vector &point)
bool getBound(uint8_t face) const
void gather(Buffer &buff, const uint32_t e)
dvector getGhostNodeCoordinates(uint32_t inode)
u32vector getGhostOctantConnectivity(uint32_t idx)
void getEdgeCenter(Class_Octant< 3 > *oct, uint8_t iedge, vector< double > ¢er)
Global variables used in PABLO - 3D specialization.
bool getBalance(Class_Octant< 3 > *oct)
void updateGhostsConnectivity()
bool getBound(Class_Octant< 3 > *oct, uint8_t iface)
Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log", MPI_Comm comm_=MPI_COMM_WORLD)
Class_Intersection< 3 > * getIntersection(uint32_t idx)
size_t size(const uint32_t e) const
void setMarker(int8_t marker)
u32vector getGhostOctantConnectivity(Class_Octant< 3 > *oct)
const u32vector2D & getGhostConnectivity()
void clearGhostsConnectivity()
vector< double > getEdgeCenter(Class_Octant< 3 > *oct, uint8_t iedge)
uint32_t getNumIntersections()
vector< double > getCenter(Class_Octant< 3 > *oct)
uint64_t * partition_range_globalidx
vector< pair< pair< uint32_t, uint32_t >, pair< int, int > > > mapPablos(Class_Para_Tree< 3 > &ptree)
u32vector getNodeLogicalCoordinates(uint32_t inode)
double getX(uint32_t idx)
void getNodes(u32vector2D &nodes)
void mapNormals(vector< int8_t > normal, vector< double > &mapnormal)
bool getFiner(Class_Intersection< 3 > *inter)
Class_Octant< 3 > * getLogicalPointOwner(dvector &point)
double getArea(Class_Octant< 3 > *const oct)
void updateConnectivity()
bool getIsNewR(Class_Octant< 3 > *oct)
void getNormal(uint32_t idx, uint8_t &iface, dvector &normal)
const Class_Global< 3 > & getGlobal()
void setBalance(Class_Octant< 3 > *oct, bool balance)
double mapX(uint32_t const &X)
bool getIsNewC(uint32_t idx)
uint32_t getNumGhosts() const
double mapZ(uint32_t const &Z)
dvector getNormal(Class_Octant< 3 > *oct, uint8_t &iface)
double mapVolume(uint64_t const &volume)
bool getIsNewR(uint32_t idx)
double mapY(uint32_t const &Y)