32 typedef vector<Class_Octant<2> > OctantsType;
33 typedef vector<uint32_t> u32vector;
34 typedef vector<double> dvector;
35 typedef vector<vector<uint32_t> > u32vector2D;
36 typedef vector<vector<uint64_t> > u64vector2D;
37 typedef vector<vector<double> > dvector2D;
38 typedef vector<int> ivector;
39 typedef vector<vector<int> > ivector2D;
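// The container aliases above are what the mapped-domain "restart" constructor further
// below expects for its XY / levels arguments. A hedged, standalone usage sketch (the
// header name, the <2> specialization and all coordinate values are assumptions, not
// taken from this listing):
#include <mpi.h>
#include <vector>
#include "Class_Para_Tree.hpp"
using namespace std;

int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    {
        vector<vector<int> > XY(2, vector<int>(2, 0)); // one {x0, y0} logical vertex per octant
        vector<int> levels(2, 1);                      // refinement level of each octant
        XY[1][0] = 512;                                // hypothetical logical coordinate
        Class_Para_Tree<2> ptree_default;                                  // default tree on the unit logical domain
        Class_Para_Tree<2> ptree_restart(0.0, 0.0, 0.0, 1.0, XY, levels); // restart tree on [0,1]^2
    }
    MPI_Finalize();
    return 0;
}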
90 Class_Para_Tree(string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD) : log(logfile,comm_),comm(comm_){
97 global_num_octants = octree.getNumOctants();
99 error_flag = MPI_Comm_size(comm,&nproc);
100 error_flag = MPI_Comm_rank(comm,&rank);
105 partition_first_desc = new uint64_t[nproc];
106 partition_last_desc = new uint64_t[nproc];
107 partition_range_globalidx = new uint64_t[nproc];
108 uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
109 uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
110 for(int p = 0; p < nproc; ++p){
111 partition_range_globalidx[p] = 0;
112 partition_last_desc[p] = lastDescMorton;
113 partition_first_desc[p] = firstDescMorton;
116 log.writeLog("---------------------------------------------");
117 log.writeLog("- PABLO PArallel Balanced Linear Octree -");
118 log.writeLog("---------------------------------------------");
120 log.writeLog("---------------------------------------------");
121 log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
122 log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(2)));
123 log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_2D)));
124 log.writeLog("---------------------------------------------");
142 Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
144 Class_Para_Tree(double X, double Y, double Z, double L, string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
149 global_num_octants = octree.getNumOctants();
151 error_flag = MPI_Comm_size(comm,&nproc);
152 error_flag = MPI_Comm_rank(comm,&rank);
157 partition_first_desc = new uint64_t[nproc];
158 partition_last_desc = new uint64_t[nproc];
159 partition_range_globalidx = new uint64_t[nproc];
160 uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
161 uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
162 for(int p = 0; p < nproc; ++p){
163 partition_range_globalidx[p] = 0;
164 partition_last_desc[p] = lastDescMorton;
165 partition_first_desc[p] = firstDescMorton;
168 log.writeLog("---------------------------------------------");
169 log.writeLog("- PABLO PArallel Balanced Linear Octree -");
170 log.writeLog("---------------------------------------------");
172 log.writeLog("---------------------------------------------");
173 log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
174 log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(2)));
175 log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_2D)));
176 log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
177 log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
178 log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
179 log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
180 log.writeLog("---------------------------------------------");
200 Class_Para_Tree(double X, double Y, double Z, double L, ivector2D & XY, ivector & levels, string logfile="PABLO.log", MPI_Comm comm_ = MPI_COMM_WORLD):trans(X,Y,Z,L),log(logfile,comm_),comm(comm_){
202 Class_Para_Tree(double X, double Y, double Z, double L, ivector2D & XY, ivector & levels, string logfile="PABLO.log"):trans(X,Y,Z,L),log(logfile){
206 uint32_t NumOctants = XY.size();
207 octree.octants.resize(NumOctants);
208 for (uint32_t i=0; i<NumOctants; i++){
209 lev = uint8_t(levels[i]);
210 x0 = uint32_t(XY[i][0]);
211 y0 = uint32_t(XY[i][1]);
230 octree.octants[i] = oct;
235 error_flag = MPI_Comm_size(comm,&nproc);
236 error_flag = MPI_Comm_rank(comm,&rank);
238 if (nproc > 1 ) serial = false;
244 partition_first_desc = new uint64_t[nproc];
245 partition_last_desc = new uint64_t[nproc];
246 partition_range_globalidx = new uint64_t[nproc];
250 octree.updateLocalMaxDepth();
256 log.writeLog("---------------------------------------------");
257 log.writeLog("- PABLO PArallel Balanced Linear Octree -");
258 log.writeLog("---------------------------------------------");
260 log.writeLog("---------------------------------------------");
261 log.writeLog("- PABLO restart -");
262 log.writeLog("---------------------------------------------");
263 log.writeLog(" Number of proc : " + to_string(static_cast<unsigned long long>(nproc)));
264 log.writeLog(" Dimension : " + to_string(static_cast<unsigned long long>(2)));
265 log.writeLog(" Max allowed level : " + to_string(static_cast<unsigned long long>(MAX_LEVEL_2D)));
266 log.writeLog(" Domain Origin : " + to_string(static_cast<unsigned long long>(X)));
267 log.writeLog(" " + to_string(static_cast<unsigned long long>(Y)));
268 log.writeLog(" " + to_string(static_cast<unsigned long long>(Z)));
269 log.writeLog(" Domain Size : " + to_string(static_cast<unsigned long long>(L)));
270 log.writeLog(" Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
271 log.writeLog("---------------------------------------------");
281 log.writeLog("---------------------------------------------");
282 log.writeLog("--------------- R.I.P. PABLO ----------------");
283 log.writeLog("---------------------------------------------");
284 log.writeLog("---------------------------------------------");
351 vector<double>& center) {
352 vector<double> center_ = oct->getCenter();
361 vector<double> center;
362 vector<double> center_ = oct->getCenter();
373 vector<double> center;
420 vector<int8_t> normal_;
433 vector<int8_t> normal_;
479 for(int i = 0; i < global2D.nfaces; ++i)
490 for(int i = 0; i < global2D.nfaces; ++i)
537 if (getIsGhost(oct)){
539 return octree.globalidx_ghosts[idx];
544 return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
546 return uint64_t(idx);
555 if (getIsGhost(oct)){
636 vector<double>& center) {
637 vector<double> center_ = oct.getCenter();
646 vector<double> center;
647 vector<double> center_ = oct.getCenter();
683 vector<int8_t> normal_;
696 vector<int8_t> normal_;
742 if (getIsGhost(oct)){
744 return octree.globalidx_ghosts[idx];
750 return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
753 return uint64_t(idx);
758 return global_num_octants;
767 if (getIsGhost(oct)){
776 return octree.getNumOctants();
804 return trans.mapX(octree.octants[idx].getX());
812 return trans.mapY(octree.octants[idx].getY());
820 return trans.mapZ(octree.octants[idx].getZ());
828 return trans.mapSize(octree.octants[idx].getSize());
836 return trans.mapSize(octree.octants[idx].getArea());
844 return trans.mapArea(octree.octants[idx].getVolume());
852 vector<double>& center) {
853 vector<double> center_ = octree.octants[idx].getCenter();
862 vector<double> center;
863 vector<double> center_ = octree.octants[idx].getCenter();
874 vector<double> center;
875 vector<double> center_ = octree.octants[idx].getFaceCenter(iface);
886 vector<double> center_ = octree.octants[idx].getFaceCenter(iface);
895 vector<double> getNode(uint32_t idx, uint8_t inode) {
897 u32vector node_ = octree.octants[idx].getNode(inode);
907 void getNode(uint32_t idx, uint8_t inode, vector<double>& node) {
908 u32vector node_ = octree.octants[idx].getNode(inode);
919 octree.octants[idx].getNodes(nodes_);
930 octree.octants[idx].getNodes(nodes_);
943 vector<int8_t> normal_;
944 octree.octants[idx].getNormal(iface, normal_);
956 vector<int8_t> normal_;
957 octree.octants[idx].getNormal(iface, normal_);
967 return octree.getMarker(idx);
975 return octree.getLevel(idx);
983 return !octree.getBalance(idx);
992 return (findOwner(octree.octants[idx].computeMorton()) != rank);
1001 return octree.octants[idx].getIsNewR();
1009 return octree.octants[idx].getIsNewC();
1018 return partition_range_globalidx[rank-1] + uint64_t(idx + 1);
1021 return uint64_t(idx);
1023 return global_num_octants;
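// A standalone restatement of the local-to-global index rule used above: ranks other
// than 0 offset the local index by the last global index owned by rank-1 (stored in
// partition_range_globalidx), while rank 0 uses the local index directly.
#include <cstdint>

uint64_t globalIdxSketch(uint32_t idx, int rank, const uint64_t* partition_range_globalidx) {
    if (rank != 0)
        return partition_range_globalidx[rank - 1] + uint64_t(idx + 1);
    return uint64_t(idx);
}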
1031 if (idx<octree.size_ghosts){
1032 return octree.globalidx_ghosts[idx];
1034 return uint64_t(octree.size_ghosts);
1042 octree.setMarker(idx, marker);
1050 octree.setBalance(idx, !balance);
1067 return octree.getNumOctants();
1074 return octree.getSizeGhost();
1081 return octree.getLocalMaxDepth();
1088 return octree.getBalanceCodim();
1095 octree.setBalanceCodim(b21codim);
1102 return octree.getFirstDesc();
1106 return octree.getLastDesc();
1109 uint64_t getLastDescMorton(uint32_t idx) {
1110 return octree.octants[idx].buildLastDesc().computeMorton();
1116 void setFirstDesc(){
1117 octree.setFirstDesc();
1121 octree.setLastDesc();
1125 return octree.extractOctant(idx) ;
1137 if (idx < octree.getNumOctants()){
1138 return &octree.octants[idx] ;
1148 if (idx < octree.getSizeGhost()){
1149 return &octree.ghosts[idx] ;
1166 u32vector & neighbours,
1167 vector<bool> & isghost){
1170 octree.findNeighbours(idx, iface, neighbours, isghost);
1172 else if (codim == 2){
1173 octree.findNodeNeighbours(idx, iface, neighbours, isghost);
1193 u32vector & neighbours,
1194 vector<bool> & isghost){
1197 octree.findNeighbours(oct, iface, neighbours, isghost);
1199 else if (codim == 2){
1200 octree.findNodeNeighbours(oct, iface, neighbours, isghost);
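// A hedged usage sketch of the neighbour search above: codim 1 walks across a face,
// codim 2 across a node. The parameter order is an assumption based on the bodies in
// this listing, as is the header name.
#include <vector>
#include <cstdint>
#include "Class_Para_Tree.hpp"

void listFaceNeighbours(Class_Para_Tree<2>& ptree, uint32_t idx, uint8_t iface) {
    std::vector<uint32_t> neighbours;
    std::vector<bool>     isghost;
    ptree.findNeighbours(idx, iface, 1, neighbours, isghost);
    // neighbours[i] indexes the local octants when !isghost[i], the ghost octants otherwise.
}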
1220 u32vector & neighbours){
1223 octree.findGhostNeighbours(idx, iface, neighbours);
1225 else if (codim == 2){
1226 octree.findGhostNodeNeighbours(idx, iface, neighbours);
1249 if(morton <= partition_last_desc[seed]){
1251 if(morton > partition_last_desc[seed-1])
1256 if(morton <= partition_last_desc[seed+1])
1260 seed = beg + length/2;
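// A standalone sketch of the owner search above: partition_last_desc holds, for each
// rank, the Morton key of its last descendant in increasing order, so the owner of a
// key is the first rank whose last-descendant key is not smaller than it.
#include <cstdint>
#include <vector>

int findOwnerSketch(uint64_t morton, const std::vector<uint64_t>& partition_last_desc) {
    int beg = 0;
    int end = int(partition_last_desc.size()) - 1;
    while (beg < end) {
        int seed = beg + (end - beg) / 2;
        if (morton <= partition_last_desc[seed])
            end = seed;          // owner is seed or a lower rank
        else
            beg = seed + 1;      // owner is a higher rank
    }
    return beg;
}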
1279 u32vector & neighbours,
1280 vector<bool> & isghost){
1283 octree.findNeighbours(&oct, iface, neighbours, isghost);
1285 else if (codim == 2){
1286 octree.findNodeNeighbours(&oct, iface, neighbours, isghost);
1302 return octree.intersections.size();
1310 if (idx < octree.intersections.size()){
1311 return &octree.intersections[idx];
1321 if(inter->finer && inter->isghost)
1322 return octree.extractGhostOctant(inter->owners[inter->finer]).getLevel();
1324 return octree.extractOctant(inter->owners[inter->finer]).getLevel();
1332 return inter->finer;
1340 return inter->getBound();
1348 return inter->getIsGhost();
1356 return inter->getPbound();
1364 return inter->iface;
1372 u32vector owners(2);
1373 owners[0] = inter->owners[0];
1374 owners[1] = inter->owners[1];
1384 if(inter->finer && inter->isghost)
1385 Size = octree.extractGhostOctant(inter->owners[inter->finer]).getSize();
1387 Size = octree.extractOctant(inter->owners[inter->finer]).getSize();
1397 if(inter->finer && inter->isghost)
1398 Area = octree.extractGhostOctant(inter->owners[1]).getArea();
1400 Area = octree.extractOctant(inter->owners[inter->finer]).getArea();
1409 vector<double> center;
1411 if(inter->finer && inter->isghost)
1412 oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1414 oct = octree.extractOctant(inter->owners[inter->finer]);
1415 vector<double> center_ = oct.getCenter();
1416 int sign = ( int(2*((inter->iface)%2)) - 1);
1417 double deplace = double (sign * int(oct.getSize())) / 2;
1418 center_[inter->iface/2] = uint32_t(int(center_[inter->iface/2]) + deplace);
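// A standalone restatement of the face-center computation above: the intersection
// center is the finer owner's center displaced by half the owner's size along axis
// iface/2, in the negative direction for even faces and positive for odd faces.
#include <cstdint>
#include <vector>

std::vector<double> faceCenterSketch(const std::vector<double>& ownerCenter,
                                     double ownerSize, uint8_t iface) {
    std::vector<double> center = ownerCenter;
    int sign = int(2 * (iface % 2)) - 1;   // -1 for even faces, +1 for odd faces
    center[iface / 2] += sign * ownerSize / 2.0;
    return center;
}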
1430 if(inter->finer && inter->isghost)
1431 oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1433 oct = octree.extractOctant(inter->owners[inter->finer]);
1434 uint8_t iface = inter->iface;
1435 u32vector2D nodes_all;
1439 for (int j=0; j<3; j++){
1440 nodes_[i][j] = nodes_all[global2D.facenode[iface][i]][j];
1454 if(inter->finer && inter->isghost)
1455 oct = octree.extractGhostOctant(inter->owners[inter->finer]);
1457 oct = octree.extractOctant(inter->owners[inter->finer]);
1458 uint8_t iface = inter->iface;
1459 vector<int8_t> normal_;
1471 if(inter.finer && inter.isghost)
1472 Size = octree.extractGhostOctant(inter.owners[inter.finer]).getSize();
1474 Size = octree.extractOctant(inter.owners[inter.finer]).getSize();
1480 if(inter.finer && inter.isghost)
1481 Area = octree.extractGhostOctant(inter.owners[inter.finer]).getArea();
1483 Area = octree.extractOctant(inter.owners[inter.finer]).getArea();
1489 if(inter.finer && inter.isghost)
1490 oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1492 oct = octree.extractOctant(inter.owners[inter.finer]);
1493 vector<double> center_ = oct.getCenter();
1494 int sign = ( int(2*((inter.iface)%2)) - 1);
1495 double deplace = double (sign * int(oct.getSize())) / 2;
1496 center_[inter.iface/2] = uint32_t(int(center_[inter.iface/2]) + deplace);
1501 dvector2D & nodes) {
1503 if(inter.finer && inter.isghost)
1504 oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1506 oct = octree.extractOctant(inter.owners[inter.finer]);
1507 uint8_t iface = inter.iface;
1508 u32vector2D nodes_all;
1512 for (int j=0; j<3; j++){
1513 nodes_[i][j] = nodes_all[global2D.facenode[iface][i]][j];
1522 if(inter.finer && inter.isghost)
1523 oct = octree.extractGhostOctant(inter.owners[inter.finer]);
1525 oct = octree.extractOctant(inter.owners[inter.finer]);
1526 uint8_t iface = inter.iface;
1527 vector<int8_t> normal_;
1538 octree.computeIntersections();
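// The point-owner routines below key on mortonEncode_magicbits(x,y). The library's own
// implementation is not shown in this listing; the following is only a standalone sketch
// of the standard 2D "magic bits" interleaving, given to illustrate the encoding.
#include <cstdint>

uint64_t mortonEncodeSketch(uint32_t x, uint32_t y) {
    uint64_t answer = 0;
    for (int dim = 0; dim < 2; ++dim) {
        uint64_t v = (dim == 0) ? x : y;          // spread each 32-bit coordinate
        v = (v | (v << 16)) & 0x0000FFFF0000FFFFULL;
        v = (v | (v << 8))  & 0x00FF00FF00FF00FFULL;
        v = (v | (v << 4))  & 0x0F0F0F0F0F0F0F0FULL;
        v = (v | (v << 2))  & 0x3333333333333333ULL;
        v = (v | (v << 1))  & 0x5555555555555555ULL;
        answer |= v << dim;                       // interleave x into even bits, y into odd bits
    }
    return answer;
}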
1550 uint32_t noctants = octree.octants.size();
1551 uint32_t idxtry = noctants/2;
1553 uint64_t morton, mortontry;
1556 x = trans.mapX(point[0]);
1557 y = trans.mapY(point[1]);
1560 || (point[0] < trans.X0) || (point[1] < trans.Y0))
1565 morton = mortonEncode_magicbits(x,y);
1568 if (!serial) powner = findOwner(morton);
1572 if ((powner!=rank) && (!serial))
1575 int32_t jump = idxtry;
1576 while(abs(jump) > 0){
1577 mortontry = octree.octants[idxtry].computeMorton();
1578 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1580 if (idxtry > noctants-1){
1582 idxtry = noctants - 1;
1591 if(octree.octants[idxtry].computeMorton() == morton){
1592 return &octree.octants[idxtry];
1597 while(octree.octants[idxtry].computeMorton() < morton){
1599 if(idxtry > noctants-1){
1600 idxtry = noctants-1;
1604 while(octree.octants[idxtry].computeMorton() > morton){
1606 if(idxtry > noctants-1){
1612 return &octree.octants[idxtry];
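// A standalone sketch of the jump search used by the point-owner routines above and
// below: starting from the middle octant, a signed jump whose sign follows the Morton
// comparison is halved until it vanishes, then linear steps settle on the last octant
// whose Morton key does not exceed the target.
#include <cstddef>
#include <cstdint>
#include <vector>

uint32_t jumpSearchSketch(const std::vector<uint64_t>& mortons, uint64_t target) {
    if (mortons.empty()) return 0;
    int64_t n = int64_t(mortons.size());
    int64_t idxtry = n / 2;
    int64_t jump = idxtry;
    while (jump != 0) {
        uint64_t mortontry = mortons[std::size_t(idxtry)];
        jump = ((mortontry < target) - (mortontry > target)) * (jump < 0 ? -jump : jump) / 2;
        idxtry += jump;
        if (idxtry > n - 1) idxtry = n - 1;
        if (idxtry < 0)     idxtry = 0;
    }
    while (idxtry < n - 1 && mortons[std::size_t(idxtry)] < target) ++idxtry;
    while (idxtry > 0     && mortons[std::size_t(idxtry)] > target) --idxtry;
    return uint32_t(idxtry);
}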
1621 uint32_t noctants = octree.octants.size();
1622 uint32_t idxtry = noctants/2;
1624 uint64_t morton, mortontry;
1627 x = trans.mapX(point[0]);
1628 y = trans.mapY(point[1]);
1631 || (point[0] < trans.X0) || (point[1] < trans.Y0))
1636 morton = mortonEncode_magicbits(x,y);
1639 if(!serial) powner = findOwner(morton);
1644 if ((powner!=rank) && (!serial))
1648 int32_t jump = idxtry;
1649 while(abs(jump) > 0){
1650 mortontry = octree.octants[idxtry].computeMorton();
1651 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1653 if (idxtry > noctants-1){
1655 idxtry = noctants - 1;
1664 if(octree.octants[idxtry].computeMorton() == morton){
1670 while(octree.octants[idxtry].computeMorton() < morton){
1672 if(idxtry > noctants-1){
1673 idxtry = noctants-1;
1677 while(octree.octants[idxtry].computeMorton() > morton){
1679 if(idxtry > noctants-1){
1692 uint32_t noctants = octree.octants.size();
1693 uint32_t idxtry = noctants/2;
1695 uint64_t morton, mortontry;
1698 x = trans.mapX(point[0]);
1699 y = trans.mapY(point[1]);
1702 || (point[0] < trans.X0) || (point[1] < trans.Y0)){
1709 morton = mortonEncode_magicbits(x,y);
1712 if (!serial) powner = findOwner(morton);
1716 if ((powner!=rank) && (!serial)){
1721 int32_t jump = idxtry;
1722 while(abs(jump) > 0){
1723 mortontry = octree.octants[idxtry].computeMorton();
1724 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1726 if (idxtry > noctants-1){
1728 idxtry = noctants - 1;
1737 if(octree.octants[idxtry].computeMorton() == morton){
1738 return octree.octants[idxtry];
1743 while(octree.octants[idxtry].computeMorton() < morton){
1745 if(idxtry > noctants-1){
1746 idxtry = noctants-1;
1750 while(octree.octants[idxtry].computeMorton() > morton){
1752 if(idxtry > noctants-1){
1753 idxtry = noctants-1;
1758 return octree.octants[idxtry];
1768 uint32_t noctants = octree.octants.size();
1769 uint32_t idxtry = noctants/2;
1771 uint64_t morton, mortontry;
1781 morton = mortonEncode_magicbits(x,y);
1784 if (!serial) powner = findOwner(morton);
1788 if ((powner!=rank) && (!serial))
1791 int32_t jump = idxtry;
1792 while(abs(jump) > 0){
1793 mortontry = octree.octants[idxtry].computeMorton();
1794 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1796 if (idxtry > noctants-1){
1798 idxtry = noctants - 1;
1807 if(octree.octants[idxtry].computeMorton() == morton){
1808 return &octree.octants[idxtry];
1813 while(octree.octants[idxtry].computeMorton() < morton){
1815 if(idxtry > noctants-1){
1816 idxtry = noctants-1;
1820 while(octree.octants[idxtry].computeMorton() > morton){
1822 if(idxtry > noctants-1){
1823 idxtry = noctants-1;
1828 return &octree.octants[idxtry];
1837 uint32_t noctants = octree.octants.size();
1838 uint32_t idxtry = noctants/2;
1840 uint64_t morton, mortontry;
1850 morton = mortonEncode_magicbits(x,y);
1853 if (!serial) powner = findOwner(morton);
1857 if ((powner!=rank) && (!serial))
1860 int32_t jump = idxtry;
1861 while(abs(jump) > 0){
1862 mortontry = octree.octants[idxtry].computeMorton();
1863 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1865 if (idxtry > noctants-1){
1867 idxtry = noctants - 1;
1876 if(octree.octants[idxtry].computeMorton() == morton){
1882 while(octree.octants[idxtry].computeMorton() < morton){
1884 if(idxtry > noctants-1){
1885 idxtry = noctants-1;
1889 while(octree.octants[idxtry].computeMorton() > morton){
1891 if(idxtry > noctants-1){
1892 idxtry = noctants-1;
1906 uint32_t noctants = octree.octants.size();
1907 uint32_t idxtry = noctants/2;
1909 uint64_t morton, mortontry;
1912 x = uint32_t(point[0]);
1913 y = uint32_t(point[1]);
1915 if ((point[0] < 0) || (point[0] > double(global2D.max_length)) || (point[1] < 0) || (point[1] > double(global2D.max_length)))
1920 morton = mortonEncode_magicbits(x,y);
1923 if (!serial) powner = findOwner(morton);
1927 if ((powner!=rank) && (!serial))
1930 int32_t jump = idxtry;
1931 while(abs(jump) > 0){
1932 mortontry = octree.octants[idxtry].computeMorton();
1933 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
1935 if (idxtry > noctants-1){
1937 idxtry = noctants - 1;
1946 if(octree.octants[idxtry].computeMorton() == morton){
1947 return &octree.octants[idxtry];
1952 while(octree.octants[idxtry].computeMorton() < morton){
1954 if(idxtry > noctants-1){
1955 idxtry = noctants-1;
1959 while(octree.octants[idxtry].computeMorton() > morton){
1961 if(idxtry > noctants-1){
1962 idxtry = noctants-1;
1967 return &octree.octants[idxtry];
1976 uint32_t noctants = octree.octants.size();
1977 uint32_t idxtry = noctants/2;
1979 uint64_t morton, mortontry;
1982 x = uint32_t(point[0]);
1983 y = uint32_t(point[1]);
1984 if ((point[0] < 0) || (point[0] > double(global2D.max_length)) || (point[1] < 0) || (point[1] > double(global2D.max_length)))
1989 morton = mortonEncode_magicbits(x,y);
1992 if (!serial) powner = findOwner(morton);
1996 if ((powner!=rank) && (!serial))
1999 int32_t jump = idxtry;
2000 while(abs(jump) > 0){
2001 mortontry = octree.octants[idxtry].computeMorton();
2002 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
2004 if (idxtry > noctants-1){
2006 idxtry = noctants - 1;
2015 if(octree.octants[idxtry].computeMorton() == morton){
2021 while(octree.octants[idxtry].computeMorton() < morton){
2023 if(idxtry > noctants-1){
2024 idxtry = noctants-1;
2028 while(octree.octants[idxtry].computeMorton() > morton){
2030 if(idxtry > noctants-1){
2031 idxtry = noctants-1;
2042 uint32_t noctants = octree.octants.size();
2043 uint32_t idxtry = noctants/2;
2045 uint64_t morton, mortontry;
2058 morton = mortonEncode_magicbits(x,y);
2061 powner = findOwner(morton);
2065 if ((powner!=rank) && (!serial)){
2070 int32_t jump = idxtry;
2071 while(abs(jump) > 0){
2072 mortontry = octree.octants[idxtry].computeMorton();
2073 jump = ((mortontry<morton)-(mortontry>morton))*abs(jump)/2;
2075 if (idxtry > noctants-1){
2077 idxtry = noctants - 1;
2086 if(octree.octants[idxtry].computeMorton() == morton){
2087 return octree.octants[idxtry];
2092 while(octree.octants[idxtry].computeMorton() < morton){
2094 if(idxtry > noctants-1){
2095 idxtry = noctants-1;
2099 while(octree.octants[idxtry].computeMorton() > morton){
2101 if(idxtry > noctants-1){
2102 idxtry = noctants-1;
2107 return octree.octants[idxtry];
2115 void computePartition(uint32_t* partition) {
2116 uint32_t division_result = 0;
2117 uint32_t remind = 0;
2118 division_result = uint32_t(global_num_octants/(uint64_t)nproc);
2119 remind = (uint32_t)(global_num_octants%(uint64_t)nproc);
2120 for(uint32_t i = 0; i < (uint32_t)nproc; ++i)
2122 partition[i] = division_result + 1;
2124 partition[i] = division_result;
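// A standalone restatement of the uniform partition rule above. The branch condition is
// elided in this listing; the usual choice, assumed here, gives the extra octant to the
// first N % nproc ranks so that the counts sum exactly to N.
#include <cstdint>
#include <vector>

std::vector<uint32_t> uniformPartitionSketch(uint64_t N, int nproc) {
    std::vector<uint32_t> partition(nproc);
    uint32_t division_result = uint32_t(N / uint64_t(nproc));
    uint32_t remind          = uint32_t(N % uint64_t(nproc));
    for (int i = 0; i < nproc; ++i)
        partition[i] = (uint32_t(i) < remind) ? division_result + 1 : division_result;
    return partition;
}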
2130 void computePartition(uint32_t* partition, dvector* weight){
2134 double division_result = 0;
2135 double global_weight = 0.0;
2136 for (int i=0; i<weight->size(); i++){
2137 global_weight += (*weight)[i];
2139 division_result = global_weight/(double)nproc;
2143 uint32_t i = 0, tot = 0;
2145 while (iproc < nproc-1){
2146 double partial_weight = 0.0;
2147 partition[iproc] = 0;
2148 while(partial_weight < division_result){
2149 partial_weight += (*weight)[i];
2156 partition[nproc-1] = weight->size() - tot;
2160 double division_result = 0;
2162 dvector local_weight(nproc,0.0);
2163 dvector temp_local_weight(nproc,0.0);
2164 dvector2D sending_weight(nproc, dvector(nproc,0.0));
2165 double* rbuff = new double[nproc];
2166 double global_weight = 0.0;
2167 for (int i=0; i<weight->size(); i++){
2168 local_weight[rank] += (*weight)[i];
2170 error_flag = MPI_Allgather(&local_weight[rank],1,MPI_DOUBLE,rbuff,1,MPI_DOUBLE,comm);
2171 for (int i=0; i<nproc; i++){
2172 local_weight[i] = rbuff[i];
2173 global_weight += rbuff[i];
2175 delete [] rbuff; rbuff = NULL;
2176 division_result = global_weight/(double)nproc;
2180 temp_local_weight = local_weight;
2183 for (int iter = 0; iter < 1; iter++){
2185 vector<double> delta(nproc);
2186 for (int i=0; i<nproc; i++){
2187 delta[i] = temp_local_weight[i] - division_result;
2190 for (int i=0; i<nproc-1; i++){
2192 double post_weight = 0.0;
2193 for (int j=i+1; j<nproc; j++){
2194 post_weight += temp_local_weight[j];
2196 if (temp_local_weight[i] > division_result){
2198 delta[i] = temp_local_weight[i] - division_result;
2199 if (post_weight < division_result*(nproc-i-1)){
2201 double post_delta = division_result*(nproc-i-1) - post_weight;
2202 double delta_sending = min(local_weight[i], min(delta[i], post_delta));
2205 while (delta_sending > 0 && jproc<nproc){
2206 sending = min(division_result, delta_sending);
2207 sending = min(sending, (division_result-temp_local_weight[jproc]));
2208 sending = max(sending, 0.0);
2209 sending_weight[i][jproc] += sending;
2210 temp_local_weight[jproc] += sending;
2211 temp_local_weight[i] -= sending;
2212 delta_sending -= sending;
2213 delta[i] -= delta_sending;
2220 for (int i = nproc-1; i>0; i--){
2222 double pre_weight = 0.0;
2223 for (int j=i-1; j>=0; j--){
2224 pre_weight += temp_local_weight[j];
2226 if (temp_local_weight[i] > division_result){
2228 delta[i] = temp_local_weight[i] - division_result;
2229 if (pre_weight < division_result*(i)){
2231 double pre_delta = division_result*(i) - pre_weight;
2232 double delta_sending = min(local_weight[i], min(temp_local_weight[i], min(delta[i], pre_delta)));
2235 while (delta_sending > 0 && jproc >=0){
2236 sending = min(division_result, delta_sending);
2237 sending = min(sending, (division_result-temp_local_weight[jproc]));
2238 sending = max(sending, 0.0);
2239 sending_weight[i][jproc] += sending;
2240 temp_local_weight[jproc] += sending;
2241 temp_local_weight[i] -= sending;
2242 delta_sending -= sending;
2243 delta[i] -= delta_sending;
2253 u32vector sending_cell(nproc,0);
2254 int i = getNumOctants();
2255 for (int jproc=nproc-1; jproc>rank; jproc--){
2256 double pack_weight = 0.0;
2257 while(pack_weight < sending_weight[rank][jproc] && i > 0){
2259 pack_weight += (*weight)[i];
2260 sending_cell[jproc]++;
2263 partition[rank] = i;
2265 for (int jproc=0; jproc<rank; jproc++){
2266 double pack_weight = 0.0;
2267 while(pack_weight < sending_weight[rank][jproc] && i < getNumOctants()-1){
2269 pack_weight += (*weight)[i];
2270 sending_cell[jproc]++;
2273 partition[rank] -= i;
2276 u32vector rec_cell(nproc,0);
2277 MPI_Request* req = new MPI_Request[nproc*10];
2278 MPI_Status* stats = new MPI_Status[nproc*10];
2280 for (int iproc=0; iproc<nproc; iproc++){
2281 error_flag = MPI_Irecv(&rec_cell[iproc],1,MPI_UINT32_T,iproc,rank,comm,&req[nReq]);
2284 for (int iproc=0; iproc<nproc; iproc++){
2285 error_flag = MPI_Isend(&sending_cell[iproc],1,MPI_UINT32_T,iproc,iproc,comm,&req[nReq]);
2288 MPI_Waitall(nReq,req,stats);
2290 delete [] req; req = NULL;
2291 delete [] stats; stats = NULL;
2294 for (int jproc=0; jproc<nproc; jproc++){
2295 i+= rec_cell[jproc];
2297 partition[rank] += i;
2298 uint32_t part = partition[rank];
2299 error_flag = MPI_Allgather(&part,1,MPI_UINT32_T,partition,1,MPI_UINT32_T,comm);
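// A standalone sketch of the serial branch of the weighted split above: octants are
// walked in order and a rank's chunk is closed once its accumulated weight reaches the
// global average; the last rank takes whatever remains.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint32_t> weightedPartitionSketch(const std::vector<double>& weight, int nproc) {
    double global_weight = 0.0;
    for (std::size_t k = 0; k < weight.size(); ++k) global_weight += weight[k];
    double division_result = global_weight / double(nproc);

    std::vector<uint32_t> partition(nproc, 0);
    std::size_t i = 0;
    uint32_t tot = 0;
    for (int iproc = 0; iproc < nproc - 1; ++iproc) {
        double partial_weight = 0.0;
        while (partial_weight < division_result && i < weight.size()) {
            partial_weight += weight[i];
            ++partition[iproc];
            ++tot;
            ++i;
        }
    }
    partition[nproc - 1] = uint32_t(weight.size()) - tot;  // remainder to the last rank
    return partition;
}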
2305 void computePartition(uint32_t* partition, uint8_t & level_) {
2306 uint8_t level = uint8_t(min(int(max(int(max_depth) - int(level_), int(1))) , MAX_LEVEL_2D));
2307 uint32_t* partition_temp = new uint32_t[nproc];
2308 uint8_t* boundary_proc = new uint8_t[nproc-1];
2309 uint8_t dimcomm, indcomm;
2310 uint8_t* glbdimcomm = new uint8_t[nproc];
2311 uint8_t* glbindcomm = new uint8_t[nproc];
2313 uint32_t division_result = 0;
2314 uint32_t remind = 0;
2315 uint32_t Dh = uint32_t(pow(double(2),double(MAX_LEVEL_2D-level)));
2316 uint32_t istart, nocts, rest, forw, backw;
2317 uint32_t i = 0, iproc, j;
2319 int32_t* pointercomm;
2320 int32_t* deplace = new int32_t[nproc-1];
2321 division_result = uint32_t(global_num_octants/(uint64_t)nproc);
2322 remind = (uint32_t)(global_num_octants%(uint64_t)nproc);
2323 for(uint32_t i = 0; i < (uint32_t)nproc; ++i)
2325 partition_temp[i] = division_result + 1;
2327 partition_temp[i] = division_result;
2331 for (iproc=0; iproc<(uint32_t)(nproc-1); iproc++){
2332 sum += partition_temp[iproc];
2333 while(sum > partition_range_globalidx[j]){
2336 boundary_proc[iproc] = j;
2338 nocts = octree.octants.size();
2342 for (iproc=0; iproc<(uint32_t)(nproc-1); iproc++){
2344 sum += partition_temp[iproc];
2345 if (boundary_proc[iproc] == rank){
2351 istart = sum - partition_range_globalidx[rank-1] - 1;
2356 rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh;
2363 rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh;
2367 rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh;
2374 rest = octree.octants[i].getX()%Dh + octree.octants[i].getY()%Dh;
2378 deplace[iproc] = forw;
2380 deplace[iproc] = -(int32_t)backw;
2384 error_flag = MPI_Allgather(&dimcomm,1,MPI_UINT8_T,glbdimcomm,1,MPI_UINT8_T,comm);
2385 error_flag = MPI_Allgather(&indcomm,1,MPI_UINT8_T,glbindcomm,1,MPI_UINT8_T,comm);
2386 for (iproc=0; iproc<(uint32_t)(nproc); iproc++){
2387 pointercomm = &deplace[glbindcomm[iproc]];
2388 error_flag = MPI_Bcast(pointercomm, glbdimcomm[iproc], MPI_INT32_T, iproc, comm);
2391 for (iproc=0; iproc<(uint32_t)(nproc); iproc++){
2392 if (iproc < (uint32_t)(nproc-1))
2393 partition[iproc] = partition_temp[iproc] + deplace[iproc];
2395 partition[iproc] = partition_temp[iproc];
2397 partition[iproc] = partition[iproc] - deplace[iproc-1];
2400 delete [] partition_temp; partition_temp = NULL;
2401 delete [] boundary_proc; boundary_proc = NULL;
2402 delete [] glbdimcomm; glbdimcomm = NULL;
2403 delete [] glbindcomm; glbindcomm = NULL;
2404 delete [] deplace; deplace = NULL;
2409 void updateLoadBalance() {
2410 octree.updateLocalMaxDepth();
2411 uint64_t* rbuff = new uint64_t[nproc];
2412 uint64_t local_num_octants = octree.getNumOctants();
2413 error_flag = MPI_Allgather(&local_num_octants,1,MPI_UINT64_T,rbuff,1,MPI_UINT64_T,comm);
2414 for(int p = 0; p < nproc; ++p){
2415 partition_range_globalidx[p] = 0;
2416 for(int pp = 0; pp <=p; ++pp)
2417 partition_range_globalidx[p] += rbuff[pp];
2418 --partition_range_globalidx[p];
2421 octree.setFirstDesc();
2422 octree.setLastDesc();
2424 uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
2425 error_flag = MPI_Allgather(&lastDescMorton,1,MPI_UINT64_T,partition_last_desc,1,MPI_UINT64_T,comm);
2426 uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
2427 error_flag = MPI_Allgather(&firstDescMorton,1,MPI_UINT64_T,partition_first_desc,1,MPI_UINT64_T,comm);
2429 delete [] rbuff; rbuff = NULL;
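// A standalone restatement of how partition_range_globalidx is rebuilt above: after
// gathering every rank's octant count, entry p holds the inclusive prefix sum minus one,
// i.e. the global index of the last octant owned by rank p.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint64_t> lastGlobalIdxPerRank(const std::vector<uint64_t>& counts) {
    std::vector<uint64_t> range(counts.size(), 0);
    uint64_t sum = 0;
    for (std::size_t p = 0; p < counts.size(); ++p) {
        sum += counts[p];
        range[p] = sum - 1;   // last global index owned by rank p
    }
    return range;
}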
2434 void setPboundGhosts() {
2444 bordersPerProc.clear();
2448 for(uint8_t i = 0; i < global2D.nfaces; ++i){
2449 if(it->getBound(i) == false){
2450 uint32_t virtualNeighborsSize = 0;
2451 vector<uint64_t> virtualNeighbors = it->computeVirtualMorton(i,max_depth,virtualNeighborsSize);
2452 uint32_t maxDelta = virtualNeighborsSize/2;
2453 for(uint32_t j = 0; j <= maxDelta; ++j){
2454 int pBegin = findOwner(virtualNeighbors[j]);
2455 int pEnd = findOwner(virtualNeighbors[virtualNeighborsSize - 1 - j]);
2456 procs.insert(pBegin);
2458 if(pBegin != rank || pEnd != rank){
2459 it->setPbound(i, true);
2462 it->setPbound(i, false);
2468 for(uint8_t c = 0; c < global2D.nnodes; ++c){
2469 if(!it->getBound(global2D.nodeface[c][0]) && !it->getBound(global2D.nodeface[c][1])){
2470 uint32_t virtualCornerNeighborSize = 0;
2471 uint64_t virtualCornerNeighbor = it ->computeNodeVirtualMorton(c,max_depth,virtualCornerNeighborSize);
2472 if(virtualCornerNeighborSize){
2473 int proc = findOwner(virtualCornerNeighbor);
2479 set<int>::iterator pitend = procs.end();
2480 for(set<int>::iterator pit = procs.begin(); pit != pitend; ++pit){
2484 bordersPerProc[p].push_back(distance(begin,it));
2485 vector<uint32_t> & bordersSingleProc = bordersPerProc[p];
2486 if(bordersSingleProc.capacity() - bordersSingleProc.size() < 2)
2487 bordersSingleProc.reserve(2*bordersSingleProc.size());
2498 uint64_t global_index;
2503 map<int,Class_Comm_Buffer> sendBuffers;
2504 map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
2505 uint32_t pbordersOversize = 0;
2506 for(map<int,vector<uint32_t> >::iterator bit = bordersPerProc.begin(); bit != bitend; ++bit){
2507 pbordersOversize += bit->second.size();
2509 int key = bit->first;
2510 const vector<uint32_t> & value = bit->second;
2513 int nofBorders = value.size();
2514 for(int i = 0; i < nofBorders; ++i){
2521 global_index = getGlobalIdx(value[i]);
2522 for(int i = 0; i < 12; ++i)
2523 info[i] = octant.info[i];
2524 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2525 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2526 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2527 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2528 for(int j = 0; j < 12; ++j){
2529 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2531 error_flag = MPI_Pack(&global_index,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
2538 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
2539 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
2541 map<int,int> recvBufferSizePerProc;
2542 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
2543 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2544 recvBufferSizePerProc[sit->first] = 0;
2545 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
2548 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
2549 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2550 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
2553 MPI_Waitall(nReq,req,stats);
2559 uint32_t nofBytesOverProc = 0;
2560 map<int,Class_Comm_Buffer> recvBuffers;
2561 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
2562 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
2566 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2567 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
2568 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
2571 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2572 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
2575 MPI_Waitall(nReq,req,stats);
2581 octree.size_ghosts = nofGhosts;
2582 octree.ghosts.clear();
2583 octree.ghosts.resize(nofGhosts);
2584 octree.globalidx_ghosts.resize(nofGhosts);
2589 uint32_t ghostCounter = 0;
2590 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
2591 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
2594 for(int i = 0; i < nofGhostsPerProc; ++i){
2595 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
2596 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
2597 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
2599 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
2600 octree.ghosts[ghostCounter].setMarker(m);
2601 for(int j = 0; j < 12; ++j){
2602 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
2603 octree.ghosts[ghostCounter].info[j] = info[j];
2605 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&global_index,1,MPI_UINT64_T,comm);
2606 octree.globalidx_ghosts[ghostCounter] = global_index;
2610 recvBuffers.clear();
2611 sendBuffers.clear();
2612 recvBufferSizePerProc.clear();
2614 delete [] req; req = NULL;
2615 delete [] stats; stats = NULL;
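// A standalone sketch of the two-phase exchange pattern used above (and again in
// loadBalance below): buffer sizes are exchanged first with matching nonblocking
// Irecv/Isend pairs and a single Waitall, after which the packed payloads can be moved
// into buffers of the right size. Communication is assumed symmetric here, i.e. every
// rank we send to also sends to us; names and types are illustrative, not the library's.
#include <mpi.h>
#include <map>
#include <vector>

void exchangeSizesSketch(MPI_Comm comm, int rank,
                         std::map<int, int>& sendSizes,   // destination rank -> bytes to send
                         std::map<int, int>& recvSizes) { // filled: source rank -> bytes to receive
    std::vector<MPI_Request> req(2 * sendSizes.size());
    int nReq = 0;
    for (std::map<int, int>::iterator it = sendSizes.begin(); it != sendSizes.end(); ++it) {
        recvSizes[it->first] = 0;
        MPI_Irecv(&recvSizes[it->first], 1, MPI_INT, it->first, rank, comm, &req[nReq++]);
    }
    for (std::map<int, int>::iterator it = sendSizes.begin(); it != sendSizes.end(); ++it) {
        MPI_Isend(&it->second, 1, MPI_INT, it->first, it->first, comm, &req[nReq++]);
    }
    if (nReq > 0) MPI_Waitall(nReq, &req[0], MPI_STATUSES_IGNORE);
}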
2629 log.writeLog("---------------------------------------------");
2630 log.writeLog(" LOAD BALANCE ");
2632 uint32_t* partition = new uint32_t [nproc];
2633 computePartition(partition);
2637 log.writeLog(" Initial Serial distribution : ");
2638 for(int ii=0; ii<nproc; ii++){
2639 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
2642 uint32_t stride = 0;
2643 for(int i = 0; i < rank; ++i)
2644 stride += partition[i];
2648 octree.octants.assign(first, last);
2649 #if defined(__INTEL_COMPILER) || defined(__ICC)
2651 octree.octants.shrink_to_fit();
2653 first = octantsCopy.end();
2654 last = octantsCopy.end();
2657 updateLoadBalance();
2664 log.writeLog(" Initial Parallel partition : ");
2665 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
2666 for(int ii=1; ii<nproc; ii++){
2667 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
2671 octree.ghosts.clear();
2672 octree.size_ghosts = 0;
2674 uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
2675 for(int p = 0; p < nproc; ++p){
2676 newPartitionRangeGlobalidx[p] = 0;
2677 for(int pp = 0; pp <= p; ++pp)
2678 newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
2679 --newPartitionRangeGlobalidx[p];
2687 lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
2691 else if(lh > (int32_t)(octree.octants.size() - 1))
2692 lh = octree.octants.size() - 1;
2694 if(rank == nproc - 1)
2695 ft = octree.octants.size();
2697 ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
2699 ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
2701 if(ft > (int32_t)(octree.octants.size() - 1))
2702 ft = octree.octants.size();
2707 uint32_t headSize = (uint32_t)(lh + 1);
2708 uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
2709 uint32_t headOffset = headSize;
2710 uint32_t tailOffset = tailSize;
2713 map<int,Class_Comm_Buffer> sendBuffers;
2716 int64_t firstOctantGlobalIdx = 0;
2717 int64_t globalLastHead = (int64_t) lh;
2718 int64_t globalFirstTail = (int64_t) ft;
2719 int firstPredecessor = -1;
2720 int firstSuccessor = nproc;
2722 firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
2723 globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
2724 globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
2725 for(int pre = rank - 1; pre >=0; --pre){
2726 if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
2727 firstPredecessor = pre;
2729 for(int post = rank + 1; post < nproc; ++post){
2730 if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
2731 firstSuccessor = post;
2746 uint32_t nofElementsFromSuccessiveToPrevious = 0;
2748 for(int p = firstPredecessor; p >= 0; --p){
2749 if(headSize < partition[p]){
2751 intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
2752 intBuffer = abs(intBuffer);
2753 nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
2754 if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
2755 nofElementsFromSuccessiveToPrevious = headSize;
2757 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2761 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
2768 for(int ii = 0; ii < 12; ++ii)
2769 info[ii] = octant.info[ii];
2770 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2771 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2772 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2773 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2774 for(int j = 0; j < 12; ++j){
2775 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2779 if(nofElementsFromSuccessiveToPrevious == headSize)
2782 lh -= nofElementsFromSuccessiveToPrevious;
2783 globalLastHead -= nofElementsFromSuccessiveToPrevious;
2789 nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
2790 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2793 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
2800 for(int i = 0; i < 12; ++i)
2801 info[i] = octant.info[i];
2802 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2803 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2804 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2805 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2806 for(int j = 0; j < 12; ++j){
2807 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2810 lh -= nofElementsFromSuccessiveToPrevious;
2811 globalLastHead -= nofElementsFromSuccessiveToPrevious;
2819 uint32_t nofElementsFromPreviousToSuccessive = 0;
2823 for(int p = firstSuccessor; p < nproc; ++p){
2824 if(tailSize < partition[p]){
2826 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
2827 if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
2828 nofElementsFromPreviousToSuccessive = tailSize;
2830 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2833 uint32_t octantsSize = (uint32_t)octree.octants.size();
2834 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
2841 for(int ii = 0; ii < 12; ++ii)
2842 info[ii] = octant.info[ii];
2843 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2844 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2845 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2846 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2847 for(int j = 0; j < 12; ++j){
2848 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2852 if(nofElementsFromPreviousToSuccessive == tailSize)
2854 ft += nofElementsFromPreviousToSuccessive;
2855 globalFirstTail += nofElementsFromPreviousToSuccessive;
2856 tailSize -= nofElementsFromPreviousToSuccessive;
2860 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
2861 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
2864 uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
2866 for(uint32_t i = ft; i <= endOctants; ++i ){
2873 for(int i = 0; i < 12; ++i)
2874 info[i] = octant.info[i];
2875 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2876 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2877 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2878 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2879 for(int j = 0; j < 12; ++j){
2880 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
2885 ft += nofElementsFromPreviousToSuccessive;
2886 globalFirstTail += nofElementsFromPreviousToSuccessive;
2887 tailSize -= nofElementsFromPreviousToSuccessive;
2895 vector<Class_Array> recvs(nproc);
2896 recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
2897 recvs[rank].array[0] = rank;
2899 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
2900 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
2901 recvs[rank].array[counter] = sit->first;
2904 int* nofRecvsPerProc = new int[nproc];
2905 error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
2906 int globalRecvsBuffSize = 0;
2907 int* displays = new int[nproc];
2908 for(int pp = 0; pp < nproc; ++pp){
2910 globalRecvsBuffSize += nofRecvsPerProc[pp];
2911 for(int ppp = 0; ppp < pp; ++ppp){
2912 displays[pp] += nofRecvsPerProc[ppp];
2915 int* globalRecvsBuff = new int[globalRecvsBuffSize];
2916 error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
2918 vector<set<int> > sendersPerProc(nproc);
2919 for(int pin = 0; pin < nproc; ++pin){
2920 for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
2921 sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
2926 MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
2927 MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
2929 map<int,int> recvBufferSizePerProc;
2930 set<int>::iterator senditend = sendersPerProc[rank].end();
2931 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
2932 recvBufferSizePerProc[*sendit] = 0;
2933 error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
2936 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
2937 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2938 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
2941 MPI_Waitall(nReq,req,stats);
2946 uint32_t nofNewHead = 0;
2947 uint32_t nofNewTail = 0;
2948 map<int,Class_Comm_Buffer> recvBuffers;
2949 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
2950 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
2952 uint32_t nofNewPerProc = (uint32_t)(rit->second / (uint32_t)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8)));
2953 if(rit->first < rank)
2954 nofNewHead += nofNewPerProc;
2955 else if(rit->first > rank)
2956 nofNewTail += nofNewPerProc;
2959 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
2961 error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
2964 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
2965 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
2968 MPI_Waitall(nReq,req,stats);
2971 uint32_t resEnd = octree.getNumOctants() - tailOffset;
2972 uint32_t nofResidents = resEnd - headOffset;
2974 for(uint32_t i = headOffset; i < resEnd; ++i){
2975 octree.octants[octCounter] = octree.octants[i];
2978 uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
2979 octree.octants.resize(newCounter);
2981 uint32_t resCounter = nofNewHead + nofResidents - 1;
2982 for(uint32_t k = 0; k < nofResidents ; ++k){
2983 octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
2988 bool jumpResident = false;
2989 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
2990 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
2991 uint32_t nofNewPerProc = (uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8)));
2993 if(rbit->first > rank && !jumpResident){
2994 newCounter += nofResidents ;
2995 jumpResident = true;
2997 for(int i = nofNewPerProc - 1; i >= 0; --i){
2998 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
2999 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
3000 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
3002 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
3003 octree.octants[newCounter].setMarker(m);
3004 for(int j = 0; j < 12; ++j){
3005 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
3006 octree.octants[newCounter].info[j] = info[j];
3011 #if defined(__INTEL_COMPILER) || defined(__ICC)
3013 octree.octants.shrink_to_fit();
3015 delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3016 delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3017 delete [] displays; displays = NULL;
3018 delete [] req; req = NULL;
3019 delete [] stats; stats = NULL;
3020 delete [] globalRecvsBuff; globalRecvsBuff = NULL;
3022 updateLoadBalance();
3026 delete [] partition;
3031 log.writeLog(" Final Parallel partition : ");
3032 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3033 for(int ii=1; ii<nproc; ii++){
3034 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3037 log.writeLog("---------------------------------------------");
3051 log.writeLog("---------------------------------------------");
3052 log.writeLog(" LOAD BALANCE ");
3054 uint32_t* partition = new uint32_t [nproc];
3055 computePartition(partition, level);
3059 log.writeLog(" Initial Serial distribution : ");
3060 for(int ii=0; ii<nproc; ii++){
3061 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3064 uint32_t stride = 0;
3065 for(int i = 0; i < rank; ++i)
3066 stride += partition[i];
3070 octree.octants.assign(first, last);
3071 #if defined(__INTEL_COMPILER) || defined(__ICC)
3073 octree.octants.shrink_to_fit();
3075 first = octantsCopy.end();
3076 last = octantsCopy.end();
3079 updateLoadBalance();
3086 log.writeLog(" Initial Parallel partition : ");
3087 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3088 for(int ii=1; ii<nproc; ii++){
3089 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3093 octree.ghosts.clear();
3094 octree.size_ghosts = 0;
3096 uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
3097 for(int p = 0; p < nproc; ++p){
3098 newPartitionRangeGlobalidx[p] = 0;
3099 for(int pp = 0; pp <= p; ++pp)
3100 newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
3101 --newPartitionRangeGlobalidx[p];
3109 lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
3113 else if(lh > (int32_t)(octree.octants.size() - 1))
3114 lh = octree.octants.size() - 1;
3116 if(rank == nproc - 1)
3117 ft = octree.octants.size();
3119 ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
3121 ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
3123 if(ft > (int32_t)(octree.octants.size() - 1))
3124 ft = octree.octants.size();
3129 uint32_t headSize = (uint32_t)(lh + 1);
3130 uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
3131 uint32_t headOffset = headSize;
3132 uint32_t tailOffset = tailSize;
3135 map<int,Class_Comm_Buffer> sendBuffers;
3138 int64_t firstOctantGlobalIdx = 0;
3139 int64_t globalLastHead = (int64_t) lh;
3140 int64_t globalFirstTail = (int64_t) ft;
3141 int firstPredecessor = -1;
3142 int firstSuccessor = nproc;
3144 firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
3145 globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
3146 globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
3147 for(int pre = rank - 1; pre >=0; --pre){
3148 if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
3149 firstPredecessor = pre;
3151 for(int post = rank + 1; post < nproc; ++post){
3152 if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
3153 firstSuccessor = post;
3168 uint32_t nofElementsFromSuccessiveToPrevious = 0;
3170 for(int p = firstPredecessor; p >= 0; --p){
3171 if(headSize < partition[p]){
3172 intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
3173 intBuffer = abs(intBuffer);
3174 nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
3175 if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
3176 nofElementsFromSuccessiveToPrevious = headSize;
3178 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3181 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3188 for(int ii = 0; ii < 12; ++ii)
3189 info[ii] = octant.info[ii];
3190 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3191 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3192 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3193 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3194 for(int j = 0; j < 12; ++j){
3195 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3198 if(nofElementsFromSuccessiveToPrevious == headSize)
3201 lh -= nofElementsFromSuccessiveToPrevious;
3202 globalLastHead -= nofElementsFromSuccessiveToPrevious;
3207 nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
3208 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3211 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3218 for(int i = 0; i < 12; ++i)
3219 info[i] = octant.info[i];
3220 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3221 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3222 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3223 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3224 for(int j = 0; j < 12; ++j){
3225 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3228 lh -= nofElementsFromSuccessiveToPrevious;
3229 globalLastHead -= nofElementsFromSuccessiveToPrevious;
3237 uint32_t nofElementsFromPreviousToSuccessive = 0;
3241 for(int p = firstSuccessor; p < nproc; ++p){
3242 if(tailSize < partition[p]){
3243 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3244 if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
3245 nofElementsFromPreviousToSuccessive = tailSize;
3247 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3250 uint32_t octantsSize = (uint32_t)octree.octants.size();
3251 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3258 for(int ii = 0; ii < 12; ++ii)
3259 info[ii] = octant.info[ii];
3260 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3261 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3262 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3263 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3264 for(int j = 0; j < 12; ++j){
3265 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3268 if(nofElementsFromPreviousToSuccessive == tailSize)
3270 ft += nofElementsFromPreviousToSuccessive;
3271 globalFirstTail += nofElementsFromPreviousToSuccessive;
3272 tailSize -= nofElementsFromPreviousToSuccessive;
3276 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3277 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3279 uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
3281 for(uint32_t i = ft; i <= endOctants; ++i ){
3288 for(int ii = 0; ii < 12; ++ii)
3289 info[ii] = octant.info[ii];
3290 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3291 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3292 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3293 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3294 for(int j = 0; j < 12; ++j){
3295 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&pos,comm);
3298 ft += nofElementsFromPreviousToSuccessive;
3299 globalFirstTail += nofElementsFromPreviousToSuccessive;
3300 tailSize -= nofElementsFromPreviousToSuccessive;
3308 vector<Class_Array> recvs(nproc);
3309 recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
3310 recvs[rank].array[0] = rank;
3312 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
3313 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
3314 recvs[rank].array[counter] = sit->first;
3317 int* nofRecvsPerProc = new int[nproc];
3318 error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
3319 int globalRecvsBuffSize = 0;
3320 int* displays = new int[nproc];
3321 for(int pp = 0; pp < nproc; ++pp){
3323 globalRecvsBuffSize += nofRecvsPerProc[pp];
3324 for(int ppp = 0; ppp < pp; ++ppp){
3325 displays[pp] += nofRecvsPerProc[ppp];
3328 int* globalRecvsBuff = new int[globalRecvsBuffSize];
3329 error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3331 vector<set<int> > sendersPerProc(nproc);
3332 for(
int pin = 0; pin < nproc; ++pin){
3333 for(
int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3334 sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3339 MPI_Request* req =
new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3340 MPI_Status* stats =
new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3342 map<int,int> recvBufferSizePerProc;
3343 set<int>::iterator senditend = sendersPerProc[rank].end();
3344 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3345 recvBufferSizePerProc[*sendit] = 0;
3346 error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3349 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3350 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3351 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3354 MPI_Waitall(nReq,req,stats);
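// Editor's note: buffer sizes are now known on both sides, so a second round of non-blocking
// MPI_Irecv/MPI_Isend exchanges the actual MPI_PACKED octant payloads.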
3359 uint32_t nofNewHead = 0;
3360 uint32_t nofNewTail = 0;
3361 map<int,Class_Comm_Buffer> recvBuffers;
3362 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3363 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3365 uint32_t nofNewPerProc = (uint32_t)(rit->second / (uint32_t)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8)));
3366 if(rit->first < rank)
3367 nofNewHead += nofNewPerProc;
3368 else if(rit->first > rank)
3369 nofNewTail += nofNewPerProc;
3372 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3374 error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3377 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3378 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3381 MPI_Waitall(nReq,req,stats);
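// Editor's note: with all payloads received, the local octant vector is rebuilt: residents are
// compacted to the front, the vector is resized to nofNewHead + nofResidents + nofNewTail,
// residents are shifted to their final slots, and the received octants are unpacked into the
// head and tail positions.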
3384 uint32_t resEnd = octree.getNumOctants() - tailOffset;
3385 uint32_t nofResidents = resEnd - headOffset;
3387 for(uint32_t i = headOffset; i < resEnd; ++i){
3388 octree.octants[octCounter] = octree.octants[i];
3391 uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3392 octree.octants.resize(newCounter);
3394 uint32_t resCounter = nofNewHead + nofResidents - 1;
3395 for(uint32_t k = 0; k < nofResidents ; ++k){
3396 octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3401 bool jumpResident = false;
3402 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3403 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3404 uint32_t nofNewPerProc = (uint32_t)(rbit->second.commBufferSize / (uint32_t)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8)));
3406 if(rbit->first > rank && !jumpResident){
3407 newCounter += nofResidents ;
3408 jumpResident = true;
3410 for(int i = nofNewPerProc - 1; i >= 0; --i){
3411 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&x,1,MPI_UINT32_T,comm);
3412 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&y,1,MPI_UINT32_T,comm);
3413 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&l,1,MPI_UINT8_T,comm);
3415 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&m,1,MPI_INT8_T,comm);
3416 octree.octants[newCounter].setMarker(m);
3417 for(int j = 0; j < 12; ++j){
3418 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&pos,&info[j],1,MPI::BOOL,comm);
3419 octree.octants[newCounter].info[j] = info[j];
3424 #if defined(__INTEL_COMPILER) || defined(__ICC)
3426 octree.octants.shrink_to_fit();
3428 delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3429 delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3430 delete [] displays; displays = NULL;
3431 delete [] req; req = NULL;
3432 delete [] stats; stats = NULL;
3433 delete [] globalRecvsBuff; globalRecvsBuff = NULL;
3435 updateLoadBalance();
3439 delete [] partition;
3444 log.writeLog(" Final Parallel partition : ");
3445 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3446 for(int ii=1; ii<nproc; ii++){
3447 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3450 log.writeLog("---------------------------------------------");
3460 template<class Impl>
3463 log.writeLog("---------------------------------------------");
3464 log.writeLog(" LOAD BALANCE ");
3466 uint32_t* partition = new uint32_t [nproc];
3468 computePartition(partition);
3470 computePartition(partition, weight);
3477 log.writeLog(" Initial Serial distribution : ");
3478 for(int ii=0; ii<nproc; ii++){
3479 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3482 uint32_t stride = 0;
3483 for(int i = 0; i < rank; ++i)
3484 stride += partition[i];
3488 octree.octants.assign(first, last);
3489 #if defined(__INTEL_COMPILER) || defined(__ICC)
3491 octree.octants.shrink_to_fit();
3493 first = octantsCopy.end();
3494 last = octantsCopy.end();
3496 userData.assign(stride,partition[rank]);
3499 updateLoadBalance();
3505 log.writeLog(" Initial Parallel partition : ");
3506 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3507 for(int ii=1; ii<nproc; ii++){
3508 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3512 octree.ghosts.clear();
3513 octree.size_ghosts = 0;
3515 uint64_t* newPartitionRangeGlobalidx = new uint64_t[nproc];
3516 for(int p = 0; p < nproc; ++p){
3517 newPartitionRangeGlobalidx[p] = 0;
3518 for(int pp = 0; pp <= p; ++pp)
3519 newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
3520 --newPartitionRangeGlobalidx[p];
3528 lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
3532 else if(lh > octree.octants.size() - 1)
3533 lh = octree.octants.size() - 1;
3535 if(rank == nproc - 1)
3536 ft = octree.octants.size();
3538 ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
3540 ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
3542 if(ft > (int32_t)(octree.octants.size() - 1))
3543 ft = octree.octants.size();
3548 uint32_t headSize = (uint32_t)(lh + 1);
3549 uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
3550 uint32_t headOffset = headSize;
3551 uint32_t tailOffset = tailSize;
3554 map<int,Class_Comm_Buffer> sendBuffers;
3557 int64_t firstOctantGlobalIdx = 0;
3558 int64_t globalLastHead = (int64_t) lh;
3559 int64_t globalFirstTail = (int64_t) ft;
3560 int firstPredecessor = -1;
3561 int firstSuccessor = nproc;
3563 firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
3564 globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
3565 globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
3566 for(int pre = rank - 1; pre >=0; --pre){
3567 if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
3568 firstPredecessor = pre;
3570 for(int post = rank + 1; post < nproc; ++post){
3571 if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
3572 firstSuccessor = post;
3587 uint32_t nofElementsFromSuccessiveToPrevious = 0;
3589 for(int p = firstPredecessor; p >= 0; --p){
3590 if(headSize < partition[p]){
3591 intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
3592 intBuffer = abs(intBuffer);
3593 nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
3594 if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
3595 nofElementsFromSuccessiveToPrevious = headSize;
3597 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3600 buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
3603 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3604 buffSize += userData.size(i);
3608 buffSize += sizeof(int);
3611 MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3613 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
3620 for(int j = 0; j < 12; ++j)
3621 info[j] = octant.info[j];
3622 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3623 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3624 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3625 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3626 for(int j = 0; j < 12; ++j){
3627 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3630 userData.gather(sendBuffers[p],i);
3632 if(nofElementsFromSuccessiveToPrevious == headSize)
3635 lh -= nofElementsFromSuccessiveToPrevious;
3636 globalLastHead -= nofElementsFromSuccessiveToPrevious;
3641 nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
3642 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3645 buffSize += userData.fixedSize() * nofElementsFromSuccessiveToPrevious;
3648 for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
3649 buffSize += userData.size(i);
3653 buffSize += sizeof(int);
3656 MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3658 for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
3665 for(int j = 0; j < 12; ++j)
3666 info[j] = octant.info[j];
3667 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3668 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3669 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3670 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3671 for(int j = 0; j < 12; ++j){
3672 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3674 userData.gather(sendBuffers[p],i);
3676 lh -= nofElementsFromSuccessiveToPrevious;
3677 globalLastHead -= nofElementsFromSuccessiveToPrevious;
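// Editor's note: symmetric handling of the tail follows: octants and their attached user data
// (via userData.gather) are packed for the successor ranks until the local range matches the
// new partition bounds.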
3685 uint32_t nofElementsFromPreviousToSuccessive = 0;
3689 for(int p = firstSuccessor; p < nproc; ++p){
3690 if(tailSize < partition[p]){
3691 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3692 if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
3693 nofElementsFromPreviousToSuccessive = tailSize;
3695 uint32_t octantsSize = (uint32_t)octree.octants.size();
3696 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3699 buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
3702 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3703 buffSize += userData.size(i);
3707 buffSize += sizeof(int);
3710 MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3712 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
3719 for(int j = 0; j < 12; ++j)
3720 info[j] = octant.info[j];
3721 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3722 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3723 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3724 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3725 for(int j = 0; j < 12; ++j){
3726 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3728 userData.gather(sendBuffers[p],i);
3730 if(nofElementsFromPreviousToSuccessive == tailSize)
3732 ft += nofElementsFromPreviousToSuccessive;
3733 globalFirstTail += nofElementsFromPreviousToSuccessive;
3734 tailSize -= nofElementsFromPreviousToSuccessive;
3738 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
3739 uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
3740 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((double)global2D.octantBytes / (double)(CHAR_BIT/8));
3743 buffSize += userData.fixedSize() * nofElementsFromPreviousToSuccessive;
3746 for(uint32_t i = ft; i <= endOctants; ++i){
3747 buffSize += userData.size(i);
3751 buffSize += sizeof(int);
3754 MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
3755 for(uint32_t i = ft; i <= endOctants; ++i ){
3762 for(int j = 0; j < 12; ++j)
3763 info[j] = octant.info[j];
3764 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3765 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3766 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3767 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3768 for(int j = 0; j < 12; ++j){
3769 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
3771 userData.gather(sendBuffers[p],i);
3773 ft += nofElementsFromPreviousToSuccessive;
3774 globalFirstTail += nofElementsFromPreviousToSuccessive;
3775 tailSize -= nofElementsFromPreviousToSuccessive;
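// Editor's note: as in the octant-only variant above, the code below first builds the
// sender/receiver map with collective calls, then exchanges buffer sizes, and finally the
// packed payloads with non-blocking point-to-point communication.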
3783 vector<Class_Array> recvs(nproc);
3784 recvs[rank] = Class_Array((uint32_t)sendBuffers.size()+1,-1);
3785 recvs[rank].array[0] = rank;
3787 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
3788 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
3789 recvs[rank].array[counter] = sit->first;
3792 int* nofRecvsPerProc = new int[nproc];
3793 error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
3794 int globalRecvsBuffSize = 0;
3795 int* displays = new int[nproc];
3796 for(int pp = 0; pp < nproc; ++pp){
3798 globalRecvsBuffSize += nofRecvsPerProc[pp];
3799 for(int ppp = 0; ppp < pp; ++ppp){
3800 displays[pp] += nofRecvsPerProc[ppp];
3804 int* globalRecvsBuff = new int[globalRecvsBuffSize];
3805 error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
3807 vector<set<int> > sendersPerProc(nproc);
3808 for(int pin = 0; pin < nproc; ++pin){
3809 for(int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
3810 sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
3815 MPI_Request* req = new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
3816 MPI_Status* stats = new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
3818 map<int,int> recvBufferSizePerProc;
3819 set<int>::iterator senditend = sendersPerProc[rank].end();
3820 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3821 recvBufferSizePerProc[*sendit] = 0;
3822 error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
3825 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
3826 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3827 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
3830 MPI_Waitall(nReq,req,stats);
3835 uint32_t nofNewHead = 0;
3836 uint32_t nofNewTail = 0;
3837 map<int,Class_Comm_Buffer> recvBuffers;
3839 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
3840 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
3845 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
3846 error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
3849 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
3850 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
3853 MPI_Waitall(nReq,req,stats);
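// Editor's note: each received buffer starts with the octant count packed by the sender
// (nofElementsFrom... above); it is unpacked first, so the head and tail contributions of
// every neighbour are known before the octant vector and the user data are rebuilt.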
3856 map<int,uint32_t> nofNewOverProcs;
3857 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
3858 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3859 uint32_t nofNewPerProc;
3860 MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofNewPerProc,1,MPI_UINT32_T,comm);
3861 nofNewOverProcs[rbit->first] = nofNewPerProc;
3862 if(rbit->first < rank)
3863 nofNewHead += nofNewPerProc;
3864 else if(rbit->first > rank)
3865 nofNewTail += nofNewPerProc;
3869 uint32_t resEnd = octree.getNumOctants() - tailOffset;
3870 uint32_t nofResidents = resEnd - headOffset;
3871 uint32_t octCounter = 0;
3872 for(uint32_t i = headOffset; i < resEnd; ++i){
3873 octree.octants[octCounter] = octree.octants[i];
3874 userData.move(i,octCounter);
3877 uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
3878 octree.octants.resize(newCounter);
3879 userData.resize(newCounter);
3881 uint32_t resCounter = nofNewHead + nofResidents - 1;
3882 for(uint32_t k = 0; k < nofResidents ; ++k){
3883 octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
3884 userData.move(nofResidents - k - 1,resCounter - k);
3889 bool jumpResident = false;
3891 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
3892 uint32_t nofNewPerProc = nofNewOverProcs[rbit->first];
3893 if(rbit->first > rank && !jumpResident){
3894 newCounter += nofResidents ;
3895 jumpResident = true;
3897 for(int i = nofNewPerProc - 1; i >= 0; --i){
3898 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&x,1,MPI_UINT32_T,comm);
3899 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&y,1,MPI_UINT32_T,comm);
3900 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&l,1,MPI_UINT8_T,comm);
3902 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&m,1,MPI_INT8_T,comm);
3903 octree.octants[newCounter].setMarker(m);
3904 for(int j = 0; j < 12; ++j){
3905 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&info[j],1,MPI::BOOL,comm);
3906 octree.octants[newCounter].info[j] = info[j];
3908 userData.scatter(rbit->second,newCounter);
3912 #if defined(__INTEL_COMPILER) || defined(__ICC)
3914 octree.octants.shrink_to_fit();
3918 delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
3919 delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
3920 delete [] displays; displays = NULL;
3921 delete [] req; req = NULL;
3922 delete [] stats; stats = NULL;
3923 delete [] globalRecvsBuff; globalRecvsBuff = NULL;
3926 updateLoadBalance();
3928 uint32_t nofGhosts = getNumGhosts();
3932 delete [] partition;
3937 log.writeLog(" Final Parallel partition : ");
3938 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3939 for(int ii=1; ii<nproc; ii++){
3940 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
3943 log.writeLog("---------------------------------------------");
3955 template<class Impl>
3959 log.writeLog("---------------------------------------------");
3960 log.writeLog(" LOAD BALANCE ");
3962 uint32_t* partition = new uint32_t [nproc];
3963 computePartition(partition, level);
3967 log.writeLog(" Initial Serial distribution : ");
3968 for(int ii=0; ii<nproc; ii++){
3969 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]+1)));
3972 uint32_t stride = 0;
3973 for(int i = 0; i < rank; ++i)
3974 stride += partition[i];
3978 octree.octants.assign(first, last);
3979 #if defined(__INTEL_COMPILER) || defined(__ICC)
3981 octree.octants.shrink_to_fit();
3983 first = octantsCopy.end();
3984 last = octantsCopy.end();
3986 userData.
assign(stride,partition[rank]);
3989 updateLoadBalance();
3996 log.writeLog(
" Initial Parallel partition : ");
3997 log.writeLog(
" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+
" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
3998 for(
int ii=1; ii<nproc; ii++){
3999 log.writeLog(
" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+
" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
4003 octree.ghosts.clear();
4004 octree.size_ghosts = 0;
4006 uint64_t* newPartitionRangeGlobalidx =
new uint64_t[nproc];
4007 for(
int p = 0; p < nproc; ++p){
4008 newPartitionRangeGlobalidx[p] = 0;
4009 for(
int pp = 0; pp <= p; ++pp)
4010 newPartitionRangeGlobalidx[p] += (uint64_t)partition[pp];
4011 --newPartitionRangeGlobalidx[p];
4019 lh = (int32_t)(newPartitionRangeGlobalidx[rank-1] + 1 - partition_range_globalidx[rank-1] - 1 - 1);
4023 else if(lh > octree.octants.size() - 1)
4024 lh = octree.octants.size() - 1;
4026 if(rank == nproc - 1)
4027 ft = octree.octants.size();
4029 ft = (int32_t)(newPartitionRangeGlobalidx[rank] + 1);
4031 ft = (int32_t)(newPartitionRangeGlobalidx[rank] - partition_range_globalidx[rank -1]);
4033 if(ft > (int32_t)(octree.octants.size() - 1))
4034 ft = octree.octants.size();
4039 uint32_t headSize = (uint32_t)(lh + 1);
4040 uint32_t tailSize = (uint32_t)(octree.octants.size() - ft);
4041 uint32_t headOffset = headSize;
4042 uint32_t tailOffset = tailSize;
4045 map<int,Class_Comm_Buffer> sendBuffers;
4048 int64_t firstOctantGlobalIdx = 0;
4049 int64_t globalLastHead = (int64_t) lh;
4050 int64_t globalFirstTail = (int64_t) ft;
4051 int firstPredecessor = -1;
4052 int firstSuccessor = nproc;
4054 firstOctantGlobalIdx = (int64_t)(partition_range_globalidx[rank-1] + 1);
4055 globalLastHead = firstOctantGlobalIdx + (int64_t)lh;
4056 globalFirstTail = firstOctantGlobalIdx + (int64_t)ft;
4057 for(
int pre = rank - 1; pre >=0; --pre){
4058 if((uint64_t)globalLastHead <= newPartitionRangeGlobalidx[pre])
4059 firstPredecessor = pre;
4061 for(
int post = rank + 1; post < nproc; ++post){
4062 if((uint64_t)globalFirstTail <= newPartitionRangeGlobalidx[post] && (uint64_t)globalFirstTail > newPartitionRangeGlobalidx[post-1])
4063 firstSuccessor = post;
4078 uint32_t nofElementsFromSuccessiveToPrevious = 0;
4080 for(
int p = firstPredecessor; p >= 0; --p){
4081 if(headSize < partition[p]){
4082 intBuffer = (newPartitionRangeGlobalidx[p] - partition[p] );
4083 intBuffer = abs(intBuffer);
4084 nofElementsFromSuccessiveToPrevious = globalLastHead - intBuffer;
4085 if(nofElementsFromSuccessiveToPrevious > headSize || contatore == 1)
4086 nofElementsFromSuccessiveToPrevious = headSize;
4088 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((
double)global2D.
octantBytes / (double)(CHAR_BIT/8));
4091 buffSize += userData.
fixedSize() * nofElementsFromSuccessiveToPrevious;
4094 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
4095 buffSize += userData.
size(i);
4099 buffSize +=
sizeof(int);
4102 MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4104 for(uint32_t i = (uint32_t)(lh - nofElementsFromSuccessiveToPrevious + 1); i <= (uint32_t)lh; ++i){
4111 for(
int j = 0; j < 12; ++j)
4112 info[j] = octant.info[j];
4113 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4114 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4115 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4116 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4117 for(
int j = 0; j < 12; ++j){
4118 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4121 userData.
gather(sendBuffers[p],i);
4123 if(nofElementsFromSuccessiveToPrevious == headSize)
4126 lh -= nofElementsFromSuccessiveToPrevious;
4127 globalLastHead -= nofElementsFromSuccessiveToPrevious;
4132 nofElementsFromSuccessiveToPrevious = globalLastHead - (newPartitionRangeGlobalidx[p] - partition[p]);
4133 int buffSize = nofElementsFromSuccessiveToPrevious * (int)ceil((
double)global2D.
octantBytes / (double)(CHAR_BIT/8));
4136 buffSize += userData.
fixedSize() * nofElementsFromSuccessiveToPrevious;
4139 for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
4140 buffSize += userData.
size(i);
4144 buffSize +=
sizeof(int);
4147 MPI_Pack(&nofElementsFromSuccessiveToPrevious,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4149 for(uint32_t i = lh - nofElementsFromSuccessiveToPrevious + 1; i <= lh; ++i){
4156 for(
int j = 0; j < 12; ++j)
4157 info[j] = octant.info[j];
4158 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4159 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4160 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4161 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4162 for(
int j = 0; j < 12; ++j){
4163 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4165 userData.
gather(sendBuffers[p],i);
4167 lh -= nofElementsFromSuccessiveToPrevious;
4168 globalLastHead -= nofElementsFromSuccessiveToPrevious;
4176 uint32_t nofElementsFromPreviousToSuccessive = 0;
4180 for(
int p = firstSuccessor; p < nproc; ++p){
4181 if(tailSize < partition[p]){
4182 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
4183 if(nofElementsFromPreviousToSuccessive > tailSize || contatore == 1)
4184 nofElementsFromPreviousToSuccessive = tailSize;
4186 uint32_t octantsSize = (uint32_t)octree.octants.size();
4187 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((
double)global2D.
octantBytes / (double)(CHAR_BIT/8));
4190 buffSize += userData.
fixedSize() * nofElementsFromPreviousToSuccessive;
4193 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
4194 buffSize += userData.
size(i);
4198 buffSize +=
sizeof(int);
4201 MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4203 for(uint32_t i = ft; i < ft + nofElementsFromPreviousToSuccessive; ++i){
4210 for(
int j = 0; j < 12; ++j)
4211 info[j] = octant.info[j];
4212 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4213 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4214 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4215 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4216 for(
int j = 0; j < 12; ++j){
4217 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4219 userData.
gather(sendBuffers[p],i);
4221 if(nofElementsFromPreviousToSuccessive == tailSize)
4223 ft += nofElementsFromPreviousToSuccessive;
4224 globalFirstTail += nofElementsFromPreviousToSuccessive;
4225 tailSize -= nofElementsFromPreviousToSuccessive;
4229 nofElementsFromPreviousToSuccessive = newPartitionRangeGlobalidx[p] - globalFirstTail + 1;
4230 uint32_t endOctants = ft + nofElementsFromPreviousToSuccessive - 1;
4231 int buffSize = nofElementsFromPreviousToSuccessive * (int)ceil((
double)global2D.
octantBytes / (double)(CHAR_BIT/8));
4234 buffSize += userData.
fixedSize() * nofElementsFromPreviousToSuccessive;
4237 for(uint32_t i = ft; i <= endOctants; ++i){
4238 buffSize += userData.
size(i);
4242 buffSize +=
sizeof(int);
4245 MPI_Pack(&nofElementsFromPreviousToSuccessive,1,MPI_UINT32_T,sendBuffers[p].commBuffer,sendBuffers[p].commBufferSize,&sendBuffers[p].pos,comm);
4246 for(uint32_t i = ft; i <= endOctants; ++i ){
4253 for(
int j = 0; j < 12; ++j)
4254 info[j] = octant.info[j];
4255 error_flag = MPI_Pack(&x,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4256 error_flag = MPI_Pack(&y,1,MPI_UINT32_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4257 error_flag = MPI_Pack(&l,1,MPI_UINT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4258 error_flag = MPI_Pack(&m,1,MPI_INT8_T,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4259 for(
int j = 0; j < 12; ++j){
4260 MPI_Pack(&info[j],1,MPI::BOOL,sendBuffers[p].commBuffer,buffSize,&sendBuffers[p].pos,comm);
4262 userData.
gather(sendBuffers[p],i);
4264 ft += nofElementsFromPreviousToSuccessive;
4265 globalFirstTail += nofElementsFromPreviousToSuccessive;
4266 tailSize -= nofElementsFromPreviousToSuccessive;
4274 vector<Class_Array> recvs(nproc);
4275 recvs[rank] =
Class_Array((uint32_t)sendBuffers.size()+1,-1);
4276 recvs[rank].array[0] = rank;
4278 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
4279 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4280 recvs[rank].array[counter] = sit->first;
4283 int* nofRecvsPerProc =
new int[nproc];
4284 error_flag = MPI_Allgather(&recvs[rank].arraySize,1,MPI_INT,nofRecvsPerProc,1,MPI_INT,comm);
4285 int globalRecvsBuffSize = 0;
4286 int* displays =
new int[nproc];
4287 for(
int pp = 0; pp < nproc; ++pp){
4289 globalRecvsBuffSize += nofRecvsPerProc[pp];
4290 for(
int ppp = 0; ppp < pp; ++ppp){
4291 displays[pp] += nofRecvsPerProc[ppp];
4294 int* globalRecvsBuff =
new int[globalRecvsBuffSize];
4295 error_flag = MPI_Allgatherv(recvs[rank].array,recvs[rank].arraySize,MPI_INT,globalRecvsBuff,nofRecvsPerProc,displays,MPI_INT,comm);
4297 vector<set<int> > sendersPerProc(nproc);
4298 for(
int pin = 0; pin < nproc; ++pin){
4299 for(
int k = displays[pin]+1; k < displays[pin] + nofRecvsPerProc[pin]; ++k){
4300 sendersPerProc[globalRecvsBuff[k]].insert(globalRecvsBuff[displays[pin]]);
4305 MPI_Request* req =
new MPI_Request[sendBuffers.size()+sendersPerProc[rank].size()];
4306 MPI_Status* stats =
new MPI_Status[sendBuffers.size()+sendersPerProc[rank].size()];
4308 map<int,int> recvBufferSizePerProc;
4309 set<int>::iterator senditend = sendersPerProc[rank].end();
4310 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
4311 recvBufferSizePerProc[*sendit] = 0;
4312 error_flag = MPI_Irecv(&recvBufferSizePerProc[*sendit],1,MPI_UINT32_T,*sendit,rank,comm,&req[nReq]);
4315 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
4316 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4317 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
4320 MPI_Waitall(nReq,req,stats);
4325 uint32_t nofNewHead = 0;
4326 uint32_t nofNewTail = 0;
4327 map<int,Class_Comm_Buffer> recvBuffers;
4329 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
4330 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
4335 for(set<int>::iterator sendit = sendersPerProc[rank].begin(); sendit != senditend; ++sendit){
4336 error_flag = MPI_Irecv(recvBuffers[*sendit].commBuffer,recvBuffers[*sendit].commBufferSize,MPI_PACKED,*sendit,rank,comm,&req[nReq]);
4339 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4340 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
4343 MPI_Waitall(nReq,req,stats);
4346 map<int,uint32_t> nofNewOverProcs;
4347 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
4348 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
4349 uint32_t nofNewPerProc;
4350 MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofNewPerProc,1,MPI_UINT32_T,comm);
4351 nofNewOverProcs[rbit->first] = nofNewPerProc;
4352 if(rbit->first < rank)
4353 nofNewHead += nofNewPerProc;
4354 else if(rbit->first > rank)
4355 nofNewTail += nofNewPerProc;
4359 uint32_t resEnd = octree.getNumOctants() - tailOffset;
4360 uint32_t nofResidents = resEnd - headOffset;
4361 uint32_t octCounter = 0;
4362 for(uint32_t i = headOffset; i < resEnd; ++i){
4363 octree.octants[octCounter] = octree.octants[i];
4365 userData.
move(i,octCounter);
4368 uint32_t newCounter = nofNewHead + nofNewTail + nofResidents;
4369 octree.octants.resize(newCounter);
4370 userData.
resize(newCounter);
4372 uint32_t resCounter = nofNewHead + nofResidents - 1;
4373 for(uint32_t k = 0; k < nofResidents ; ++k){
4374 octree.octants[resCounter - k] = octree.octants[nofResidents - k - 1];
4376 userData.
move(nofResidents - k - 1,resCounter - k);
4381 bool jumpResident =
false;
4383 for(map<int,Class_Comm_Buffer>::iterator rbit = recvBuffers.begin(); rbit != rbitend; ++rbit){
4385 uint32_t nofNewPerProc = nofNewOverProcs[rbit->first];
4386 if(rbit->first > rank && !jumpResident){
4387 newCounter += nofResidents ;
4388 jumpResident =
true;
4390 for(
int i = nofNewPerProc - 1; i >= 0; --i){
4391 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&x,1,MPI_UINT32_T,comm);
4392 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&y,1,MPI_UINT32_T,comm);
4393 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&l,1,MPI_UINT8_T,comm);
4395 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&m,1,MPI_INT8_T,comm);
4396 octree.octants[newCounter].setMarker(m);
4397 for(
int j = 0; j < 12; ++j){
4398 error_flag = MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&info[j],1,MPI::BOOL,comm);
4399 octree.octants[newCounter].info[j] = info[j];
4402 userData.
scatter(rbit->second,newCounter);
4406 #if defined(__INTEL_COMPILER) || defined(__ICC)
4408 octree.octants.shrink_to_fit();
4412 delete [] newPartitionRangeGlobalidx; newPartitionRangeGlobalidx = NULL;
4413 delete [] nofRecvsPerProc; nofRecvsPerProc = NULL;
4414 delete [] displays; displays = NULL;
4415 delete [] req; req = NULL;
4416 delete [] stats; stats = NULL;
4417 delete [] globalRecvsBuff; globalRecvsBuff = NULL;
4420 updateLoadBalance();
4422 uint32_t nofGhosts = getNumGhosts();
4426 delete [] partition;
4431 log.writeLog(" Final Parallel partition : ");
4432 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(0))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[0]+1)));
4433 for(int ii=1; ii<nproc; ii++){
4434 log.writeLog(" Octants for proc "+ to_string(static_cast<unsigned long long>(ii))+" : " + to_string(static_cast<unsigned long long>(partition_range_globalidx[ii]-partition_range_globalidx[ii-1])));
4437 log.writeLog("---------------------------------------------");
4445 void updateAdapt() {
4450 max_depth = octree.local_max_depth;
4451 global_num_octants = octree.getNumOctants();
4452 for(int p = 0; p < nproc; ++p){
4453 partition_range_globalidx[p] = global_num_octants - 1;
4460 error_flag = MPI_Allreduce(&octree.local_max_depth,&max_depth,1,MPI_UINT8_T,MPI_MAX,comm);
4462 uint64_t local_num_octants = octree.getNumOctants();
4463 error_flag = MPI_Allreduce(&local_num_octants,&global_num_octants,1,MPI_UINT64_T,MPI_SUM,comm);
4465 uint64_t* rbuff = new uint64_t[nproc];
4466 error_flag = MPI_Allgather(&local_num_octants,1,MPI_UINT64_T,rbuff,1,MPI_UINT64_T,comm);
4467 for(int p = 0; p < nproc; ++p){
4468 partition_range_globalidx[p] = 0;
4469 for(int pp = 0; pp <=p; ++pp)
4470 partition_range_globalidx[p] += rbuff[pp];
4471 --partition_range_globalidx[p];
4474 uint64_t lastDescMorton = octree.getLastDesc().computeMorton();
4475 error_flag = MPI_Allgather(&lastDescMorton,1,MPI_UINT64_T,partition_last_desc,1,MPI_UINT64_T,comm);
4476 uint64_t firstDescMorton = octree.getFirstDesc().computeMorton();
4477 error_flag = MPI_Allgather(&firstDescMorton,1,MPI_UINT64_T,partition_first_desc,1,MPI_UINT64_T,comm);
4478 delete [] rbuff; rbuff = NULL;
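// Editor's note: the two updateAfterCoarse() variants below compute the Morton bounds owned by
// the neighbouring ranks and pass them to octree.checkCoarse(), so that coarsening near the
// process boundaries is checked against the neighbouring partitions.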
4485 void updateAfterCoarse(){
4495 uint64_t lastDescMortonPre, firstDescMortonPost;
4496 lastDescMortonPre = (rank!=0) * partition_last_desc[rank-1];
4497 firstDescMortonPost = (rank<nproc-1)*partition_first_desc[rank+1] + (rank==nproc-1)*partition_last_desc[rank];
4498 octree.checkCoarse(lastDescMortonPre, firstDescMortonPost);
4506 void updateAfterCoarse(u32vector & mapidx){
4516 uint64_t lastDescMortonPre, firstDescMortonPost;
4517 lastDescMortonPre = (rank!=0) * partition_last_desc[rank-1];
4518 firstDescMortonPost = (rank<nproc-1)*partition_first_desc[rank+1] + (rank==nproc-1)*partition_last_desc[rank];
4519 octree.checkCoarse(lastDescMortonPre, firstDescMortonPost, mapidx);
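// Editor's note: the block below packs, for every process-border octant listed in
// bordersPerProc, its marker and its info[11] flag (local variable mod) and exchanges them
// with the neighbouring ranks; the received values are then applied to the local ghost
// octants.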
4539 map<int,Class_Comm_Buffer> sendBuffers;
4540 map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
4541 uint32_t pbordersOversize = 0;
4542 for(map<int,vector<uint32_t> >::iterator bit = bordersPerProc.begin(); bit != bitend; ++bit){
4543 pbordersOversize += bit->second.size();
4544 int buffSize = bit->second.size() * (int)ceil((double)(global2D.markerBytes + global2D.boolBytes) / (double)(CHAR_BIT/8));
4545 int key = bit->first;
4546 const vector<uint32_t> & value = bit->second;
4549 int nofBorders = value.size();
4550 for(int i = 0; i < nofBorders; ++i){
4554 mod = octant.info[11];
4555 error_flag = MPI_Pack(&marker,1,MPI_INT8_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
4556 error_flag = MPI_Pack(&mod,1,MPI::BOOL,sendBuffers[key].commBuffer,buffSize,&pos,comm);
4563 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
4564 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
4566 map<int,int> recvBufferSizePerProc;
4567 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
4568 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4569 recvBufferSizePerProc[sit->first] = 0;
4570 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
4573 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
4574 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4575 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
4578 MPI_Waitall(nReq,req,stats);
4584 uint32_t nofBytesOverProc = 0;
4585 map<int,Class_Comm_Buffer> recvBuffers;
4586 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
4587 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
4591 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
4592 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
4593 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
4596 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
4597 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
4600 MPI_Waitall(nReq,req,stats);
4605 uint32_t ghostCounter = 0;
4606 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
4607 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
4609 int nofGhostsPerProc = int(rrit->second.commBufferSize / ((uint32_t) (global2D.markerBytes + global2D.boolBytes)));
4610 for(int i = 0; i < nofGhostsPerProc; ++i){
4611 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&marker,1,MPI_INT8_T,comm);
4612 octree.ghosts[ghostCounter].setMarker(marker);
4613 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&mod,1,MPI::BOOL,comm);
4614 octree.ghosts[ghostCounter].info[11] = mod;
4618 recvBuffers.clear();
4619 sendBuffers.clear();
4620 recvBufferSizePerProc.clear();
4621 delete [] req; req = NULL;
4622 delete [] stats; stats = NULL;
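// Editor's note: balance21(first) below enforces the 2:1 level ratio between neighbouring
// octants. It iterates octree.localBalance()/localBalanceAll(), interleaved with
// octree.preBalance21(), and in the parallel branch combines the per-rank flags with
// MPI_Allreduce(MPI_LOR) until no rank reports further changes.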
4629 void balance21(bool const first){
4631 bool globalDone = true, localDone = false;
4635 octree.preBalance21(true);
4638 log.writeLog("---------------------------------------------");
4639 log.writeLog(" 2:1 BALANCE (balancing Marker before Adapt)");
4641 log.writeLog(" Iterative procedure ");
4643 log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4647 localDone = octree.localBalance(true);
4649 octree.preBalance21(false);
4651 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4655 log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4657 localDone = octree.localBalance(false);
4659 octree.preBalance21(false);
4660 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4664 log.writeLog(" Iteration : Finalizing ");
4671 log.writeLog(" 2:1 Balancing reached ");
4673 log.writeLog("---------------------------------------------");
4680 localDone = octree.localBalanceAll(true);
4682 octree.preBalance21(false);
4684 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4689 localDone = octree.localBalanceAll(false);
4691 octree.preBalance21(false);
4692 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4703 bool localDone = false;
4706 octree.preBalance21(true);
4709 log.writeLog("---------------------------------------------");
4710 log.writeLog(" 2:1 BALANCE (balancing Marker before Adapt)");
4712 log.writeLog(" Iterative procedure ");
4714 log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4717 localDone = octree.localBalance(true);
4718 octree.preBalance21(false);
4722 log.writeLog(" Iteration : " + to_string(static_cast<unsigned long long>(iteration)));
4723 localDone = octree.localBalance(false);
4724 octree.preBalance21(false);
4727 log.writeLog(" Iteration : Finalizing ");
4732 log.writeLog(" 2:1 Balancing reached ");
4734 log.writeLog("---------------------------------------------");
4739 localDone = octree.localBalanceAll(true);
4740 octree.preBalance21(false);
4744 localDone = octree.localBalanceAll(false);
4745 octree.preBalance21(false);
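// Editor's note: adapt() below refines and coarsens according to the octant markers: it clears
// the info[8], info[9] and info[11] bits, loops octree.refine() and octree.coarse() to
// completion, calls updateAfterCoarse(), and in the parallel branch reduces the local "done"
// flags into globalDone.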
4762 bool globalDone = false, localDone = false, cDone = false;
4763 uint32_t nocts = octree.getNumOctants();
4764 vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
4766 for (iter = octree.octants.begin(); iter != iterend; iter++){
4767 iter->info[8] = false;
4768 iter->info[9] = false;
4769 iter->info[11] = false;
4774 log.writeLog("---------------------------------------------");
4775 log.writeLog(" ADAPT (Refine/Coarse)");
4782 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4785 while(octree.refine());
4787 if (octree.getNumOctants() > nocts)
4789 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4790 nocts = octree.getNumOctants();
4794 while(octree.coarse());
4795 updateAfterCoarse();
4799 if (octree.getNumOctants() < nocts){
4802 nocts = octree.getNumOctants();
4804 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
4807 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4810 log.writeLog("---------------------------------------------");
4814 log.writeLog("---------------------------------------------");
4815 log.writeLog(" ADAPT (Refine/Coarse)");
4822 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4825 while(octree.refine());
4826 if (octree.getNumOctants() > nocts)
4830 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4831 nocts = octree.getNumOctants();
4834 while(octree.coarse());
4835 updateAfterCoarse();
4841 if (octree.getNumOctants() < nocts){
4844 nocts = octree.getNumOctants();
4847 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4848 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4850 log.writeLog("---------------------------------------------");
4852 status += globalDone;
4855 status += localDone;
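// Editor's note: adapt_mapidx() below is the variant used when a mapping to the pre-adaptation
// mesh is requested: it resizes and initialises mapidx before adapting, and the
// refine(mapidx)/coarse(mapidx) calls keep that mapping up to date for getMapping().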
4866 bool adapt_mapidx() {
4869 bool globalDone = false, localDone = false;
4870 uint32_t nocts = octree.getNumOctants();
4871 vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
4873 for (iter = octree.octants.begin(); iter != iterend; iter++){
4874 iter->info[8] = false;
4875 iter->info[9] = false;
4876 iter->info[11] = false;
4881 mapidx.resize(nocts);
4882 #if defined(__INTEL_COMPILER) || defined(__ICC)
4884 mapidx.shrink_to_fit();
4886 for (uint32_t i=0; i<nocts; i++){
4892 log.writeLog("---------------------------------------------");
4893 log.writeLog(" ADAPT (Refine/Coarse)");
4900 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4903 while(octree.refine(mapidx));
4905 if (octree.getNumOctants() > nocts)
4907 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
4908 nocts = octree.getNumOctants();
4912 while(octree.coarse(mapidx));
4913 updateAfterCoarse(mapidx);
4917 if (octree.getNumOctants() < nocts){
4920 nocts = octree.getNumOctants();
4922 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
4925 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4928 log.writeLog("---------------------------------------------");
4932 log.writeLog("---------------------------------------------");
4933 log.writeLog(" ADAPT (Refine/Coarse)");
4940 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4943 while(octree.refine(mapidx));
4944 if (octree.getNumOctants() > nocts)
4948 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4949 nocts = octree.getNumOctants();
4953 while(octree.coarse(mapidx));
4954 updateAfterCoarse(mapidx);
4960 if (octree.getNumOctants() < nocts){
4963 nocts = octree.getNumOctants();
4966 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
4967 log.writeLog(" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
4969 log.writeLog("---------------------------------------------");
4988 done = adapt_mapidx();
5011 done = adapt_mapidx();
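// Editor's note: getMapping(idx, mapper, isghost) below returns, for the post-adaptation octant
// idx, the pre-adaptation octant indices it derives from: a single entry for an unchanged or
// refined octant, and the whole former family for a newly coarsened one, including brothers
// that were ghosts (octree.last_ghost_bros), which are flagged through isghost.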
5029 void getMapping(uint32_t & idx, u32vector & mapper, vector<bool> & isghost){
5031 uint32_t i, nocts = getNumOctants();
5032 uint32_t nghbro = octree.last_ghost_bros.size();
5037 mapper.push_back(mapidx[idx]);
5038 isghost.push_back(false);
5039 if (getIsNewC(idx)){
5040 if (idx < nocts-1 || !nghbro){
5042 mapper.push_back(mapidx[idx]+i);
5043 isghost.push_back(false);
5046 else if (idx == nocts-1 && nghbro){
5047 for (i=1; i<global2D.nchildren-nghbro; i++){
5048 mapper.push_back(mapidx[idx]+i);
5049 isghost.push_back(false);
5051 for (i=0; i<nghbro; i++){
5052 mapper.push_back(octree.last_ghost_bros[i]);
5053 isghost.push_back(true);
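// Editor's note: the members that follow (logged as "ADAPT (Global Refine)" and
// "ADAPT (Global Coarse)") repeat the adapt() pattern but force one uniform refinement
// (octree.globalRefine) or coarsening (octree.globalCoarse) of the whole local tree, with and
// without the mapidx mapping.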
5065 bool globalDone = false, localDone = false, cDone = false;
5066 uint32_t nocts = octree.getNumOctants();
5067 vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
5069 for (iter = octree.octants.begin(); iter != iterend; iter++){
5070 iter->info[8] = false;
5071 iter->info[9] = false;
5072 iter->info[11] = false;
5077 log.writeLog("---------------------------------------------");
5078 log.writeLog(" ADAPT (GlobalRefine)");
5082 log.writeLog(" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5085 while(octree.globalRefine());
5087 if (octree.getNumOctants() > nocts)
5089 log.writeLog(" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5090 nocts = octree.getNumOctants();
5095 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5098 log.writeLog(
"---------------------------------------------");
5102 log.writeLog(
"---------------------------------------------");
5103 log.writeLog(
" ADAPT (Global Refine)");
5107 log.writeLog(
" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5110 while(octree.globalRefine());
5111 if (octree.getNumOctants() > nocts)
5115 log.writeLog(
" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5116 nocts = octree.getNumOctants();
5119 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5121 log.writeLog(
"---------------------------------------------");
5140 bool globalDone =
false, localDone =
false;
5141 uint32_t nocts = octree.getNumOctants();
5142 vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
5144 for (iter = octree.octants.begin(); iter != iterend; iter++){
5145 iter->info[8] =
false;
5146 iter->info[9] =
false;
5147 iter->info[11] =
false;
5152 mapidx.resize(nocts);
5153 #if defined(__INTEL_COMPILER) || defined(__ICC)
5155 mapidx.shrink_to_fit();
5157 for (uint32_t i=0; i<nocts; i++){
5163 log.writeLog(
"---------------------------------------------");
5164 log.writeLog(
" ADAPT (Global Refine)");
5168 log.writeLog(
" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5171 while(octree.globalRefine(mapidx));
5173 if (octree.getNumOctants() > nocts)
5175 log.writeLog(
" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5176 nocts = octree.getNumOctants();
5181 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5184 log.writeLog(
"---------------------------------------------");
5188 log.writeLog(
"---------------------------------------------");
5189 log.writeLog(
" ADAPT (Global Refine)");
5193 log.writeLog(
" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5196 while(octree.globalRefine(mapidx));
5197 if (octree.getNumOctants() > nocts)
5201 log.writeLog(
" Number of octants after Refine : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5202 nocts = octree.getNumOctants();
5205 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5207 log.writeLog(
"---------------------------------------------");
5220 bool globalDone =
false, localDone =
false, cDone =
false;
5221 uint32_t nocts = octree.getNumOctants();
5222 vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
5224 for (iter = octree.octants.begin(); iter != iterend; iter++){
5225 iter->info[8] =
false;
5226 iter->info[9] =
false;
5227 iter->info[11] =
false;
5232 log.writeLog(
"---------------------------------------------");
5233 log.writeLog(
" ADAPT (Global Coarse)");
5240 log.writeLog(
" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5243 while(octree.globalCoarse());
5244 updateAfterCoarse();
5246 while(octree.refine());
5248 if (octree.getNumOctants() < nocts){
5251 nocts = octree.getNumOctants();
5253 log.writeLog(
" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5256 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5259 log.writeLog(
"---------------------------------------------");
5263 log.writeLog(
"---------------------------------------------");
5264 log.writeLog(
" ADAPT (Global Coarse)");
5271 log.writeLog(
" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5274 while(octree.globalCoarse());
5275 updateAfterCoarse();
5278 while(octree.refine());
5281 if (octree.getNumOctants() < nocts){
5284 nocts = octree.getNumOctants();
5287 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5288 log.writeLog(
" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5290 log.writeLog(
"---------------------------------------------");
5309 bool globalDone =
false, localDone =
false;
5310 uint32_t nocts = octree.getNumOctants();
5311 vector<Class_Octant<2> >::iterator iter, iterend = octree.octants.end();
5313 for (iter = octree.octants.begin(); iter != iterend; iter++){
5314 iter->info[8] =
false;
5315 iter->info[9] =
false;
5316 iter->info[11] =
false;
5321 mapidx.resize(nocts);
5322 #if defined(__INTEL_COMPILER) || defined(__ICC)
5324 mapidx.shrink_to_fit();
5326 for (uint32_t i=0; i<nocts; i++){
5332 log.writeLog(
"---------------------------------------------");
5333 log.writeLog(
" ADAPT (Global Coarse)");
5340 log.writeLog(
" Initial Number of octants : " + to_string(static_cast<unsigned long long>(octree.getNumOctants())));
5343 while(octree.globalCoarse(mapidx));
5344 updateAfterCoarse(mapidx);
5346 while(octree.refine(mapidx));
5348 if (octree.getNumOctants() < nocts){
5351 nocts = octree.getNumOctants();
5353 log.writeLog(
" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(nocts)));
5356 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5359 log.writeLog(
"---------------------------------------------");
5363 log.writeLog(
"---------------------------------------------");
5364 log.writeLog(
" ADAPT (Global Coarse)");
5371 log.writeLog(
" Initial Number of octants : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5374 while(octree.globalCoarse(mapidx));
5375 updateAfterCoarse(mapidx);
5378 while(octree.refine(mapidx));
5381 if (octree.getNumOctants() < nocts){
5384 nocts = octree.getNumOctants();
5387 error_flag = MPI_Allreduce(&localDone,&globalDone,1,MPI::BOOL,MPI_LOR,comm);
5388 log.writeLog(
" Number of octants after Coarse : " + to_string(static_cast<unsigned long long>(global_num_octants)));
5390 log.writeLog(
"---------------------------------------------");
5403 template<
class Impl>
5406 map<int,Class_Comm_Buffer> sendBuffers;
5407 size_t fixedDataSize = userData.
fixedSize();
5408 map<int,vector<uint32_t> >::iterator bitend = bordersPerProc.end();
5409 map<int,vector<uint32_t> >::iterator bitbegin = bordersPerProc.begin();
5410 for(map<
int,vector<uint32_t> >::iterator bit = bitbegin; bit != bitend; ++bit){
5411 const int & key = bit->first;
5412 const vector<uint32_t> & pborders = bit->second;
5413 size_t buffSize = 0;
5414 size_t nofPbordersPerProc = pborders.size();
5415 if(fixedDataSize != 0){
5416 buffSize = fixedDataSize*nofPbordersPerProc;
5419 for(size_t i = 0; i < nofPbordersPerProc; ++i){
5420 buffSize += userData.size(pborders[i]);
5424 buffSize += sizeof(int);
5428 MPI_Pack(&nofPbordersPerProc,1,MPI_INT,sendBuffers[key].commBuffer,sendBuffers[key].commBufferSize,&sendBuffers[key].pos,comm);
5431 for(size_t j = 0; j < nofPbordersPerProc; ++j){
5432 userData.gather(sendBuffers[key],pborders[j]);
5437 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5438 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5440 map<int,int> recvBufferSizePerProc;
5441 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5442 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5443 recvBufferSizePerProc[sit->first] = 0;
5444 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5447 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5448 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5449 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5452 MPI_Waitall(nReq,req,stats);
5455 map<int,Class_Comm_Buffer> recvBuffers;
5456 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5457 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5461 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5462 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5465 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5466 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5469 MPI_Waitall(nReq,req,stats);
5472 int ghostOffset = 0;
5473 map<int,Class_Comm_Buffer>::iterator rbitend = recvBuffers.end();
5474 map<int,Class_Comm_Buffer>::iterator rbitbegin = recvBuffers.begin();
5475 for(map<int,Class_Comm_Buffer>::iterator rbit = rbitbegin; rbit != rbitend; ++rbit){
5476 int nofGhostFromThisProc = 0;
5477 MPI_Unpack(rbit->second.commBuffer,rbit->second.commBufferSize,&rbit->second.pos,&nofGhostFromThisProc,1,MPI_INT,comm);
5478 for(int k = 0; k < nofGhostFromThisProc; ++k){
5479 userData.scatter(rbit->second, k+ghostOffset);
5481 ghostOffset += nofGhostFromThisProc;
5483 delete [] req; req = NULL;
5484 delete [] stats; stats = NULL;
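A minimal sketch of the user-side class that communicate() expects, not taken from the original source: it plugs into Class_Data_Comm_Interface and supplies the fixedSize()/size()/gather()/scatter() calls invoked above. The class name UserDataComm, the separate ghostData container, and the buff.write()/buff.read() helpers are assumptions made for illustration; only those two buffer calls would change if the actual Class_Comm_Buffer API differs.

class UserDataComm : public Class_Data_Comm_Interface<UserDataComm> {
public:
	vector<double> & data;       // one value per local octant, read by gather()
	vector<double> & ghostData;  // one value per ghost octant, filled by scatter()

	UserDataComm(vector<double> & data_, vector<double> & ghostData_)
		: data(data_), ghostData(ghostData_) {}

	// Every element is serialized with the same number of bytes, so fixedSize()
	// lets communicate() size the send buffers without calling size() per element.
	size_t fixedSize() const { return sizeof(double); }
	size_t size(const uint32_t e) const { return sizeof(double); }

	template<class Buffer>
	void gather(Buffer & buff, const uint32_t e) { buff.write(data[e]); }      // pack border octant e

	template<class Buffer>
	void scatter(Buffer & buff, const uint32_t e) { buff.read(ghostData[e]); } // unpack ghost e
};

A call then reads UserDataComm exchange(values, ghostValues); ptree.communicate(exchange); with ptree being the Class_Para_Tree<2> instance.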
5493 octree.computeConnectivity();
5501 octree.clearConnectivity();
5509 octree.updateConnectivity();
5517 octree.computeghostsConnectivity();
5525 octree.clearghostsConnectivity();
5533 octree.updateghostsConnectivity();
5541 return octree.nodes.size();
5550 return octree.connectivity;
5559 return octree.ghostsconnectivity;
5568 return octree.connectivity[idx];
5577 return octree.connectivity[getIdx(oct)];
5587 return octree.ghostsconnectivity[idx];
5597 return octree.ghostsconnectivity[getIdx(oct)];
5606 return octree.nodes;
5616 return octree.nodes[inode];
5625 return octree.ghostsnodes;
5635 vector<double> coords(3,0);
5636 coords[0] = trans.mapX(octree.nodes[inode][0]);
5637 coords[1] = trans.mapY(octree.nodes[inode][1]);
5638 coords[2] = trans.mapZ(octree.nodes[inode][2]);
5649 return octree.ghostsnodes[inode];
5659 vector<double> coords(3,0);
5660 coords[0] = trans.mapX(octree.ghostsnodes[inode][0]);
5661 coords[1] = trans.mapY(octree.ghostsnodes[inode][1]);
5662 coords[2] = trans.mapZ(octree.ghostsnodes[inode][2]);
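For orientation, a sketch (the helper name visitOctantCorners and the ptree instance are assumptions) of how the connectivity and the node accessors above combine to recover the physical corner coordinates of each local octant.

void visitOctantCorners(Class_Para_Tree<2> & ptree) {
	ptree.computeConnectivity();                              // build the node list and connectivity
	for (uint32_t idx = 0; idx < ptree.getNumOctants(); ++idx) {
		u32vector conn = ptree.getOctantConnectivity(idx);    // node indices of octant idx
		for (size_t k = 0; k < conn.size(); ++k) {
			dvector xyz = ptree.getNodeCoordinates(conn[k]);  // logical node mapped via trans.mapX/Y/Z
			// xyz[0], xyz[1], xyz[2] hold the corner position
		}
	}
}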
5679 vector<pair<pair<uint32_t, uint32_t>, pair<int, int> > > mapper;
5680 uint64_t morton2 = 0, morton1 = 0, mortonlastdesc = 0, mortonfirstdesc = 0;
5681 uint32_t idx1 = 0, idx2 = 0;
5682 uint32_t nocts = octree.getNumOctants();
5683 uint32_t nocts2 = ptree.octree.getNumOctants();
5685 mapper.resize(nocts);
5689 for (uint32_t i=0; i<nocts; i++){
5690 mapper[i].first.first = idx1;
5691 mapper[i].first.second = idx2;
5692 mapper[i].second.first = rank;
5693 mapper[i].second.second = rank;
5694 mortonfirstdesc = octree.octants[i].computeMorton();
5695 mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5696 while(morton1 <= mortonfirstdesc && idx1 < nocts2){
5697 mapper[i].first.first = idx1;
5706 while(morton2 <= mortonlastdesc && idx2 < nocts2){
5707 mapper[i].first.second = idx2;
5720 map<int,vector<uint64_t> > FirstMortonperproc, SecondMortonperproc;
5721 map<int,vector<uint64_t> > FirstMortonReceived, SecondMortonReceived;
5722 map<int,vector<uint32_t> > FirstIndexperproc, SecondIndexperproc;
5723 map<int,vector<uint32_t> > FirstLocalIndex, SecondLocalIndex;
5728 for (uint32_t i=0; i<nocts; i++){
5729 mortonfirstdesc = octree.octants[i].computeMorton();
5730 owner = ptree.findOwner(mortonfirstdesc);
5732 mapper[i].second.first = rank;
5733 while(morton1 <= mortonfirstdesc && idx1 < nocts2){
5734 mapper[i].first.first = idx1;
5743 mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5744 owner = ptree.findOwner(mortonlastdesc);
5746 mapper[i].second.second = rank;
5747 mapper[i].first.second = idx2;
5748 while(morton2 <= mortonlastdesc && idx2 < nocts2){
5749 mapper[i].first.second = idx2;
5760 mapper[i].second.second = owner;
5761 SecondMortonperproc[owner].push_back(mortonfirstdesc);
5762 SecondLocalIndex[owner].push_back(i);
5766 mapper[i].second.first = owner;
5767 FirstMortonperproc[owner].push_back(mortonfirstdesc);
5768 FirstLocalIndex[owner].push_back(i);
5769 mortonlastdesc = octree.octants[i].buildLastDesc().computeMorton();
5770 owner = ptree.findOwner(mortonlastdesc);
5772 mapper[i].second.second = rank;
5773 mapper[i].first.second = idx2;
5774 while(morton2 <= mortonlastdesc && idx2 < nocts2){
5775 mapper[i].first.second = idx2;
5786 mapper[i].second.second = owner;
5787 SecondMortonperproc[owner].push_back(mortonfirstdesc);
5788 SecondLocalIndex[owner].push_back(i);
5796 for(int iproc=0; iproc<nproc; iproc++){
5797 FirstMortonperproc[iproc].push_back(-1);
5798 SecondMortonperproc[iproc].push_back(-1);
5804 map<int,Class_Comm_Buffer> sendBuffers;
5805 map<int,vector<uint64_t> >::iterator bitend = FirstMortonperproc.end();
5806 for(map<int,vector<uint64_t> >::iterator bit = FirstMortonperproc.begin(); bit != bitend; ++bit){
5807 int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint64_t)) / (double)(CHAR_BIT/8));
5808 int key = bit->first;
5809 vector<uint64_t> & value = bit->second;
5812 int nofMortons = value.size();
5813 for(int i = 0; i < nofMortons; ++i){
5815 uint64_t Morton = value[i];
5816 error_flag = MPI_Pack(&Morton,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
5823 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5824 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5826 map<int,int> recvBufferSizePerProc;
5827 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5828 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5829 recvBufferSizePerProc[sit->first] = 0;
5830 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5833 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5834 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5835 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5838 MPI_Waitall(nReq,req,stats);
5844 uint32_t nofBytesOverProc = 0;
5845 map<int,Class_Comm_Buffer> recvBuffers;
5846 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5847 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5851 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5852 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
5853 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5856 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5857 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5860 MPI_Waitall(nReq,req,stats);
5865 uint32_t Mortoncounter = 0;
5866 uint64_t Morton = 0;
5867 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
5868 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
5870 int nofMortonPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint64_t)));
5871 for(int i = 0; i < nofMortonPerProc-1; ++i){
5872 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Morton,1,MPI_UINT64_T,comm);
5873 FirstMortonReceived[rrit->first].push_back(Morton);
5878 recvBuffers.clear();
5879 sendBuffers.clear();
5880 recvBufferSizePerProc.clear();
5881 delete [] req; req = NULL;
5882 delete [] stats; stats = NULL;
5888 map<int,Class_Comm_Buffer> sendBuffers;
5889 map<int,vector<uint64_t> >::iterator bitend = SecondMortonperproc.end();
5890 uint32_t pbordersOversize = 0;
5891 for(map<int,vector<uint64_t> >::iterator bit = SecondMortonperproc.begin(); bit != bitend; ++bit){
5892 pbordersOversize += bit->second.size();
5893 int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint64_t)) / (double)(CHAR_BIT/8));
5894 int key = bit->first;
5895 vector<uint64_t> & value = bit->second;
5898 int nofMortons = value.size();
5899 for(int i = 0; i < nofMortons; ++i){
5901 uint64_t Morton = value[i];
5902 error_flag = MPI_Pack(&Morton,1,MPI_UINT64_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
5909 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
5910 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
5912 map<int,int> recvBufferSizePerProc;
5913 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
5914 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5915 recvBufferSizePerProc[sit->first] = 0;
5916 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
5919 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
5920 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5921 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
5924 MPI_Waitall(nReq,req,stats);
5930 uint32_t nofBytesOverProc = 0;
5931 map<int,Class_Comm_Buffer> recvBuffers;
5932 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
5933 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
5937 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
5938 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
5939 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
5942 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
5943 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
5946 MPI_Waitall(nReq,req,stats);
5951 uint32_t Mortoncounter = 0;
5952 uint64_t Morton = 0;
5953 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
5954 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
5956 int nofMortonPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint64_t)));
5957 for(int i = 0; i < nofMortonPerProc-1; ++i){
5958 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Morton,1,MPI_UINT64_T,comm);
5959 SecondMortonReceived[rrit->first].push_back(Morton);
5963 recvBuffers.clear();
5964 sendBuffers.clear();
5965 recvBufferSizePerProc.clear();
5966 delete [] req; req = NULL;
5967 delete [] stats; stats = NULL;
5971 for (int iproc=0; iproc<nproc; iproc++){
5972 vector<Class_Octant<2> >::iterator oend = octree.octants.end();
5973 vector<Class_Octant<2> >::iterator obegin = octree.octants.begin();
5974 vector<Class_Octant<2> >::iterator it = obegin;
5975 int nmortons = FirstMortonReceived[iproc].size();
5976 FirstIndexperproc[iproc].resize(nmortons);
5977 for (int idx=0; idx<nmortons; idx++){
5978 FirstIndexperproc[iproc][idx] = octree.getNumOctants()-1;
5980 mortonfirstdesc = FirstMortonReceived[iproc][idx];
5981 morton1 = it->computeMorton();
5982 while(morton1 <= mortonfirstdesc && it != oend){
5984 FirstIndexperproc[iproc][idx] = idx1;
5987 morton1 = it->computeMorton();
5998 for (int iproc=0; iproc<nproc; iproc++){
5999 vector<Class_Octant<2> >::iterator oend = octree.octants.end();
6000 vector<Class_Octant<2> >::iterator obegin = octree.octants.begin();
6001 vector<Class_Octant<2> >::iterator it = obegin;
6002 int nmortons = SecondMortonReceived[iproc].size();
6003 SecondIndexperproc[iproc].resize(nmortons);
6004 for (int idx=0; idx<nmortons; idx++){
6005 SecondIndexperproc[iproc][idx] = octree.getNumOctants()-1;
6007 mortonlastdesc = SecondMortonReceived[iproc][idx];
6008 morton2 = it->computeMorton();
6009 while(morton2 <= mortonlastdesc && it != oend){
6010 SecondIndexperproc[iproc][idx] = idx2;
6014 morton2 = it->computeMorton();
6024 for(int iproc=0; iproc<nproc; iproc++){
6025 FirstIndexperproc[iproc].push_back(-1);
6026 SecondIndexperproc[iproc].push_back(-1);
6032 map<int,Class_Comm_Buffer> sendBuffers;
6033 map<int,vector<uint32_t> >::iterator bitend = FirstIndexperproc.end();
6034 for(map<int,vector<uint32_t> >::iterator bit = FirstIndexperproc.begin(); bit != bitend; ++bit){
6035 int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint32_t)) / (double)(CHAR_BIT/8));
6036 int key = bit->first;
6037 vector<uint32_t> & value = bit->second;
6040 int nofIndices = value.size();
6041 for(int i = 0; i < nofIndices; ++i){
6043 uint32_t Index = value[i];
6044 error_flag = MPI_Pack(&Index,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
6051 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
6052 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
6054 map<int,int> recvBufferSizePerProc;
6055 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
6056 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6057 recvBufferSizePerProc[sit->first] = 0;
6058 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
6061 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
6062 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6063 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
6066 MPI_Waitall(nReq,req,stats);
6072 uint32_t nofBytesOverProc = 0;
6073 map<int,Class_Comm_Buffer> recvBuffers;
6074 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
6075 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
6079 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6080 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
6081 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
6084 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6085 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
6088 MPI_Waitall(nReq,req,stats);
6093 uint32_t Indexcounter = 0;
6095 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
6096 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
6098 int nofIndexPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint32_t)));
6099 for(int i = 0; i < nofIndexPerProc-1; ++i){
6100 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Index,1,MPI_UINT32_T,comm);
6101 mapper[FirstLocalIndex[rrit->first][i]].first.first = Index;
6106 recvBuffers.clear();
6107 sendBuffers.clear();
6108 recvBufferSizePerProc.clear();
6109 delete [] req; req = NULL;
6110 delete [] stats; stats = NULL;
6115 map<int,Class_Comm_Buffer> sendBuffers;
6116 map<int,vector<uint32_t> >::iterator bitend = SecondIndexperproc.end();
6117 uint32_t pbordersOversize = 0;
6118 for(map<int,vector<uint32_t> >::iterator bit = SecondIndexperproc.begin(); bit != bitend; ++bit){
6119 pbordersOversize += bit->second.size();
6120 int buffSize = bit->second.size() * (int)ceil((double)(sizeof(uint32_t)) / (double)(CHAR_BIT/8));
6121 int key = bit->first;
6122 vector<uint32_t> & value = bit->second;
6125 int nofIndices = value.size();
6126 for(int i = 0; i < nofIndices; ++i){
6128 uint32_t Index = value[i];
6129 error_flag = MPI_Pack(&Index,1,MPI_UINT32_T,sendBuffers[key].commBuffer,buffSize,&pos,comm);
6136 MPI_Request* req = new MPI_Request[sendBuffers.size()*2];
6137 MPI_Status* stats = new MPI_Status[sendBuffers.size()*2];
6139 map<int,int> recvBufferSizePerProc;
6140 map<int,Class_Comm_Buffer>::iterator sitend = sendBuffers.end();
6141 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6142 recvBufferSizePerProc[sit->first] = 0;
6143 error_flag = MPI_Irecv(&recvBufferSizePerProc[sit->first],1,MPI_UINT32_T,sit->first,rank,comm,&req[nReq]);
6146 map<int,Class_Comm_Buffer>::reverse_iterator rsitend = sendBuffers.rend();
6147 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6148 error_flag = MPI_Isend(&rsit->second.commBufferSize,1,MPI_UINT32_T,rsit->first,rsit->first,comm,&req[nReq]);
6151 MPI_Waitall(nReq,req,stats);
6157 uint32_t nofBytesOverProc = 0;
6158 map<int,Class_Comm_Buffer> recvBuffers;
6159 map<int,int>::iterator ritend = recvBufferSizePerProc.end();
6160 for(map<int,int>::iterator rit = recvBufferSizePerProc.begin(); rit != ritend; ++rit){
6164 for(map<int,Class_Comm_Buffer>::iterator sit = sendBuffers.begin(); sit != sitend; ++sit){
6165 nofBytesOverProc += recvBuffers[sit->first].commBufferSize;
6166 error_flag = MPI_Irecv(recvBuffers[sit->first].commBuffer,recvBuffers[sit->first].commBufferSize,MPI_PACKED,sit->first,rank,comm,&req[nReq]);
6169 for(map<int,Class_Comm_Buffer>::reverse_iterator rsit = sendBuffers.rbegin(); rsit != rsitend; ++rsit){
6170 error_flag = MPI_Isend(rsit->second.commBuffer,rsit->second.commBufferSize,MPI_PACKED,rsit->first,rsit->first,comm,&req[nReq]);
6173 MPI_Waitall(nReq,req,stats);
6178 uint32_t Indexcounter = 0;
6180 map<int,Class_Comm_Buffer>::iterator rritend = recvBuffers.end();
6181 for(map<int,Class_Comm_Buffer>::iterator rrit = recvBuffers.begin(); rrit != rritend; ++rrit){
6183 int nofIndexPerProc = int(rrit->second.commBufferSize / (uint32_t) (sizeof(uint32_t)));
6184 for(int i = 0; i < nofIndexPerProc-1; ++i){
6185 error_flag = MPI_Unpack(rrit->second.commBuffer,rrit->second.commBufferSize,&pos,&Index,1,MPI_UINT32_T,comm);
6186 mapper[SecondLocalIndex[rrit->first][i]].first.second = Index;
6190 recvBuffers.clear();
6191 sendBuffers.clear();
6192 recvBufferSizePerProc.clear();
6193 delete [] req; req = NULL;
6194 delete [] stats; stats = NULL;
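A usage sketch for the mapping routine above (the helper printOverlap and the console output are assumptions): mapPablos builds, for every local octant of this tree, the index interval of octants of a second Class_Para_Tree<2> that cover it, together with the ranks owning the first and last octant of that interval.

void printOverlap(Class_Para_Tree<2> & treeA, Class_Para_Tree<2> & treeB) {
	// mapping[i].first  = (first, last) overlapping octant indices in treeB for octant i of treeA
	// mapping[i].second = (rank owning the first one, rank owning the last one)
	vector<pair<pair<uint32_t, uint32_t>, pair<int, int> > > mapping = treeA.mapPablos(treeB);
	for (uint32_t i = 0; i < mapping.size(); ++i) {
		cout << "octant " << i
		     << " -> indices [" << mapping[i].first.first << ", " << mapping[i].first.second << "]"
		     << " on ranks (" << mapping[i].second.first << ", " << mapping[i].second.second << ")" << endl;
	}
}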
6212 if (octree.connectivity.size() == 0) {
6213 octree.computeConnectivity();
6218 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6220 ofstream out(name.str().c_str());
6223 ss << filename << "*.vtu cannot be opened and it won't be written.";
6224 log.writeLog(ss.str());
6227 int nofNodes = octree.nodes.size();
6228 int nofGhostNodes = octree.ghostsnodes.size();
6229 int nofOctants = octree.connectivity.size();
6230 int nofGhosts = octree.ghostsconnectivity.size();
6231 int nofAll = nofGhosts + nofOctants;
6232 out << "<?xml version=\"1.0\"?>" << endl
6233 << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6234 << " <UnstructuredGrid>" << endl
6235 << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6236 out << " <Points>" << endl
6237 << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 << "\" format=\"ascii\">" << endl
6238 << " " << std::fixed;
6239 for(int i = 0; i < nofNodes; i++)
6241 for(int j = 0; j < 3; ++j)
6242 out << std::setprecision(6) << octree.nodes[i][j] << " ";
6243 if((i+1)%4==0 && i!=nofNodes-1)
6246 for(int i = 0; i < nofGhostNodes; i++)
6248 for(int j = 0; j < 3; ++j)
6249 out << std::setprecision(6) << octree.ghostsnodes[i][j] << " ";
6250 if((i+1)%4==0 && i!=nofGhostNodes-1)
6253 out << endl << " </DataArray>" << endl
6254 << " </Points>" << endl
6255 << " <Cells>" << endl
6256 << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6258 for(int i = 0; i < nofOctants; i++)
6260 for(int j = 0; j < global2D.nnodes; j++)
6272 out << octree.connectivity[i][jj] << " ";
6274 if((i+1)%3==0 && i!=nofOctants-1)
6277 for(int i = 0; i < nofGhosts; i++)
6279 for(int j = 0; j < global2D.nnodes; j++)
6291 out << octree.ghostsconnectivity[i][jj] + nofNodes << " ";
6293 if((i+1)%3==0 && i!=nofGhosts-1)
6296 out << endl << " </DataArray>" << endl
6297 << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6299 for(int i = 0; i < nofAll; i++)
6301 out << (i+1)*global2D.nnodes << " ";
6302 if((i+1)%12==0 && i!=nofAll-1)
6305 out << endl << " </DataArray>" << endl
6306 << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6308 for(int i = 0; i < nofAll; i++)
6313 if((i+1)%12==0 && i!=nofAll-1)
6316 out << endl << " </DataArray>" << endl
6317 << " </Cells>" << endl
6318 << " </Piece>" << endl
6319 << " </UnstructuredGrid>" << endl
6320 << "</VTKFile>" << endl;
6325 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6326 ofstream pout(name.str().c_str());
6327 if(!pout.is_open()){
6329 ss << filename << "*.pvtu cannot be opened and it won't be written.";
6330 log.writeLog(ss.str());
6334 pout << "<?xml version=\"1.0\"?>" << endl
6335 << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6336 << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6337 << " <PPointData>" << endl
6338 << " </PPointData>" << endl
6339 << " <PCellData Scalars=\"\">" << endl;
6340 pout << " </PCellData>" << endl
6341 << " <PPoints>" << endl
6342 << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6343 << " </PPoints>" << endl;
6344 for(int i = 0; i < nproc; i++)
6345 pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6346 pout << " </PUnstructuredGrid>" << endl
6357 octree.clearConnectivity();
6373 if (octree.connectivity.size() == 0) {
6374 octree.computeConnectivity();
6379 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6381 ofstream out(name.str().c_str());
6384 ss << filename << "*.vtu cannot be opened and it won't be written.";
6385 log.writeLog(ss.str());
6388 int nofNodes = octree.nodes.size();
6389 int nofGhostNodes = octree.ghostsnodes.size();
6390 int nofOctants = octree.connectivity.size();
6391 int nofGhosts = octree.ghostsconnectivity.size();
6392 int nofAll = nofGhosts + nofOctants;
6393 out << "<?xml version=\"1.0\"?>" << endl
6394 << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6395 << " <UnstructuredGrid>" << endl
6396 << " <Piece NumberOfCells=\"" << octree.connectivity.size() + octree.ghostsconnectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() + octree.ghostsnodes.size() << "\">" << endl;
6397 out << " <Points>" << endl
6398 << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 << "\" format=\"ascii\">" << endl
6399 << " " << std::fixed;
6400 for(int i = 0; i < nofNodes; i++)
6402 for(int j = 0; j < 3; ++j){
6403 if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6404 if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6405 if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6407 if((i+1)%4==0 && i!=nofNodes-1)
6410 for(int i = 0; i < nofGhostNodes; i++)
6412 for(int j = 0; j < 3; ++j){
6413 if (j==0) out << std::setprecision(6) << trans.mapX(octree.ghostsnodes[i][j]) << " ";
6414 if (j==1) out << std::setprecision(6) << trans.mapY(octree.ghostsnodes[i][j]) << " ";
6415 if (j==2) out << std::setprecision(6) << trans.mapZ(octree.ghostsnodes[i][j]) << " ";
6417 if((i+1)%4==0 && i!=nofGhostNodes-1)
6420 out << endl << " </DataArray>" << endl
6421 << " </Points>" << endl
6422 << " <Cells>" << endl
6423 << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6425 for(int i = 0; i < nofOctants; i++)
6427 for(int j = 0; j < global2D.nnodes; j++)
6439 out << octree.connectivity[i][jj] << " ";
6441 if((i+1)%3==0 && i!=nofOctants-1)
6444 for(int i = 0; i < nofGhosts; i++)
6446 for(int j = 0; j < global2D.nnodes; j++)
6458 out << octree.ghostsconnectivity[i][jj] + nofNodes << " ";
6460 if((i+1)%3==0 && i!=nofGhosts-1)
6463 out << endl << " </DataArray>" << endl
6464 << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6466 for(int i = 0; i < nofAll; i++)
6468 out << (i+1)*global2D.nnodes << " ";
6469 if((i+1)%12==0 && i!=nofAll-1)
6472 out << endl << " </DataArray>" << endl
6473 << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6475 for(int i = 0; i < nofAll; i++)
6480 if((i+1)%12==0 && i!=nofAll-1)
6483 out << endl << " </DataArray>" << endl
6484 << " </Cells>" << endl
6485 << " </Piece>" << endl
6486 << " </UnstructuredGrid>" << endl
6487 << "</VTKFile>" << endl;
6492 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6493 ofstream pout(name.str().c_str());
6494 if(!pout.is_open()){
6496 ss << filename << "*.pvtu cannot be opened and it won't be written.";
6497 log.writeLog(ss.str());
6501 pout << "<?xml version=\"1.0\"?>" << endl
6502 << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6503 << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6504 << " <PPointData>" << endl
6505 << " </PPointData>" << endl
6506 << " <PCellData Scalars=\"\">" << endl;
6507 pout << " </PCellData>" << endl
6508 << " <PPoints>" << endl
6509 << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6510 << " </PPoints>" << endl;
6511 for(int i = 0; i < nproc; i++)
6512 pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6513 pout << " </PUnstructuredGrid>" << endl
6535 if (octree.connectivity.size() == 0) {
6536 octree.computeConnectivity();
6541 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-p" << std::setfill('0') << std::setw(4) << rank << "-" << filename << ".vtu";
6543 ofstream out(name.str().c_str());
6546 ss << filename << "*.vtu cannot be opened and it won't be written.";
6547 log.writeLog(ss.str());
6550 int nofNodes = octree.nodes.size();
6551 int nofOctants = octree.connectivity.size();
6552 int nofAll = nofOctants;
6553 out << "<?xml version=\"1.0\"?>" << endl
6554 << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6555 << " <UnstructuredGrid>" << endl
6556 << " <Piece NumberOfCells=\"" << octree.connectivity.size() << "\" NumberOfPoints=\"" << octree.nodes.size() << "\">" << endl;
6557 out << " <CellData Scalars=\"Data\">" << endl;
6558 out << " <DataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6559 << " " << std::fixed;
6560 int ndata = octree.connectivity.size();
6561 for(int i = 0; i < ndata; i++)
6563 out << std::setprecision(6) << data[i] << " ";
6564 if((i+1)%4==0 && i!=ndata-1)
6567 out << endl << " </DataArray>" << endl
6568 << " </CellData>" << endl
6569 << " <Points>" << endl
6570 << " <DataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\""<< 3 << "\" format=\"ascii\">" << endl
6571 << " " << std::fixed;
6572 for(int i = 0; i < nofNodes; i++)
6574 for(int j = 0; j < 3; ++j){
6575 if (j==0) out << std::setprecision(6) << trans.mapX(octree.nodes[i][j]) << " ";
6576 if (j==1) out << std::setprecision(6) << trans.mapY(octree.nodes[i][j]) << " ";
6577 if (j==2) out << std::setprecision(6) << trans.mapZ(octree.nodes[i][j]) << " ";
6579 if((i+1)%4==0 && i!=nofNodes-1)
6582 out << endl << " </DataArray>" << endl
6583 << " </Points>" << endl
6584 << " <Cells>" << endl
6585 << " <DataArray type=\"UInt64\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6587 for(int i = 0; i < nofOctants; i++)
6589 for(int j = 0; j < global2D.nnodes; j++)
6601 out << octree.connectivity[i][jj] << " ";
6603 if((i+1)%3==0 && i!=nofOctants-1)
6606 out << endl << " </DataArray>" << endl
6607 << " <DataArray type=\"UInt64\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6609 for(int i = 0; i < nofAll; i++)
6611 out << (i+1)*global2D.nnodes << " ";
6612 if((i+1)%12==0 && i!=nofAll-1)
6615 out << endl << " </DataArray>" << endl
6616 << " <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"ascii\">" << endl
6618 for(int i = 0; i < nofAll; i++)
6623 if((i+1)%12==0 && i!=nofAll-1)
6626 out << endl << " </DataArray>" << endl
6627 << " </Cells>" << endl
6628 << " </Piece>" << endl
6629 << " </UnstructuredGrid>" << endl
6630 << "</VTKFile>" << endl;
6635 name << "s" << std::setfill('0') << std::setw(4) << nproc << "-" << filename << ".pvtu";
6636 ofstream pout(name.str().c_str());
6637 if(!pout.is_open()){
6639 ss << filename << "*.pvtu cannot be opened and it won't be written.";
6640 log.writeLog(ss.str());
6644 pout << "<?xml version=\"1.0\"?>" << endl
6645 << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">" << endl
6646 << " <PUnstructuredGrid GhostLevel=\"0\">" << endl
6647 << " <PPointData>" << endl
6648 << " </PPointData>" << endl
6649 << " <PCellData Scalars=\"Data\">" << endl
6650 << " <PDataArray type=\"Float64\" Name=\"Data\" NumberOfComponents=\"1\"/>" << endl
6651 << " </PCellData>" << endl
6652 << " <PPoints>" << endl
6653 << " <PDataArray type=\"Float64\" Name=\"Coordinates\" NumberOfComponents=\"3\"/>" << endl
6654 << " </PPoints>" << endl;
6655 for(int i = 0; i < nproc; i++)
6656 pout << " <Piece Source=\"s" << std::setw(4) << std::setfill('0') << nproc << "-p" << std::setw(4) << std::setfill('0') << i << "-" << filename << ".vtu\"/>" << endl;
6657 pout << " </PUnstructuredGrid>" << endl