/*
 * Copyright 1997, Regents of the University of Minnesota
 *
 * xyzpart.c
 *
 * This file contains code that implements a coordinate based partitioning
 *
 * Started 7/11/97
 * George
 *
 * $Id: xyzpart.c 10755 2011-09-15 12:28:34Z karypis $
 *
 */

#include <parmetislib.h>


/*************************************************************************/
/*! This function implements a simple coordinate based partitioning
*/
/*************************************************************************/
void Coordinate_Partition(ctrl_t *ctrl, graph_t *graph, idx_t ndims, 
         real_t *xyz, idx_t setup)
{
  idx_t i, j, k, nvtxs, firstvtx, icoord, nbits;
  idx_t *vtxdist, *bxyz;
  ikv_t *cand;

  WCOREPUSH;

  if (setup)
    CommSetup(ctrl, graph);
  else
    graph->nrecv = 0;

  nvtxs    = graph->nvtxs;
  vtxdist  = graph->vtxdist;
  firstvtx = vtxdist[ctrl->mype];

  cand = ikvwspacemalloc(ctrl, nvtxs);
  bxyz = iwspacemalloc(ctrl, nvtxs*ndims);

  /* Assign the coordinates into bins */
  nbits = 9;  /* 2^nbits # of bins */
  IRBinCoordinates(ctrl, graph, ndims, xyz, 1<<nbits, bxyz);

  /* Z-order the bin numbers to obtain a single sort key per vertex */
  for (i=0; i<nvtxs; i++) {
    cand[i].key = 0;
    for (j=nbits-1; j>=0; j--) {
      for (k=0; k<ndims; k++)
        cand[i].key = (cand[i].key<<1) + (((bxyz[i*ndims+k]>>j)&1) == 1);
    }
    cand[i].val = firstvtx+i;
  }

  /* Partition using sorting */
  PseudoSampleSort(ctrl, graph, cand);

  WCOREPOP;
}


/*************************************************************************/
/*! This function maps the coordinates into bin numbers. It starts with a
    uniform bucketing of the [min, max] range of each dimension and then
    iteratively refines the bucket boundaries until the bucket counts are
    reasonably balanced.
*/
/*************************************************************************/
void IRBinCoordinates(ctrl_t *ctrl, graph_t *graph, idx_t ndims, real_t *xyz, 
         idx_t nbins, idx_t *bxyz)
{
  idx_t npes=ctrl->npes, mype=ctrl->mype; 
  idx_t i, j, k, l, gnvtxs, nvtxs;
  idx_t csize, psize;
  idx_t *vtxdist, *lcounts, *gcounts;
  real_t gmin, gmax, *emarkers, *nemarkers;
  rkv_t *cand;

  WCOREPUSH;

  gnvtxs = graph->gnvtxs;
  nvtxs  = graph->nvtxs;

  cand      = rkvwspacemalloc(ctrl, nvtxs);
  lcounts   = iwspacemalloc(ctrl, nbins);
  gcounts   = iwspacemalloc(ctrl, nbins);
  emarkers  = rwspacemalloc(ctrl, nbins+1);
  nemarkers = rwspacemalloc(ctrl, nbins+1);


  /* Go over each dimension */
  for (k=0; k<ndims; k++) {
    for (i=0; i<nvtxs; i++) {
      cand[i].key = xyz[i*ndims+k];
      cand[i].val = i;
    }
    rkvsorti(nvtxs, cand);

    /* determine the global min/max of this dimension */
    gkMPI_Allreduce((void *)&cand[0].key, (void *)&gmin, 1, REAL_T, MPI_MIN, ctrl->comm);
    gkMPI_Allreduce((void *)&cand[nvtxs-1].key, (void *)&gmax, 1, REAL_T, MPI_MAX, ctrl->comm);

    /* start with uniformly spaced bucket boundaries */
    for (i=0; i<nbins; i++)
      emarkers[i] = gmin + (gmax-gmin)*i/nbins;
    emarkers[nbins] = gmax*(1.0+2.0*REAL_EPSILON);

    /* get into an iterative bucket boundary refinement */
    for (l=0; l<5; l++) {
      /* determine bucket counts */
      iset(nbins, 0, lcounts);
      for (j=0, i=0; i<nvtxs;) {
        if (cand[i].key < emarkers[j+1]) {
          lcounts[j]++;
          i++;
        }
        else {
          j++;
        }
      }

      gkMPI_Allreduce((void *)lcounts, (void *)gcounts, nbins, IDX_T, MPI_SUM, ctrl->comm);

      /*
      if (mype == 0) {
        printf("Distribution [%"PRIDX"]...\n", l);
        for (i=0; i<nbins; i++)
          printf("\t%"PRREAL" - %"PRREAL" => %"PRIDX"\n", emarkers[i], emarkers[i+1], gcounts[i]);
      }
      */

      /* break-out if things look reasonably balanced */
      if (imax(nbins, gcounts) < 4*gnvtxs/nbins)
        break;

      /* refine buckets */
      rset(nbins, -1, nemarkers);
      for (j=0, i=0; i<nbins; i++) {
        for (csize=0; ; j++) {
          if (csize+gcounts[j] < gnvtxs/nbins) {
            csize += gcounts[j];
          }
          else {
            psize = gnvtxs/nbins-csize;
            emarkers[j] += (emarkers[j+1]-emarkers[j])*psize/gcounts[j];
            gcounts[j]  -= psize;

            nemarkers[i+1] = emarkers[j];
            break;
          }
        }
      }
      nemarkers[0]     = gmin;
      nemarkers[nbins] = gmax*(1.0+2.0*REAL_EPSILON);
      rcopy(nbins+1, nemarkers, emarkers);
    }

    /* assign the coordinate to the appropriate bin */
    for (j=0, i=0; i<nvtxs;) {
      if (cand[i].key < emarkers[j+1]) {
        bxyz[cand[i].val*ndims+k] = j;
        i++;
      }
      else {
        j++;
      }
    }
  }

  WCOREPOP;
}


/*************************************************************************/
/*! This function maps the coordinates into bin numbers. It starts with
    two buckets split at the mean value of each dimension and then
    iteratively splits the over-weight buckets until nbins buckets have
    been created.
*/
/*************************************************************************/
void RBBinCoordinates(ctrl_t *ctrl, graph_t *graph, idx_t ndims, real_t *xyz, 
         idx_t nbins, idx_t *bxyz)
{
  idx_t npes=ctrl->npes, mype=ctrl->mype; 
  idx_t i, j, k, l, gnvtxs, nvtxs, cnbins;
  idx_t *vtxdist, *lcounts, *gcounts;
  real_t sum, gmin, gmax, gsum, *emarkers, *nemarkers, *lsums, *gsums;
  rkv_t *cand;
  ikv_t *buckets;

  WCOREPUSH;

  gnvtxs = graph->gnvtxs;
  nvtxs  = graph->nvtxs;

  buckets   = ikvwspacemalloc(ctrl, nbins);
  cand      = rkvwspacemalloc(ctrl, nvtxs);
  lcounts   = iwspacemalloc(ctrl, nbins);
  gcounts   = iwspacemalloc(ctrl, nbins);
  lsums     = rwspacemalloc(ctrl, nbins);
  gsums     = rwspacemalloc(ctrl, nbins);
  emarkers  = rwspacemalloc(ctrl, nbins+1);
  nemarkers = rwspacemalloc(ctrl, nbins+1);


  /* Go over each dimension */
  for (k=0; k<ndims; k++) {
    for (sum=0.0, i=0; i<nvtxs; i++) {
      cand[i].key = xyz[i*ndims+k];
      cand[i].val = i;
      sum += cand[i].key;
    }
    rkvsorti(nvtxs, cand);

    /* determine the global min/max/sum of this dimension */
    gkMPI_Allreduce((void *)&cand[0].key, (void *)&gmin, 1, REAL_T, MPI_MIN, ctrl->comm);
    gkMPI_Allreduce((void *)&cand[nvtxs-1].key, (void *)&gmax, 1, REAL_T, MPI_MAX, ctrl->comm);
    gkMPI_Allreduce((void *)&sum, (void *)&gsum, 1, REAL_T, MPI_MAX, ctrl->comm);

    emarkers[0] = gmin;
    emarkers[1] = gsum/gnvtxs;
    emarkers[2] = gmax*(1.0+2.0*REAL_EPSILON);
    cnbins = 2;

    /* get into an iterative bucket boundary refinement */
    while (cnbins < nbins) {
      /* determine bucket counts */
      iset(cnbins, 0, lcounts);
      rset(cnbins, 0, lsums);
      for (j=0, i=0; i<nvtxs;) {
        if (cand[i].key < emarkers[j+1]) {
          lcounts[j]++;
          lsums[j] += cand[i].key;
          i++;
        }
        else {
          j++;
        }
      }

      gkMPI_Allreduce((void *)lcounts, (void *)gcounts, cnbins, IDX_T, MPI_SUM, ctrl->comm);
      gkMPI_Allreduce((void *)lsums, (void *)gsums, cnbins, REAL_T, MPI_SUM, ctrl->comm);

      /*
      if (mype == 0) {
        printf("Distribution [%"PRIDX"]...\n", cnbins);
        for (i=0; i<cnbins; i++)
          printf("\t%"PRREAL" - %"PRREAL" => %"PRIDX"\n", emarkers[i], emarkers[i+1], gcounts[i]);
      }
      */

      /* split over-weight buckets */
      for (i=0; i<cnbins; i++) {
        buckets[i].key = gcounts[i];
        buckets[i].val = i;
      }
      ikvsorti(cnbins, buckets);

      for (j=0, i=cnbins-1; i>=0; i--, j++) {
        l = buckets[i].val;
        if (buckets[i].key > gnvtxs/nbins && cnbins < nbins) {
          /*
          if (mype == 0)
            printf("\t\t %f %f\n", (float)emarkers[l], (float)emarkers[l+1]);
          */
          nemarkers[j++] = (emarkers[l]+emarkers[l+1])/2;
          cnbins++;
        }
        nemarkers[j] = emarkers[l];
      }
      PASSERT(ctrl, cnbins == j);

      rsorti(cnbins, nemarkers);
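      /* The retained boundaries plus the midpoints of the split buckets
         become the bucket boundaries used in the next pass (or in the
         final bin assignment once nbins buckets exist). */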
      rcopy(cnbins, nemarkers, emarkers);
      emarkers[cnbins] = gmax*(1.0+2.0*REAL_EPSILON);
    }

    /* assign the coordinate to the appropriate bin */
    for (j=0, i=0; i<nvtxs;) {
      if (cand[i].key < emarkers[j+1]) {
        bxyz[cand[i].val*ndims+k] = j;
        i++;
      }
      else {
        j++;
      }
    }
  }

  WCOREPOP;
}


/**************************************************************************/
/*! This function sorts a distributed list of ikv_t in increasing order,
    and uses it to compute a partition. It uses samplesort.

    Note that this implementation assumes that the number of local elements
    on each processor is greater than npes; this constraint is enforced by
    the calling routines.
*/
/**************************************************************************/
void SampleSort(ctrl_t *ctrl, graph_t *graph, ikv_t *elmnts)
{
  idx_t i, j, k, nvtxs, nrecv, npes=ctrl->npes, mype=ctrl->mype, 
        firstvtx, lastvtx;
  idx_t *scounts, *rcounts, *vtxdist, *perm;
  ikv_t *relmnts, *mypicks, *allpicks;

  WCOREPUSH;

  CommUpdateNnbrs(ctrl, npes);

  nvtxs   = graph->nvtxs;
  vtxdist = graph->vtxdist;

  /* get memory for the counts */
  scounts = iwspacemalloc(ctrl, npes+1);
  rcounts = iwspacemalloc(ctrl, npes+1);

  /* get memory for the splitters */
  mypicks = ikvwspacemalloc(ctrl, npes+1);

  WCOREPUSH; /* for freeing allpicks */
  allpicks = ikvwspacemalloc(ctrl, npes*npes);

  /* Sort the local elements */
  ikvsorti(nvtxs, elmnts);

  /* Select the local npes-1 equally spaced elements */
  for (i=1; i<npes; i++) { 
    mypicks[i-1].key = elmnts[i*(nvtxs/npes)].key;
    mypicks[i-1].val = elmnts[i*(nvtxs/npes)].val;
  }

  /* PrintPairs(ctrl, npes-1, mypicks, "Mypicks"); */

  /* Gather the picks to all the processors */
  gkMPI_Allgather((void *)mypicks, 2*(npes-1), IDX_T, (void *)allpicks, 
      2*(npes-1), IDX_T, ctrl->comm);

  /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */

  /* Sort all the picks */
  ikvsortii(npes*(npes-1), allpicks);

  /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */

  /* Select the final splitters. Set the boundaries to simplify coding */
  for (i=1; i<npes; i++)
    mypicks[i] = allpicks[i*(npes-1)];
  mypicks[0].key    = IDX_MIN;
  mypicks[npes].key = IDX_MAX;

  /* PrintPairs(ctrl, npes+1, mypicks, "Mypicks"); */

  WCOREPOP;  /* free allpicks */

  /* Compute the number of elements that belong to each bucket */
  iset(npes, 0, scounts);
  for (j=i=0; i<nvtxs; i++) {
    if (elmnts[i].key < mypicks[j+1].key || 
        (elmnts[i].key == mypicks[j+1].key && elmnts[i].val < mypicks[j+1].val))
      scounts[j]++;
    else
      scounts[++j]++;
  }
  gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, ctrl->comm);

  MAKECSR(i, npes, scounts);
  MAKECSR(i, npes, rcounts);

  /*
  PrintVector(ctrl, npes+1, 0, scounts, "Scounts");
  PrintVector(ctrl, npes+1, 0, rcounts, "Rcounts");
  */

  /* Allocate memory for sorted elements and receive them */
  nrecv   = rcounts[npes];
  relmnts = ikvwspacemalloc(ctrl, nrecv);

  /* Issue the receives first */
  for (i=0; i<npes; i++) 
    gkMPI_Irecv((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_T, 
        i, 1, ctrl->comm, ctrl->rreq+i);

  /* Issue the sends next */
  for (i=0; i<npes; i++) 
    gkMPI_Isend((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_T, 
        i, 1, ctrl->comm, ctrl->sreq+i);

  gkMPI_Waitall(npes, ctrl->rreq, ctrl->statuses);
  gkMPI_Waitall(npes, ctrl->sreq, ctrl->statuses);


  /* OK, now do the local sort of the relmnts. Use perm to keep track of the original order */
  perm = iwspacemalloc(ctrl, nrecv);
  for (i=0; i<nrecv; i++) {
    perm[i]        = relmnts[i].val;
    relmnts[i].val = i;
  }
  ikvsorti(nrecv, relmnts);


  /* Compute what needs to be shifted */
  gkMPI_Scan((void *)(&nrecv), (void *)(&lastvtx), 1, IDX_T, MPI_SUM, ctrl->comm);
  firstvtx = lastvtx-nrecv;  

  /* myprintf(ctrl, "first, last: %"PRIDX" %"PRIDX"\n", firstvtx, lastvtx); */

  for (j=0, i=0; i<npes; i++) {
    if (vtxdist[i+1] > firstvtx) {  /* Found the first PE that is past me */
      if (vtxdist[i+1] >= lastvtx) {
        /* myprintf(ctrl, "Shifting %"PRIDX" elements to processor %"PRIDX"\n", lastvtx-firstvtx, i); */
        for (k=0; k<lastvtx-firstvtx; k++, j++) 
          relmnts[relmnts[j].val].key = i;
      }
      else {
        /* myprintf(ctrl, "Shifting %"PRIDX" elements to processor %"PRIDX"\n", vtxdist[i+1]-firstvtx, i); */
        for (k=0; k<vtxdist[i+1]-firstvtx; k++, j++) 
          relmnts[relmnts[j].val].key = i;

        firstvtx = vtxdist[i+1];
      }
    }
    if (vtxdist[i+1] >= lastvtx)
      break;
  }

  /* Reverse the ordering on the relmnts[].val */
  for (i=0; i<nrecv; i++) {
    PASSERT(ctrl, relmnts[i].key>=0 && relmnts[i].key<npes); 
    relmnts[i].val = perm[i];
  }

  /* OK, now send it back */
  /* Issue the receives first */
  for (i=0; i<npes; i++) 
    gkMPI_Irecv((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_T, 
        i, 1, ctrl->comm, ctrl->rreq+i);

  /* Issue the sends next */
  for (i=0; i<npes; i++) 
    gkMPI_Isend((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_T, 
        i, 1, ctrl->comm, ctrl->sreq+i);

  gkMPI_Waitall(npes, ctrl->rreq, ctrl->statuses);
  gkMPI_Waitall(npes, ctrl->sreq, ctrl->statuses);


  /* Construct a partition for the graph */
  graph->where = imalloc(graph->nvtxs+graph->nrecv, "PartSort: graph->where");
  firstvtx = vtxdist[mype];
  for (i=0; i<nvtxs; i++) {
    PASSERT(ctrl, elmnts[i].key>=0 && elmnts[i].key<npes); 
    PASSERT(ctrl, elmnts[i].val>=vtxdist[mype] && elmnts[i].val<vtxdist[mype+1]); 
    graph->where[elmnts[i].val-firstvtx] = elmnts[i].key;
  }

  WCOREPOP;
}


/**************************************************************************/
/*! This function sorts a distributed list of ikv_t in increasing order,
    and uses it to compute a partition. It uses a samplesort variant whose
    number of local samples can potentially be smaller than npes.
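    The overall approach: each processor sorts its local elements and
    contributes up to nlsamples-1 roughly equally spaced samples; the
    samples are gathered on all processors, sorted, and npes-1 of them
    become bucket splitters; an all-to-all exchange then routes each
    element to the processor owning its bucket, where the received
    elements are sorted and each one is assigned as its partition number
    the processor whose vtxdist range contains the element's global rank
    in the sorted order; finally the elements are sent back and the
    partition numbers are recorded in graph->where.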
*/
/**************************************************************************/
void PseudoSampleSort(ctrl_t *ctrl, graph_t *graph, ikv_t *elmnts)
{
  idx_t npes=ctrl->npes, mype=ctrl->mype; 
  idx_t i, j, k, nlsamples, ntsamples, nvtxs, nrecv, firstvtx, lastvtx;
  idx_t *scounts, *rcounts, *sdispls, *rdispls, *vtxdist, *perm;
  ikv_t *relmnts, *mypicks, *allpicks;

  STARTTIMER(ctrl, ctrl->AuxTmr1);

  WCOREPUSH;

  nvtxs   = graph->nvtxs;
  vtxdist = graph->vtxdist;

  /* determine the number of local samples */
  //nlsamples = (GlobalSESum(ctrl, graph->nedges) + graph->gnvtxs)/(npes*npes);
  nlsamples = graph->gnvtxs/(npes*npes);
  if (nlsamples > npes)
    nlsamples = npes;
  else if (nlsamples < 75)
    nlsamples = gk_min(75, npes); /* the 'npes' in the min is to account for small graphs */

  IFSET(ctrl->dbglvl, DBG_INFO, 
      rprintf(ctrl, "PseudoSampleSort: nlsamples=%"PRIDX" of %"PRIDX"\n", nlsamples, npes));

  /* get memory for the counts and displacements */
  scounts = iwspacemalloc(ctrl, npes+1);
  rcounts = iwspacemalloc(ctrl, npes+1);
  sdispls = iwspacemalloc(ctrl, npes+1);
  rdispls = iwspacemalloc(ctrl, npes+1);

  /* get memory for the splitters */
  mypicks = ikvwspacemalloc(ctrl, npes+1);

  WCOREPUSH; /* for freeing allpicks */
  allpicks = ikvwspacemalloc(ctrl, npes*nlsamples);

  /* Sort the local elements */
  ikvsorti(nvtxs, elmnts);

  /* Select the local nlsamples-1 equally spaced elements */
  for (i=0; i<nlsamples-1; i++) {
    if (nvtxs > 0) {
      k = (nvtxs/(3*nlsamples)               /* initial offset */
           + i*nvtxs/nlsamples               /* increment */
           + mype*nvtxs/(npes*nlsamples)     /* per-pe shift for nlsamples<npes */
          )%nvtxs;
      mypicks[i].key = elmnts[k].key;
      mypicks[i].val = elmnts[k].val;
    }
    else {
      /* A processor with no local elements still contributes nlsamples-1
         picks, but their .val fields are set to -1 so that they can be
         removed prior to determining the splitters. */
      mypicks[i].val = -1;
    }
  }

  /* PrintPairs(ctrl, nlsamples-1, mypicks, "Mypicks"); */

  STOPTIMER(ctrl, ctrl->AuxTmr1);
  STARTTIMER(ctrl, ctrl->AuxTmr2);

  /* Gather the picks to all the processors */
  gkMPI_Allgather((void *)mypicks, 2*(nlsamples-1), IDX_T, (void *)allpicks, 
      2*(nlsamples-1), IDX_T, ctrl->comm);

  /* PrintPairs(ctrl, npes*(nlsamples-1), allpicks, "Allpicks"); */

  /* Remove any samples that have .val == -1 */
  for (ntsamples=0, i=0; i<npes*(nlsamples-1); i++) {
    if (allpicks[i].val != -1)
      allpicks[ntsamples++] = allpicks[i];
  }

  /* Sort all the picks */
  ikvsortii(ntsamples, allpicks);

  /* Select the final splitters. Set the boundaries to simplify coding */
  for (i=1; i<npes; i++)
    mypicks[i] = allpicks[i*ntsamples/npes];
  mypicks[0].key    = IDX_MIN;
  mypicks[npes].key = IDX_MAX;

  WCOREPOP;  /* free allpicks */

  STOPTIMER(ctrl, ctrl->AuxTmr2);
  STARTTIMER(ctrl, ctrl->AuxTmr3);

  /* Compute the number of elements that belong to each bucket */
  iset(npes, 0, scounts);
  for (j=i=0; i<nvtxs; i++) {
    if (elmnts[i].key < mypicks[j+1].key || 
        (elmnts[i].key == mypicks[j+1].key && elmnts[i].val < mypicks[j+1].val))
      scounts[j]++;
    else
      scounts[++j]++;
  }
  gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, ctrl->comm);

  /* multiply raw counts by 2 to account for the ikv_t type */
  sdispls[0] = rdispls[0] = 0;
  for (i=0; i<npes; i++) {
    scounts[i]  *= 2;
    rcounts[i]  *= 2;
    sdispls[i+1] = sdispls[i] + scounts[i];
    rdispls[i+1] = rdispls[i] + rcounts[i];
  }

  STOPTIMER(ctrl, ctrl->AuxTmr3);
  STARTTIMER(ctrl, ctrl->AuxTmr4);

  /*
  PrintVector(ctrl, npes+1, 0, scounts, "Scounts");
  PrintVector(ctrl, npes+1, 0, rcounts, "Rcounts");
  */

  /* Allocate memory for sorted elements and receive them */
  nrecv   = rdispls[npes]/2;  /* The divide by 2 is to get the # of ikv_t elements */
  relmnts = ikvwspacemalloc(ctrl, nrecv);

  IFSET(ctrl->dbglvl, DBG_INFO, 
      rprintf(ctrl, "PseudoSampleSort: max_nrecv: %"PRIDX" of %"PRIDX"\n", 
          GlobalSEMax(ctrl, nrecv), graph->gnvtxs/npes));
  if (mype == 0 || mype == npes-1)
    IFSET(ctrl->dbglvl, DBG_INFO, 
        myprintf(ctrl, "PseudoSampleSort: nrecv: %"PRIDX" of %"PRIDX"\n", 
            nrecv, graph->gnvtxs/npes));

  gkMPI_Alltoallv((void *)elmnts, scounts, sdispls, IDX_T, 
      (void *)relmnts, rcounts, rdispls, IDX_T, ctrl->comm);

  STOPTIMER(ctrl, ctrl->AuxTmr4);
  STARTTIMER(ctrl, ctrl->AuxTmr5);

  /* OK, now do the local sort of the relmnts. Use perm to keep track of the original order */
  perm = iwspacemalloc(ctrl, nrecv);
  for (i=0; i<nrecv; i++) {
    perm[i]        = relmnts[i].val;
    relmnts[i].val = i;
  }
  ikvsorti(nrecv, relmnts);


  /* Compute what needs to be shifted */
  gkMPI_Scan((void *)(&nrecv), (void *)(&lastvtx), 1, IDX_T, MPI_SUM, ctrl->comm);
  firstvtx = lastvtx-nrecv;  

  for (j=0, i=0; i<npes; i++) {
    if (vtxdist[i+1] > firstvtx) {  /* Found the first PE that is past me */
      if (vtxdist[i+1] >= lastvtx) {
        /* myprintf(ctrl, "Shifting %"PRIDX" elements to processor %"PRIDX"\n", lastvtx-firstvtx, i); */
        for (k=0; k<lastvtx-firstvtx; k++, j++) 
          relmnts[relmnts[j].val].key = i;
      }
      else {
        /* myprintf(ctrl, "Shifting %"PRIDX" elements to processor %"PRIDX"\n", vtxdist[i+1]-firstvtx, i); */
        for (k=0; k<vtxdist[i+1]-firstvtx; k++, j++) 
          relmnts[relmnts[j].val].key = i;

        firstvtx = vtxdist[i+1];
      }
    }
    if (vtxdist[i+1] >= lastvtx)
      break;
  }

  /* Reverse the ordering on the relmnts[].val */
  for (i=0; i<nrecv; i++) {
    PASSERT(ctrl, relmnts[i].key>=0 && relmnts[i].key<npes); 
    relmnts[i].val = perm[i];
  }

  STOPTIMER(ctrl, ctrl->AuxTmr5);
  STARTTIMER(ctrl, ctrl->AuxTmr6);

  /* OK, now send it back. The role of the send/recv arrays is now reversed. */
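  /* At this point relmnts[i].key holds the partition number computed for
     the i-th received element and relmnts[i].val holds that element's
     original .val, so the reverse all-to-all returns each element, tagged
     with its partition, to the processor that contributed it. */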
  gkMPI_Alltoallv((void *)relmnts, rcounts, rdispls, IDX_T, 
      (void *)elmnts, scounts, sdispls, IDX_T, ctrl->comm);

  /* Construct a partition for the graph */
  graph->where = imalloc(graph->nvtxs+graph->nrecv, "PartSort: graph->where");
  firstvtx = vtxdist[mype];
  for (i=0; i<nvtxs; i++) {
    PASSERT(ctrl, elmnts[i].key>=0 && elmnts[i].key<npes); 
    PASSERT(ctrl, elmnts[i].val>=vtxdist[mype] && elmnts[i].val<vtxdist[mype+1]); 
    graph->where[elmnts[i].val-firstvtx] = elmnts[i].key;
  }

  WCOREPOP;

  STOPTIMER(ctrl, ctrl->AuxTmr6);
}
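
/*
   Usage sketch (illustration only, not part of the library): a caller with
   ndims coordinates per vertex stored contiguously in xyz would typically
   invoke

       Coordinate_Partition(ctrl, graph, ndims, xyz, 1);

   which bins and Z-orders the coordinates and leaves the resulting
   partition vector in graph->where.
*/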