/* * Copyright 1997, Regents of the University of Minnesota * * node_refine.c * * This file contains code that performs the k-way refinement * * Started 3/1/96 * George * * $Id: node_refine.c 10391 2011-06-23 19:00:08Z karypis $ */ #include /************************************************************************************/ /*! This function allocates the memory required for the nodeND refinement code. The only refinement-related information that is has is the \c graph->where vector and allocates the memory for the remaining of the refinement-related data-structures. */ /************************************************************************************/ void AllocateNodePartitionParams(ctrl_t *ctrl, graph_t *graph) { idx_t nparts, nvtxs; idx_t *vwgt; NRInfoType *rinfo, *myrinfo; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); nvtxs = graph->nvtxs; nparts = ctrl->nparts; graph->nrinfo = (NRInfoType *)gk_malloc(sizeof(NRInfoType)*nvtxs, "AllocateNodePartitionParams: rinfo"); graph->lpwgts = imalloc(2*nparts, "AllocateNodePartitionParams: lpwgts"); graph->gpwgts = imalloc(2*nparts, "AllocateNodePartitionParams: gpwgts"); graph->sepind = imalloc(nvtxs, "AllocateNodePartitionParams: sepind"); /* Allocate additional memory for graph->vwgt in order to store the weights of the remote vertices */ vwgt = graph->vwgt; graph->vwgt = imalloc(nvtxs+graph->nrecv, "AllocateNodePartitionParams: graph->vwgt"); icopy(nvtxs, vwgt, graph->vwgt); gk_free((void **)&vwgt, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); } /************************************************************************************/ /*! This function computes the initial node refinment information for the parallel nodeND code. It requires that the required data-structures have already been allocated via a call to AllocateNodePartitionParams. */ /************************************************************************************/ void ComputeNodePartitionParams(ctrl_t *ctrl, graph_t *graph) { idx_t i, j, nparts, nvtxs, nsep; idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt, *lpwgts, *gpwgts, *sepind; idx_t *where; NRInfoType *rinfo, *myrinfo; idx_t me, other, otherwgt; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); nvtxs = graph->nvtxs; nparts = ctrl->nparts; vtxdist = graph->vtxdist; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; vwgt = graph->vwgt; where = graph->where; rinfo = graph->nrinfo; lpwgts = graph->lpwgts; gpwgts = graph->gpwgts; sepind = graph->sepind; /* Reset refinement data structures */ iset(2*nparts, 0, lpwgts); /* Send/Receive the where and vwgt information of interface vertices. */ CommInterfaceData(ctrl, graph, where, where+nvtxs); CommInterfaceData(ctrl, graph, vwgt, vwgt+nvtxs); /*------------------------------------------------------------ / Compute now the degrees /------------------------------------------------------------*/ for (nsep=i=0; i= 0 && me < 2*nparts); lpwgts[me] += vwgt[i]; if (me >= nparts) { /* If it is a separator vertex */ sepind[nsep++] = i; lpwgts[2*nparts-1] += vwgt[i]; /* Keep track of total separator weight */ myrinfo = rinfo+i; myrinfo->edegrees[0] = myrinfo->edegrees[1] = 0; for (j=xadj[i]; jedegrees[other%2] += vwgt[adjncy[j]]; } } } graph->nsep = nsep; /* Finally, sum-up the partition weights */ gkMPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_T, MPI_SUM, ctrl->comm); graph->mincut = gpwgts[2*nparts-1]; IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); } /************************************************************************************/ /*! This function updates the node refinment information after a where[] change */ /************************************************************************************/ void UpdateNodePartitionParams(ctrl_t *ctrl, graph_t *graph) { idx_t i, j, nparts, nvtxs, nsep; idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt, *lpwgts, *gpwgts, *sepind; idx_t *where; NRInfoType *rinfo, *myrinfo; idx_t me, other, otherwgt; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); nvtxs = graph->nvtxs; nparts = ctrl->nparts; vtxdist = graph->vtxdist; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; vwgt = graph->vwgt; where = graph->where; rinfo = graph->nrinfo; lpwgts = graph->lpwgts; gpwgts = graph->gpwgts; sepind = graph->sepind; /* Reset refinement data structures */ iset(2*nparts, 0, lpwgts); /* Send/Receive the where and vwgt information of interface vertices. */ CommInterfaceData(ctrl, graph, where, where+nvtxs); /*------------------------------------------------------------ / Compute now the degrees /------------------------------------------------------------*/ for (nsep=i=0; i= 0 && me < 2*nparts); lpwgts[me] += vwgt[i]; if (me >= nparts) { /* If it is a separator vertex */ sepind[nsep++] = i; lpwgts[2*nparts-1] += vwgt[i]; /* Keep track of total separator weight */ myrinfo = rinfo+i; myrinfo->edegrees[0] = myrinfo->edegrees[1] = 0; for (j=xadj[i]; jedegrees[other%2] += vwgt[adjncy[j]]; } } } graph->nsep = nsep; /* Finally, sum-up the partition weights */ gkMPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_T, MPI_SUM, ctrl->comm); graph->mincut = gpwgts[2*nparts-1]; IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); } /************************************************************************************/ /*! This function performs k-way node-based refinement. The refinement is done concurrently for all the different partitions. It works because each of the partitions is disconnected from each other due to the removal of the previous level separators. This version uses a priority queue to order the nodes and incorporates gain updates from the local information within each inner iteration. The refinement exits when there is no improvement in two successive itereations in order to account for the fact that a 0 => 1 iteration may have no gain but a 1 => 0 iteration may have a gain. */ /************************************************************************************/ void KWayNodeRefine_Greedy(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac) { idx_t i, ii, iii, j, jj, k, pass, nvtxs, nrecv, firstvtx, lastvtx, otherlastvtx, side, c, cc, nmoves, nlupd, nsupd, nnbrs, nchanged, nsep, nzerogainiterations; idx_t npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; idx_t *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt; idx_t *where, *lpwgts, *gpwgts, *sepind; idx_t *peind, *recvptr, *sendptr; idx_t *update, *supdate, *rupdate, *pe_updates, *marker, *changed; idx_t *badmaxpwgt; ikv_t *swchanges, *rwchanges; idx_t *nupds_pe; NRInfoType *rinfo, *myrinfo; idx_t from, to, me, other, oldcut; rpq_t *queue; idx_t *inqueue; idx_t *rxadj, *radjncy; char title[1024]; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); WCOREPUSH; nvtxs = graph->nvtxs; nrecv = graph->nrecv; vtxdist = graph->vtxdist; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; vwgt = graph->vwgt; firstvtx = vtxdist[mype]; lastvtx = vtxdist[mype+1]; where = graph->where; rinfo = graph->nrinfo; lpwgts = graph->lpwgts; gpwgts = graph->gpwgts; nsep = graph->nsep; sepind = graph->sepind; nnbrs = graph->nnbrs; peind = graph->peind; recvptr = graph->recvptr; sendptr = graph->sendptr; badmaxpwgt = iwspacemalloc(ctrl, nparts); nupds_pe = iwspacemalloc(ctrl, npes); changed = iwspacemalloc(ctrl, nvtxs); update = iwspacemalloc(ctrl, nvtxs); marker = iset(nvtxs+nrecv, 0, iwspacemalloc(ctrl, nvtxs+nrecv)); inqueue = iwspacemalloc(ctrl, nvtxs+nrecv); rwchanges = ikvwspacemalloc(ctrl, graph->nrecv); swchanges = ikvwspacemalloc(ctrl, graph->nsend); supdate = iwspacemalloc(ctrl, graph->nrecv); rupdate = iwspacemalloc(ctrl, graph->nsend); queue = rpqCreate(nvtxs); for (i=0; i= 0) rxadj[k]++; } } MAKECSR(i, nrecv, rxadj); radjncy = iwspacemalloc(ctrl, rxadj[nrecv]); for (i=0; i= 0) radjncy[rxadj[k]++] = i; } } SHIFTCSR(i, nrecv, rxadj); IFSET(ctrl->dbglvl, DBG_REFINEINFO, PrintNodeBalanceInfo(ctrl, nparts, gpwgts, badmaxpwgt, "K-way sep-refinement")); c = GlobalSESum(ctrl, RandomInRange(2))%2; nzerogainiterations = 0; for (pass=0; passmincut; for (side=0; side<2; side++) { cc = (c+1)%2; #ifndef NDEBUG /* This is for debugging purposes */ for (ii=0, i=0; i= nparts) ii++; } PASSERT(ctrl, ii == nsep); #endif /* Put the separator nodes in queue */ rpqReset(queue); iset(nvtxs+nrecv, 0, inqueue); for (ii=0; ii= nparts); rpqInsert(queue, i, vwgt[i] - rinfo[i].edegrees[cc]); inqueue[i] = 1; } nlupd = nsupd = nmoves = nchanged = nsep = 0; while ((i = rpqGetTop(queue)) != -1) { inqueue[i] = 0; from = where[i]; PASSERT(ctrl, from >= nparts); /* It is a one-sided move so it will go to the other partition. Look at the comments in InitMultisection to understand the meaning of from%nparts */ to = from%nparts+c; /* where to move the separator node */ other = from%nparts+cc; /* the other partition involved in the 3-way view */ /* Go through the loop to see if gain is possible for the separator vertex */ if (gpwgts[to]+vwgt[i] <= badmaxpwgt[to] && vwgt[i] - rinfo[i].edegrees[cc] >= 0) { /* Update the where information of the vertex you moved */ where[i] = to; lpwgts[from] -= vwgt[i]; lpwgts[2*nparts-1] -= vwgt[i]; lpwgts[to] += vwgt[i]; gpwgts[to] += vwgt[i]; /* Update and record future updates */ for (j=xadj[i]; j= vwgt[ii]); rinfo[iii].edegrees[cc] -= vwgt[ii]; rpqUpdate(queue, iii, vwgt[iii]-rinfo[iii].edegrees[cc]); } } } else { /* remote vertices */ for (jj=rxadj[ii-nvtxs]; jj= vwgt[ii]); rinfo[iii].edegrees[cc] -= vwgt[ii]; rpqUpdate(queue, iii, vwgt[iii]-rinfo[iii].edegrees[cc]); } } } } /* Put the vertices adjacent to i that belong to either the separator or the cc partition into the update array */ if (marker[ii] == 0 && where[ii] != to) { marker[ii] = 1; if (iipexadj[i+1]-graph->pexadj[i] > 0) changed[nchanged++] = i; } else { /* This node will remain in the separator for the next iteration */ sepind[nsep++] = i; } } /* myprintf(ctrl, "nmoves: %"PRIDX", nlupd: %"PRIDX", nsupd: %"PRIDX"\n", nmoves, nlupd, nsupd); */ IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%"PRIDX" %"PRIDX"], [%"PRIDX" %"PRIDX" %"PRIDX"]\n", pass, c, GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); /*----------------------------------------------------------------------- / Time to communicate with processors to send the vertices whose degrees / need to be updated. /-----------------------------------------------------------------------*/ /* Issue the receives first */ for (i=0; icomm, ctrl->rreq+i); } /* Issue the sends next. This needs some preporcessing */ for (i=0; iimap[supdate[i]]; } isorti(nsupd, supdate); for (j=i=0; icomm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the send/recv operations to finish */ gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; istatuses+i, IDX_T, nupds_pe+i); gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*------------------------------------------------------------- / Place the received to-be updated vertices into update[] /-------------------------------------------------------------*/ for (i=0; ipexadj[i+1]-graph->pexadj[i] > 0) changed[nchanged++] = i; lpwgts[where[i]] += vwgt[i]; lpwgts[2*nparts-1] += vwgt[i]; /* myprintf(ctrl, "Vertex %"PRIDX" moves into the separator from %"PRIDX" to %"PRIDX"\n", i+firstvtx, me, where[i]); */ } } /* Tell everybody interested what the new where[] info is for the interface vertices */ CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges); /*------------------------------------------------------------- / Update the rinfo of the vertices in the update[] array /-------------------------------------------------------------*/ for (ii=0; ii= nparts) { /* If it is a separator vertex */ /* myprintf(ctrl, "Updating %"PRIDX" %"PRIDX"\n", i+firstvtx, me); */ myrinfo = rinfo+i; myrinfo->edegrees[0] = myrinfo->edegrees[1] = 0; for (j=xadj[i]; jedegrees[other%2] += vwgt[adjncy[j]]; } } } /* Finally, sum-up the partition weights */ gkMPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_T, MPI_SUM, ctrl->comm); graph->mincut = gpwgts[2*nparts-1]; sprintf(title, "\tTotalSep [%"PRIDX"]", c); IFSET(ctrl->dbglvl, DBG_REFINEINFO, PrintNodeBalanceInfo(ctrl, nparts, gpwgts, badmaxpwgt, title)); /* break out if there is no improvement in two successive inner iterations that can span successive outer iterations */ if (graph->mincut == oldcut) { if (++nzerogainiterations == 2) break; } else { nzerogainiterations = 0; } c = cc; } /* break out if there is no improvement in two successive inner iterations that can span successive outer iterations */ if (graph->mincut == oldcut && nzerogainiterations == 2) break; } rpqDestroy(queue); WCOREPOP; IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); } /************************************************************************************/ /*! This function performs k-way node-based refinement. The key difference between this and the previous routine is that the well-interior nodes are refined using a serial node-based refinement algortihm. */ /************************************************************************************/ void KWayNodeRefine2Phase(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac) { idx_t i, oldcut; oldcut = graph->mincut+1; for (i=0; imincut == oldcut) break; oldcut = graph->mincut; KWayNodeRefineInterior(ctrl, graph, 2, ubfrac); UpdateNodePartitionParams(ctrl, graph); if (graph->mincut == oldcut) break; oldcut = graph->mincut; } } /************************************************************************************/ /*! This function performs k-way node-based refinement of the interior nodes of the graph assigned to each processor using a serial node-refinement algorithm. */ /************************************************************************************/ void KWayNodeRefineInterior(ctrl_t *ctrl, graph_t *graph, idx_t npasses, real_t ubfrac) { idx_t i, j, k, ii, gnnz, gid, qsize; idx_t npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; idx_t nvtxs, *xadj, *adjncy, *vwgt, *where, *pexadj; idx_t gnvtxs, *gxadj, *gadjncy, *gvwgt, *gwhere, *ghmarker; idx_t *gmap, *gimap; idx_t *pptr, *pind; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->AuxTmr1)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); WCOREPUSH; nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; vwgt = graph->vwgt; where = graph->where; pexadj = graph->pexadj; gxadj = iwspacemalloc(ctrl, nvtxs+1); gvwgt = iwspacemalloc(ctrl, nvtxs); gadjncy = iwspacemalloc(ctrl, xadj[nvtxs]); gwhere = iwspacemalloc(ctrl, nvtxs); ghmarker = iwspacemalloc(ctrl, nvtxs); gmap = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); gimap = iwspacemalloc(ctrl, nvtxs); pptr = iset(2*nparts+1, 0, iwspacemalloc(ctrl, 2*nparts+1)); pind = iwspacemalloc(ctrl, nvtxs); /* Set pptr/pind to contain the vertices in each one of the partitions */ for (i=0; ilpwgts[nparts+gid] == 0) continue; /* a quick test to determine if there are sufficient non-interface separator nodes */ for (qsize=0, ii=pptr[nparts+gid]; ii= 2) break; } } if (ii == pptr[nparts+gid+1]) break; /* no need to proceed. not enough non-interface separator nodes */ /* compute the gmap/gimap and other node info for the extracted graph */ for (gnvtxs=0, ii=pptr[nparts+gid]; ii 0 ? gwhere[gnvtxs] : -1); } for (ii=pptr[gid]; ii 0 ? gwhere[gnvtxs] : -1); PASSERT(ctrl, gwhere[gnvtxs] >= 0 && gwhere[gnvtxs] <= 1); } gxadj[0]=0; gnvtxs=0; gnnz=0; /* go over the separator nodes */ for (ii=pptr[nparts+gid]; iidbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->AuxTmr1)); } /************************************************************************************/ /*! This function prints balance information for the parallel k-section refinement algorithm. */ /************************************************************************************/ void PrintNodeBalanceInfo(ctrl_t *ctrl, idx_t nparts, idx_t *gpwgts, idx_t *badmaxpwgt, char *title) { idx_t i; if (ctrl->mype == 0) { printf("%s: %"PRIDX", ", title, gpwgts[2*nparts-1]); for (i=0; icomm); }