/* * serial.c * * This file contains code that implements k-way refinement * * Started 7/28/97 * George * * $Id: serial.c 13927 2013-03-27 22:42:41Z karypis $ * */ #include <parmetislib.h> /************************************************************************* * This function computes the initial id/ed **************************************************************************/ void Mc_ComputeSerialPartitionParams(ctrl_t *ctrl, graph_t *graph, idx_t nparts) { idx_t i, j, k; idx_t nvtxs, nedges, ncon, mincut, me, other; idx_t *xadj, *adjncy, *adjwgt, *where; ckrinfo_t *myrinfo; cnbr_t *mynbrs; real_t *nvwgt, *npwgts; idx_t mype; gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; npwgts = rset(ncon*nparts, 0.0, graph->gnpwgts); PASSERT(ctrl, graph->ckrinfo != NULL); PASSERT(ctrl, ctrl->cnbrpool != NULL); memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs); cnbrpoolReset(ctrl); /*------------------------------------------------------------ / Compute now the id/ed degrees /------------------------------------------------------------*/ nedges = mincut = 0; for (i=0; i<nvtxs; i++) { me = where[i]; raxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1); myrinfo = graph->ckrinfo+i; for (j=xadj[i]; j<xadj[i+1]; j++) { if (me == where[adjncy[j]]) { myrinfo->id += adjwgt[j]; } else { myrinfo->ed += adjwgt[j]; } } mincut += myrinfo->ed; /* Time to compute the particular external degrees */ if (myrinfo->ed > 0) { myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1); mynbrs = ctrl->cnbrpool + myrinfo->inbr; for (j=xadj[i]; j<xadj[i+1]; j++) { other = where[adjncy[j]]; if (me != other) { for (k=0; k<myrinfo->nnbrs; k++) { if (mynbrs[k].pid == other) { mynbrs[k].ed += adjwgt[j]; break; } } if (k == myrinfo->nnbrs) { mynbrs[k].pid = other; mynbrs[k].ed = adjwgt[j]; myrinfo->nnbrs++; } } } } else { myrinfo->inbr = -1; } } graph->mincut = mincut/2; return; } /************************************************************************* * This function performs k-way refinement **************************************************************************/ void Mc_SerialKWayAdaptRefine(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *home, real_t *orgubvec, idx_t npasses) { idx_t i, ii, iii, j, k; idx_t nvtxs, ncon, pass, nmoves; idx_t from, me, myhome, to, oldcut, gain, tmp; idx_t *xadj, *adjncy, *adjwgt; idx_t *where; real_t *npwgts, *nvwgt, *minwgt, *maxwgt, *ubvec; idx_t gain_is_greater, gain_is_same, fit_in_to, fit_in_from, going_home; idx_t zero_gain, better_balance_ft, better_balance_tt; ikv_t *cand; idx_t mype; ckrinfo_t *myrinfo; cnbr_t *mynbrs; WCOREPUSH; gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; npwgts = graph->gnpwgts; /* Setup the weight intervals of the various subdomains */ cand = ikvwspacemalloc(ctrl, nvtxs); minwgt = rwspacemalloc(ctrl, nparts*ncon); maxwgt = rwspacemalloc(ctrl, nparts*ncon); ubvec = rwspacemalloc(ctrl, ncon); ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec); for (i=0; i<ncon; i++) ubvec[i] =gk_max(ubvec[i], orgubvec[i]); for (i=0; i<nparts; i++) { for (j=0; j<ncon; j++) { maxwgt[i*ncon+j] = ubvec[j]/(real_t)nparts; minwgt[i*ncon+j] = ubvec[j]*(real_t)nparts; } } for (pass=0; pass<npasses; pass++) { oldcut = graph->mincut; for (i=0; i<nvtxs; i++) { cand[i].key = graph->ckrinfo[i].ed-graph->ckrinfo[i].id; cand[i].val = i; } ikvsortd(nvtxs, cand); nmoves = 0; for (iii=0; iii<nvtxs; iii++) { i = cand[iii].val; myrinfo = graph->ckrinfo+i; if (myrinfo->ed >= myrinfo->id) { from = where[i]; myhome = home[i]; nvwgt = graph->nvwgt+i*ncon; if (myrinfo->id > 0 && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) continue; mynbrs = ctrl->cnbrpool + myrinfo->inbr; for (k=myrinfo->nnbrs-1; k>=0; k--) { to = mynbrs[k].pid; gain = mynbrs[k].ed - myrinfo->id; if (gain >= 0 && (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || IsHBalanceBetterFT(ncon,npwgts+from*ncon,npwgts+to*ncon,nvwgt,ubvec))) { break; } } /* break out if you did not find a candidate */ if (k < 0) continue; for (j=k-1; j>=0; j--) { to = mynbrs[j].pid; going_home = (myhome == to); gain_is_same = (mynbrs[j].ed == mynbrs[k].ed); gain_is_greater = (mynbrs[j].ed > mynbrs[k].ed); fit_in_to = AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon); better_balance_ft = IsHBalanceBetterFT(ncon, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec); better_balance_tt = IsHBalanceBetterTT(ncon, npwgts+mynbrs[k].pid*ncon, npwgts+to*ncon,nvwgt,ubvec); if ( (gain_is_greater && (fit_in_to || better_balance_ft) ) || (gain_is_same && ( (fit_in_to && going_home) || better_balance_tt ) ) ) { k = j; } } to = mynbrs[k].pid; going_home = (myhome == to); zero_gain = (mynbrs[k].ed == myrinfo->id); fit_in_from = AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, npwgts+from*ncon, maxwgt+from*ncon); better_balance_ft = IsHBalanceBetterFT(ncon, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec); if (zero_gain && !going_home && !better_balance_ft && fit_in_from) continue; /*===================================================================== * If we got here, we can now move the vertex from 'from' to 'to' *======================================================================*/ graph->mincut -= mynbrs[k].ed-myrinfo->id; /* Update where, weight, and ID/ED information of the vertex you moved */ raxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); where[i] = to; myrinfo->ed += myrinfo->id-mynbrs[k].ed; gk_SWAP(myrinfo->id, mynbrs[k].ed, tmp); if (mynbrs[k].ed == 0) mynbrs[k] = mynbrs[--myrinfo->nnbrs]; else mynbrs[k].pid = from; /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { ii = adjncy[j]; me = where[ii]; myrinfo = graph->ckrinfo+ii; if (myrinfo->inbr == -1) { myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[ii+1]-xadj[ii]+1); myrinfo->nnbrs = 0; } mynbrs = ctrl->cnbrpool + myrinfo->inbr; if (me == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (me == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution of the ed from 'from' */ if (me != from) { for (k=0; k<myrinfo->nnbrs; k++) { if (mynbrs[k].pid == from) { if (mynbrs[k].ed == adjwgt[j]) mynbrs[k] = mynbrs[--myrinfo->nnbrs]; else mynbrs[k].ed -= adjwgt[j]; break; } } } /* Add contribution of the ed to 'to' */ if (me != to) { for (k=0; k<myrinfo->nnbrs; k++) { if (mynbrs[k].pid == to) { mynbrs[k].ed += adjwgt[j]; break; } } if (k == myrinfo->nnbrs) { mynbrs[k].pid = to; mynbrs[k].ed = adjwgt[j]; myrinfo->nnbrs++; } } } nmoves++; } } if (graph->mincut == oldcut) break; } WCOREPOP; return; } /************************************************************************* * This function checks if the vertex weights of two vertices are below * a given set of values **************************************************************************/ idx_t AreAllHVwgtsBelow(idx_t ncon, real_t alpha, real_t *vwgt1, real_t beta, real_t *vwgt2, real_t *limit) { idx_t i; for (i=0; i<ncon; i++) if (alpha*vwgt1[i] + beta*vwgt2[i] > limit[i]) return 0; return 1; } /************************************************************************* * This function computes the load imbalance over all the constrains * For now assume that we just want balanced partitionings **************************************************************************/ void ComputeHKWayLoadImbalance(idx_t ncon, idx_t nparts, real_t *npwgts, real_t *lbvec) { idx_t i, j; real_t max; for (i=0; i<ncon; i++) { max = 0.0; for (j=0; j<nparts; j++) { if (npwgts[j*ncon+i] > max) max = npwgts[j*ncon+i]; } lbvec[i] = max*nparts; } } /************************************************************** * This subroutine remaps a partitioning on a single processor **************************************************************/ void SerialRemap(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *base, idx_t *scratch, idx_t *remap, real_t *tpwgts) { idx_t i, ii, j, k; idx_t nvtxs, nmapped, max_mult; idx_t from, to, current_from, smallcount, bigcount; ikv_t *flowto, *bestflow; i2kv_t *sortvtx; idx_t *vsize, *htable, *map, *rowmap; WCOREPUSH; nvtxs = graph->nvtxs; vsize = graph->vsize; max_mult = gk_min(MAX_NPARTS_MULTIPLIER, nparts); sortvtx = (i2kv_t *)wspacemalloc(ctrl, nvtxs*sizeof(i2kv_t)); flowto = ikvwspacemalloc(ctrl, nparts); bestflow = ikvwspacemalloc(ctrl, nparts*max_mult); map = htable = iset(2*nparts, -1, iwspacemalloc(ctrl, 2*nparts)); rowmap = map+nparts; for (i=0; i<nvtxs; i++) { sortvtx[i].key1 = base[i]; sortvtx[i].key2 = vsize[i]; sortvtx[i].val = i; } qsort((void *)sortvtx, (size_t)nvtxs, (size_t)sizeof(i2kv_t), SSMIncKeyCmp); for (j=0; j<nparts; j++) { flowto[j].key = 0; flowto[j].val = j; } /* this step has nparts*nparts*log(nparts) computational complexity */ bigcount = smallcount = current_from = 0; for (ii=0; ii<nvtxs; ii++) { i = sortvtx[ii].val; from = base[i]; to = scratch[i]; if (from > current_from) { /* reset the hash table */ for (j=0; j<smallcount; j++) htable[flowto[j].val] = -1; ASSERT(isum(nparts, htable, 1) == -nparts); ikvsorti(smallcount, flowto); for (j=0; j<gk_min(smallcount, max_mult); j++, bigcount++) { bestflow[bigcount].key = flowto[j].key; bestflow[bigcount].val = current_from*nparts+flowto[j].val; } smallcount = 0; current_from = from; } if (htable[to] == -1) { htable[to] = smallcount; flowto[smallcount].key = -vsize[i]; flowto[smallcount].val = to; smallcount++; } else { flowto[htable[to]].key += -vsize[i]; } } /* reset the hash table */ for (j=0; j<smallcount; j++) htable[flowto[j].val] = -1; ASSERT(isum(nparts, htable, 1) == -nparts); ikvsorti(smallcount, flowto); for (j=0; j<gk_min(smallcount, max_mult); j++, bigcount++) { bestflow[bigcount].key = flowto[j].key; bestflow[bigcount].val = current_from*nparts+flowto[j].val; } ikvsorti(bigcount, bestflow); ASSERT(isum(nparts, map, 1) == -nparts); ASSERT(isum(nparts, rowmap, 1) == -nparts); nmapped = 0; /* now make as many assignments as possible */ for (ii=0; ii<bigcount; ii++) { i = bestflow[ii].val; j = i % nparts; /* to */ k = i / nparts; /* from */ if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(tpwgts, graph->ncon, j, k)) { map[j] = k; rowmap[k] = j; nmapped++; } if (nmapped == nparts) break; } /* remap the rest */ /* it may help try remapping to the same label first */ if (nmapped < nparts) { for (j=0; j<nparts && nmapped<nparts; j++) { if (map[j] == -1) { for (ii=0; ii<nparts; ii++) { i = (j+ii) % nparts; if (rowmap[i] == -1 && SimilarTpwgts(tpwgts, graph->ncon, i, j)) { map[j] = i; rowmap[i] = j; nmapped++; break; } } } } } /* check to see if remapping fails (due to dis-similar tpwgts) */ /* if remapping fails, revert to original mapping */ if (nmapped < nparts) for (i=0; i<nparts; i++) map[i] = i; for (i=0; i<nvtxs; i++) remap[i] = map[remap[i]]; WCOREPOP; } /************************************************************************* * This is a comparison function for Serial Remap **************************************************************************/ int SSMIncKeyCmp(const void *fptr, const void *sptr) { i2kv_t *first, *second; first = (i2kv_t *)(fptr); second = (i2kv_t *)(sptr); if (first->key1 > second->key1) return 1; if (first->key1 < second->key1) return -1; if (first->key2 < second->key2) return 1; if (first->key2 > second->key2) return -1; return 0; } /************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void Mc_Serial_FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts, idx_t npasses) { idx_t i, ii, j, k; idx_t kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum; idx_t *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idx_t *moved, *swaps, *qnum; real_t *nvwgt, *npwgts, *mindiff, *tmpdiff, origbal, minbal, newbal; rpq_t **parts[2]; idx_t higain, mincut, initcut, newcut, mincutorder; real_t *rtpwgts; idx_t mype; WCOREPUSH; gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; mindiff = rwspacemalloc(ctrl, ncon); tmpdiff = rwspacemalloc(ctrl, ncon); rtpwgts = rwspacemalloc(ctrl, 2*ncon); parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); moved = iwspacemalloc(ctrl, nvtxs); swaps = iwspacemalloc(ctrl, nvtxs); qnum = iwspacemalloc(ctrl, nvtxs); limit = gk_min(gk_max(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; i<ncon; i++) { parts[0][i] = rpqCreate(nvtxs); parts[1][i] = rpqCreate(nvtxs); } for (i=0; i<nvtxs; i++) qnum[i] = rargmax(ncon, nvwgt+i*ncon); origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); for (i=0; i<ncon; i++) { rtpwgts[i] = origbal*tpwgts[i]; rtpwgts[ncon+i] = origbal*tpwgts[ncon+i]; } iset(nvtxs, -1, moved); for (pass=0; pass<npasses; pass++) { /* Do a number of passes */ for (i=0; i<ncon; i++) { rpqReset(parts[0][i]); rpqReset(parts[1][i]); } mincutorder = -1; newcut = mincut = initcut = graph->mincut; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); /* Insert boundary nodes in the priority queues */ nbnd = graph->gnvtxs; for (ii=0; ii<nbnd; ii++) { i = bndind[ii]; rpqInsert(parts[where[i]][qnum[i]], i, (real_t)(ed[i]-id[i])); } for (nswaps=0; nswaps<nvtxs; nswaps++) { Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = rpqGetTop(parts[from][cnum])) == -1) break; raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if ((newcut < mincut && newbal-origbal <= .00001) || (newcut == mincut && (newbal < minbal || (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff, tmpdiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update its boundary information and queue position */ if (bndptr[k] != -1) { /* If k was a boundary vertex */ if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ rpqDelete(parts[where[k]][qnum[k]], k); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) rpqUpdate(parts[where[k]][qnum[k]], k, (real_t)(ed[k]-id[k])); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) rpqInsert(parts[where[k]][qnum[k]], k, (real_t)(ed[k]-id[k])); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; if (mincutorder == -1 || mincut == initcut) break; } for (i=0; i<ncon; i++) { rpqDestroy(parts[0][i]); rpqDestroy(parts[1][i]); } WCOREPOP; return; } /************************************************************************* * This function selects the partition number and the queue from which * we will move vertices out **************************************************************************/ void Serial_SelectQueue(idx_t ncon, real_t *npwgts, real_t *tpwgts, idx_t *from, idx_t *cnum, rpq_t **queues[2]) { idx_t i, part; real_t maxgain=0.0; real_t max = -1.0, maxdiff=0.0; idx_t mype; gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); *from = -1; *cnum = -1; /* First determine the side and the queue, irrespective of the presence of nodes */ for (part=0; part<2; part++) { for (i=0; i<ncon; i++) { if (npwgts[part*ncon+i]-tpwgts[part*ncon+i] >= maxdiff) { maxdiff = npwgts[part*ncon+i]-tpwgts[part*ncon+i]; *from = part; *cnum = i; } } } if (*from != -1 && rpqLength(queues[*from][*cnum]) == 0) { /* The desired queue is empty, select a node from that side anyway */ for (i=0; i<ncon; i++) { if (rpqLength(queues[*from][i]) > 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; break; } } for (i++; i<ncon; i++) { if (npwgts[(*from)*ncon + i] > max && rpqLength(queues[*from][i]) > 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; } } } /* Check to see if you can focus on the cut */ if (maxdiff <= 0.0 || *from == -1) { maxgain = -100000.0; for (part=0; part<2; part++) { for (i=0; i<ncon; i++) { if (rpqLength(queues[part][i]) > 0 && rpqSeeTopKey(queues[part][i]) > maxgain) { maxgain = rpqSeeTopKey(queues[part][i]); *from = part; *cnum = i; } } } } return; } /************************************************************************* * This function checks if the balance achieved is better than the diff * For now, it uses a 2-norm measure **************************************************************************/ idx_t Serial_BetterBalance(idx_t ncon, real_t *npwgts, real_t *tpwgts, real_t *diff, real_t *tmpdiff) { idx_t i; for (i=0; i<ncon; i++) tmpdiff[i] = fabs(tpwgts[i]-npwgts[i]); return rnorm2(ncon, tmpdiff, 1) < rnorm2(ncon, diff, 1); } /************************************************************************* * This function computes the load imbalance over all the constrains **************************************************************************/ real_t Serial_Compute2WayHLoadImbalance(idx_t ncon, real_t *npwgts, real_t *tpwgts) { idx_t i; real_t max=0.0, temp; for (i=0; i<ncon; i++) { if (tpwgts[i] == 0.0) temp = 0.0; else temp = fabs(tpwgts[i]-npwgts[i])/tpwgts[i]; max = (max < temp ? temp : max); } return 1.0+max; } /************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void Mc_Serial_Balance2Way(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts, real_t lbfactor) { idx_t i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum; idx_t *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idx_t *moved, *swaps, *qnum; real_t *nvwgt, *npwgts, *mindiff, *tmpdiff, origbal, minbal, newbal; rpq_t **parts[2]; idx_t higain, mincut, newcut, mincutorder; idx_t *qsizes[2]; WCOREPUSH; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; mindiff = rwspacemalloc(ctrl, ncon); tmpdiff = rwspacemalloc(ctrl, ncon); parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); qsizes[0] = iset(ncon, 0, iwspacemalloc(ctrl, ncon)); qsizes[1] = iset(ncon, 0, iwspacemalloc(ctrl, ncon)); moved = iwspacemalloc(ctrl, nvtxs); swaps = iwspacemalloc(ctrl, nvtxs); qnum = iwspacemalloc(ctrl, nvtxs); limit = gk_min(gk_max(0.01*nvtxs, 15), 100); /* Initialize the queues */ for (i=0; i<ncon; i++) { parts[0][i] = rpqCreate(nvtxs); parts[1][i] = rpqCreate(nvtxs); } for (i=0; i<nvtxs; i++) { qnum[i] = rargmax(ncon, nvwgt+i*ncon); qsizes[qnum[i]][where[i]]++; } for (from=0; from<2; from++) { for (j=0; j<ncon; j++) { if (qsizes[j][from] == 0) { for (i=0; i<nvtxs; i++) { if (where[i] != from) continue; k = rargmax2(ncon, nvwgt+i*ncon); if (k == j && qsizes[qnum[i]][from] > qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { qsizes[qnum[i]][from]--; qsizes[j][from]++; qnum[i] = j; } } } } } for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); minbal = origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); newcut = mincut = graph->mincut; mincutorder = -1; iset(nvtxs, -1, moved); /* Insert all nodes in the priority queues */ nbnd = graph->gnvtxs; for (i=0; i<nvtxs; i++) rpqInsert(parts[where[i]][qnum[i]], i, (real_t)(ed[i]-id[i])); for (nswaps=0; nswaps<nvtxs; nswaps++) { if (minbal < lbfactor) break; Serial_SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = rpqGetTop(parts[from][cnum])) == -1) break; raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if (newbal < minbal || (newbal == minbal && (newcut < mincut || (newcut == mincut && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff, tmpdiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update the queue position */ if (moved[k] == -1) rpqUpdate(parts[where[k]][qnum[k]], k, (real_t)(ed[k]-id[k])); /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; for (i=0; i<ncon; i++) { rpqDestroy(parts[0][i]); rpqDestroy(parts[1][i]); } WCOREPOP; return; } /************************************************************************* * This function balances two partitions by moving the highest gain * (including negative gain) vertices to the other domain. * It is used only when tha unbalance is due to non contigous * subdomains. That is, the are no boundary vertices. * It moves vertices from the domain that is overweight to the one that * is underweight. **************************************************************************/ void Mc_Serial_Init2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts) { idx_t i, ii, j, k; idx_t kwgt, nvtxs, nbnd, ncon, nswaps, from, to, cnum, tmp; idx_t *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idx_t *qnum; real_t *nvwgt, *npwgts; rpq_t **parts[2]; idx_t higain, mincut; WCOREPUSH; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; nvwgt = graph->nvwgt; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); qnum = iwspacemalloc(ctrl, nvtxs); /* This is called for initial partitioning so we know from where to pick nodes */ from = 1; to = (from+1)%2; for (i=0; i<ncon; i++) { parts[0][i] = rpqCreate(nvtxs); parts[1][i] = rpqCreate(nvtxs); } /* Compute the queues in which each vertex will be assigned to */ for (i=0; i<nvtxs; i++) qnum[i] = rargmax(ncon, nvwgt+i*ncon); /* Insert the nodes of the proper partition in the appropriate priority queue */ for (i=0; i<nvtxs; i++) { if (where[i] == from) { if (ed[i] > 0) rpqInsert(parts[0][qnum[i]], i, (real_t)(ed[i]-id[i])); else rpqInsert(parts[1][qnum[i]], i, (real_t)(ed[i]-id[i])); } } mincut = graph->mincut; nbnd = graph->gnvtxs; for (nswaps=0; nswaps<nvtxs; nswaps++) { if (Serial_AreAnyVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, tpwgts+from*ncon)) break; if ((cnum = Serial_SelectQueueOneWay(ncon, npwgts, tpwgts, from, parts)) == -1) break; if ((higain = rpqGetTop(parts[0][cnum])) == -1) higain = rpqGetTop(parts[1][cnum]); mincut -= (ed[higain]-id[higain]); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); where[higain] = to; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update the queue position */ if (where[k] == from) { if (ed[k] > 0 && bndptr[k] == -1) { /* It moves in boundary */ rpqDelete(parts[1][qnum[k]], k); rpqInsert(parts[0][qnum[k]], k, (real_t)(ed[k]-id[k])); } else { /* It must be in the boundary already */ rpqUpdate(parts[0][qnum[k]], k, (real_t)(ed[k]-id[k])); } } /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; for (i=0; i<ncon; i++) { rpqDestroy(parts[0][i]); rpqDestroy(parts[1][i]); } WCOREPOP; } /************************************************************************* * This function selects the partition number and the queue from which * we will move vertices out **************************************************************************/ idx_t Serial_SelectQueueOneWay(idx_t ncon, real_t *npwgts, real_t *tpwgts, idx_t from, rpq_t **queues[2]) { idx_t i, cnum=-1; real_t max=0.0; for (i=0; i<ncon; i++) { if (npwgts[from*ncon+i]-tpwgts[from*ncon+i] >= max && rpqLength(queues[0][i]) + rpqLength(queues[1][i]) > 0) { max = npwgts[from*ncon+i]-tpwgts[i]; cnum = i; } } return cnum; } /************************************************************************* * This function computes the initial id/ed **************************************************************************/ void Mc_Serial_Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph) { idx_t i, j, me, nvtxs, ncon, nbnd, mincut; idx_t *xadj, *adjncy, *adjwgt; real_t *nvwgt, *npwgts; idx_t *id, *ed, *where; idx_t *bndptr, *bndind; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; npwgts = rset(2*ncon, 0.0, graph->gnpwgts); id = iset(nvtxs, 0, graph->sendind); ed = iset(nvtxs, 0, graph->recvind); bndptr = iset(nvtxs, -1, graph->sendptr); bndind = graph->recvptr; /*------------------------------------------------------------ / Compute now the id/ed degrees /------------------------------------------------------------*/ nbnd = mincut = 0; for (i=0; i<nvtxs; i++) { me = where[i]; raxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1); for (j=xadj[i]; j<xadj[i+1]; j++) { if (me == where[adjncy[j]]) id[i] += adjwgt[j]; else ed[i] += adjwgt[j]; } if (ed[i] > 0 || xadj[i] == xadj[i+1]) { mincut += ed[i]; BNDInsert(nbnd, bndind, bndptr, i); } } graph->mincut = mincut/2; graph->gnvtxs = nbnd; } /************************************************************************* * This function checks if the vertex weights of two vertices are below * a given set of values **************************************************************************/ idx_t Serial_AreAnyVwgtsBelow(idx_t ncon, real_t alpha, real_t *vwgt1, real_t beta, real_t *vwgt2, real_t *limit) { idx_t i; for (i=0; i<ncon; i++) if (alpha*vwgt1[i] + beta*vwgt2[i] < limit[i]) return 1; return 0; } /************************************************************************* * This function computes the edge-cut of a serial graph. **************************************************************************/ idx_t ComputeSerialEdgeCut(graph_t *graph) { idx_t i, j; idx_t cut = 0; for (i=0; i<graph->nvtxs; i++) { for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) if (graph->where[i] != graph->where[graph->adjncy[j]]) cut += graph->adjwgt[j]; } graph->mincut = cut/2; return graph->mincut; } /************************************************************************* * This function computes the TotalV of a serial graph. **************************************************************************/ idx_t ComputeSerialTotalV(graph_t *graph, idx_t *home) { idx_t i; idx_t totalv = 0; for (i=0; i<graph->nvtxs; i++) if (graph->where[i] != home[i]) totalv += (graph->vsize == NULL) ? graph->vwgt[i] : graph->vsize[i]; return totalv; }