/* * serial.c * * This file contains code that implements k-way refinement * * Started 7/28/97 * George * * $Id: serial.c 13927 2013-03-27 22:42:41Z karypis $ * */ #include /************************************************************************* * This function computes the initial id/ed **************************************************************************/ void Mc_ComputeSerialPartitionParams(ctrl_t *ctrl, graph_t *graph, idx_t nparts) { idx_t i, j, k; idx_t nvtxs, nedges, ncon, mincut, me, other; idx_t *xadj, *adjncy, *adjwgt, *where; ckrinfo_t *myrinfo; cnbr_t *mynbrs; real_t *nvwgt, *npwgts; idx_t mype; gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; npwgts = rset(ncon*nparts, 0.0, graph->gnpwgts); PASSERT(ctrl, graph->ckrinfo != NULL); PASSERT(ctrl, ctrl->cnbrpool != NULL); memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs); cnbrpoolReset(ctrl); /*------------------------------------------------------------ / Compute now the id/ed degrees /------------------------------------------------------------*/ nedges = mincut = 0; for (i=0; ickrinfo+i; for (j=xadj[i]; jid += adjwgt[j]; } else { myrinfo->ed += adjwgt[j]; } } mincut += myrinfo->ed; /* Time to compute the particular external degrees */ if (myrinfo->ed > 0) { myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1); mynbrs = ctrl->cnbrpool + myrinfo->inbr; for (j=xadj[i]; jnnbrs; k++) { if (mynbrs[k].pid == other) { mynbrs[k].ed += adjwgt[j]; break; } } if (k == myrinfo->nnbrs) { mynbrs[k].pid = other; mynbrs[k].ed = adjwgt[j]; myrinfo->nnbrs++; } } } } else { myrinfo->inbr = -1; } } graph->mincut = mincut/2; return; } /************************************************************************* * This function performs k-way refinement **************************************************************************/ void Mc_SerialKWayAdaptRefine(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *home, real_t *orgubvec, idx_t npasses) { idx_t i, ii, iii, j, k; idx_t nvtxs, ncon, pass, nmoves; idx_t from, me, myhome, to, oldcut, gain, tmp; idx_t *xadj, *adjncy, *adjwgt; idx_t *where; real_t *npwgts, *nvwgt, *minwgt, *maxwgt, *ubvec; idx_t gain_is_greater, gain_is_same, fit_in_to, fit_in_from, going_home; idx_t zero_gain, better_balance_ft, better_balance_tt; ikv_t *cand; idx_t mype; ckrinfo_t *myrinfo; cnbr_t *mynbrs; WCOREPUSH; gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; npwgts = graph->gnpwgts; /* Setup the weight intervals of the various subdomains */ cand = ikvwspacemalloc(ctrl, nvtxs); minwgt = rwspacemalloc(ctrl, nparts*ncon); maxwgt = rwspacemalloc(ctrl, nparts*ncon); ubvec = rwspacemalloc(ctrl, ncon); ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec); for (i=0; imincut; for (i=0; ickrinfo[i].ed-graph->ckrinfo[i].id; cand[i].val = i; } ikvsortd(nvtxs, cand); nmoves = 0; for (iii=0; iiickrinfo+i; if (myrinfo->ed >= myrinfo->id) { from = where[i]; myhome = home[i]; nvwgt = graph->nvwgt+i*ncon; if (myrinfo->id > 0 && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) continue; mynbrs = ctrl->cnbrpool + myrinfo->inbr; for (k=myrinfo->nnbrs-1; k>=0; k--) { to = mynbrs[k].pid; gain = mynbrs[k].ed - myrinfo->id; if (gain >= 0 && (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || IsHBalanceBetterFT(ncon,npwgts+from*ncon,npwgts+to*ncon,nvwgt,ubvec))) { break; } } /* break out if you did not find a candidate */ if (k < 0) continue; for (j=k-1; j>=0; j--) { to = mynbrs[j].pid; going_home = (myhome == to); gain_is_same = (mynbrs[j].ed == mynbrs[k].ed); gain_is_greater = (mynbrs[j].ed > mynbrs[k].ed); fit_in_to = AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon); better_balance_ft = IsHBalanceBetterFT(ncon, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec); better_balance_tt = IsHBalanceBetterTT(ncon, npwgts+mynbrs[k].pid*ncon, npwgts+to*ncon,nvwgt,ubvec); if ( (gain_is_greater && (fit_in_to || better_balance_ft) ) || (gain_is_same && ( (fit_in_to && going_home) || better_balance_tt ) ) ) { k = j; } } to = mynbrs[k].pid; going_home = (myhome == to); zero_gain = (mynbrs[k].ed == myrinfo->id); fit_in_from = AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, npwgts+from*ncon, maxwgt+from*ncon); better_balance_ft = IsHBalanceBetterFT(ncon, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec); if (zero_gain && !going_home && !better_balance_ft && fit_in_from) continue; /*===================================================================== * If we got here, we can now move the vertex from 'from' to 'to' *======================================================================*/ graph->mincut -= mynbrs[k].ed-myrinfo->id; /* Update where, weight, and ID/ED information of the vertex you moved */ raxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); where[i] = to; myrinfo->ed += myrinfo->id-mynbrs[k].ed; gk_SWAP(myrinfo->id, mynbrs[k].ed, tmp); if (mynbrs[k].ed == 0) mynbrs[k] = mynbrs[--myrinfo->nnbrs]; else mynbrs[k].pid = from; /* Update the degrees of adjacent vertices */ for (j=xadj[i]; jckrinfo+ii; if (myrinfo->inbr == -1) { myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[ii+1]-xadj[ii]+1); myrinfo->nnbrs = 0; } mynbrs = ctrl->cnbrpool + myrinfo->inbr; if (me == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (me == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution of the ed from 'from' */ if (me != from) { for (k=0; knnbrs; k++) { if (mynbrs[k].pid == from) { if (mynbrs[k].ed == adjwgt[j]) mynbrs[k] = mynbrs[--myrinfo->nnbrs]; else mynbrs[k].ed -= adjwgt[j]; break; } } } /* Add contribution of the ed to 'to' */ if (me != to) { for (k=0; knnbrs; k++) { if (mynbrs[k].pid == to) { mynbrs[k].ed += adjwgt[j]; break; } } if (k == myrinfo->nnbrs) { mynbrs[k].pid = to; mynbrs[k].ed = adjwgt[j]; myrinfo->nnbrs++; } } } nmoves++; } } if (graph->mincut == oldcut) break; } WCOREPOP; return; } /************************************************************************* * This function checks if the vertex weights of two vertices are below * a given set of values **************************************************************************/ idx_t AreAllHVwgtsBelow(idx_t ncon, real_t alpha, real_t *vwgt1, real_t beta, real_t *vwgt2, real_t *limit) { idx_t i; for (i=0; i limit[i]) return 0; return 1; } /************************************************************************* * This function computes the load imbalance over all the constrains * For now assume that we just want balanced partitionings **************************************************************************/ void ComputeHKWayLoadImbalance(idx_t ncon, idx_t nparts, real_t *npwgts, real_t *lbvec) { idx_t i, j; real_t max; for (i=0; i max) max = npwgts[j*ncon+i]; } lbvec[i] = max*nparts; } } /************************************************************** * This subroutine remaps a partitioning on a single processor **************************************************************/ void SerialRemap(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *base, idx_t *scratch, idx_t *remap, real_t *tpwgts) { idx_t i, ii, j, k; idx_t nvtxs, nmapped, max_mult; idx_t from, to, current_from, smallcount, bigcount; ikv_t *flowto, *bestflow; i2kv_t *sortvtx; idx_t *vsize, *htable, *map, *rowmap; WCOREPUSH; nvtxs = graph->nvtxs; vsize = graph->vsize; max_mult = gk_min(MAX_NPARTS_MULTIPLIER, nparts); sortvtx = (i2kv_t *)wspacemalloc(ctrl, nvtxs*sizeof(i2kv_t)); flowto = ikvwspacemalloc(ctrl, nparts); bestflow = ikvwspacemalloc(ctrl, nparts*max_mult); map = htable = iset(2*nparts, -1, iwspacemalloc(ctrl, 2*nparts)); rowmap = map+nparts; for (i=0; i current_from) { /* reset the hash table */ for (j=0; jncon, j, k)) { map[j] = k; rowmap[k] = j; nmapped++; } if (nmapped == nparts) break; } /* remap the rest */ /* it may help try remapping to the same label first */ if (nmapped < nparts) { for (j=0; jncon, i, j)) { map[j] = i; rowmap[i] = j; nmapped++; break; } } } } } /* check to see if remapping fails (due to dis-similar tpwgts) */ /* if remapping fails, revert to original mapping */ if (nmapped < nparts) for (i=0; ikey1 > second->key1) return 1; if (first->key1 < second->key1) return -1; if (first->key2 < second->key2) return 1; if (first->key2 > second->key2) return -1; return 0; } /************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void Mc_Serial_FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts, idx_t npasses) { idx_t i, ii, j, k; idx_t kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum; idx_t *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idx_t *moved, *swaps, *qnum; real_t *nvwgt, *npwgts, *mindiff, *tmpdiff, origbal, minbal, newbal; rpq_t **parts[2]; idx_t higain, mincut, initcut, newcut, mincutorder; real_t *rtpwgts; idx_t mype; WCOREPUSH; gkMPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; mindiff = rwspacemalloc(ctrl, ncon); tmpdiff = rwspacemalloc(ctrl, ncon); rtpwgts = rwspacemalloc(ctrl, 2*ncon); parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); moved = iwspacemalloc(ctrl, nvtxs); swaps = iwspacemalloc(ctrl, nvtxs); qnum = iwspacemalloc(ctrl, nvtxs); limit = gk_min(gk_max(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; imincut; for (i=0; ignvtxs; for (ii=0; ii limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) rpqInsert(parts[where[k]][qnum[k]], k, (real_t)(ed[k]-id[k])); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; imincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; if (mincutorder == -1 || mincut == initcut) break; } for (i=0; i= maxdiff) { maxdiff = npwgts[part*ncon+i]-tpwgts[part*ncon+i]; *from = part; *cnum = i; } } } if (*from != -1 && rpqLength(queues[*from][*cnum]) == 0) { /* The desired queue is empty, select a node from that side anyway */ for (i=0; i 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; break; } } for (i++; i max && rpqLength(queues[*from][i]) > 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; } } } /* Check to see if you can focus on the cut */ if (maxdiff <= 0.0 || *from == -1) { maxgain = -100000.0; for (part=0; part<2; part++) { for (i=0; i 0 && rpqSeeTopKey(queues[part][i]) > maxgain) { maxgain = rpqSeeTopKey(queues[part][i]); *from = part; *cnum = i; } } } } return; } /************************************************************************* * This function checks if the balance achieved is better than the diff * For now, it uses a 2-norm measure **************************************************************************/ idx_t Serial_BetterBalance(idx_t ncon, real_t *npwgts, real_t *tpwgts, real_t *diff, real_t *tmpdiff) { idx_t i; for (i=0; invtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; mindiff = rwspacemalloc(ctrl, ncon); tmpdiff = rwspacemalloc(ctrl, ncon); parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); qsizes[0] = iset(ncon, 0, iwspacemalloc(ctrl, ncon)); qsizes[1] = iset(ncon, 0, iwspacemalloc(ctrl, ncon)); moved = iwspacemalloc(ctrl, nvtxs); swaps = iwspacemalloc(ctrl, nvtxs); qnum = iwspacemalloc(ctrl, nvtxs); limit = gk_min(gk_max(0.01*nvtxs, 15), 100); /* Initialize the queues */ for (i=0; i qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { qsizes[qnum[i]][from]--; qsizes[j][from]++; qnum[i] = j; } } } } } for (i=0; imincut; mincutorder = -1; iset(nvtxs, -1, moved); /* Insert all nodes in the priority queues */ nbnd = graph->gnvtxs; for (i=0; i limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; gk_SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); raxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); raxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; for (i=0; invtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; nvwgt = graph->nvwgt; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; parts[0] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); parts[1] = (rpq_t **)wspacemalloc(ctrl, sizeof(rpq_t *)*ncon); qnum = iwspacemalloc(ctrl, nvtxs); /* This is called for initial partitioning so we know from where to pick nodes */ from = 1; to = (from+1)%2; for (i=0; i 0) rpqInsert(parts[0][qnum[i]], i, (real_t)(ed[i]-id[i])); else rpqInsert(parts[1][qnum[i]], i, (real_t)(ed[i]-id[i])); } } mincut = graph->mincut; nbnd = graph->gnvtxs; for (nswaps=0; nswaps 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j 0 && bndptr[k] == -1) { /* It moves in boundary */ rpqDelete(parts[1][qnum[k]], k); rpqInsert(parts[0][qnum[k]], k, (real_t)(ed[k]-id[k])); } else { /* It must be in the boundary already */ rpqUpdate(parts[0][qnum[k]], k, (real_t)(ed[k]-id[k])); } } /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; for (i=0; i= max && rpqLength(queues[0][i]) + rpqLength(queues[1][i]) > 0) { max = npwgts[from*ncon+i]-tpwgts[i]; cnum = i; } } return cnum; } /************************************************************************* * This function computes the initial id/ed **************************************************************************/ void Mc_Serial_Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph) { idx_t i, j, me, nvtxs, ncon, nbnd, mincut; idx_t *xadj, *adjncy, *adjwgt; real_t *nvwgt, *npwgts; idx_t *id, *ed, *where; idx_t *bndptr, *bndind; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; npwgts = rset(2*ncon, 0.0, graph->gnpwgts); id = iset(nvtxs, 0, graph->sendind); ed = iset(nvtxs, 0, graph->recvind); bndptr = iset(nvtxs, -1, graph->sendptr); bndind = graph->recvptr; /*------------------------------------------------------------ / Compute now the id/ed degrees /------------------------------------------------------------*/ nbnd = mincut = 0; for (i=0; i 0 || xadj[i] == xadj[i+1]) { mincut += ed[i]; BNDInsert(nbnd, bndind, bndptr, i); } } graph->mincut = mincut/2; graph->gnvtxs = nbnd; } /************************************************************************* * This function checks if the vertex weights of two vertices are below * a given set of values **************************************************************************/ idx_t Serial_AreAnyVwgtsBelow(idx_t ncon, real_t alpha, real_t *vwgt1, real_t beta, real_t *vwgt2, real_t *limit) { idx_t i; for (i=0; invtxs; i++) { for (j=graph->xadj[i]; jxadj[i+1]; j++) if (graph->where[i] != graph->where[graph->adjncy[j]]) cut += graph->adjwgt[j]; } graph->mincut = cut/2; return graph->mincut; } /************************************************************************* * This function computes the TotalV of a serial graph. **************************************************************************/ idx_t ComputeSerialTotalV(graph_t *graph, idx_t *home) { idx_t i; idx_t totalv = 0; for (i=0; invtxs; i++) if (graph->where[i] != home[i]) totalv += (graph->vsize == NULL) ? graph->vwgt[i] : graph->vsize[i]; return totalv; }