/* * Copyright 1997, Regents of the University of Minnesota * * mdiffusion.c * * This file contains code that performs mc-diffusion * * Started 9/16/99 * George * * $Id: mdiffusion.c 10542 2011-07-11 16:56:22Z karypis $ */ #include #define PE -1 /************************************************************************* * This function is the entry point of the initial partitioning algorithm. * This algorithm assembles the graph to all the processors and preceed * serially. **************************************************************************/ idx_t Mc_Diffusion(ctrl_t *ctrl, graph_t *graph, idx_t *vtxdist, idx_t *where, idx_t *home, idx_t npasses) { idx_t h, i, j; idx_t nvtxs, nedges, ncon, pass, iter, domain, processor; idx_t nparts, mype, npes, nlinks, me, you, wsize; idx_t nvisited, nswaps = -1, tnswaps, done, alldone = -1; idx_t *rowptr, *colind, *diff_where, *sr_where, *ehome, *map, *rmap; idx_t *pack, *unpack, *match, *proc2sub, *sub2proc; idx_t *visited, *gvisited; real_t *transfer, *npwgts, maxdiff, minflow, maxflow; real_t lbavg, oldlbavg, ubavg, *lbvec; real_t *diff_flows, *sr_flows; real_t diff_lbavg, sr_lbavg, diff_cost, sr_cost; idx_t *rbuffer, *sbuffer; idx_t *rcount, *rdispl; real_t *solution, *load, *workspace; matrix_t matrix; graph_t *egraph; if (graph->ncon > 3) return 0; WCOREPUSH; nvtxs = graph->nvtxs; nedges = graph->nedges; ncon = graph->ncon; nparts = ctrl->nparts; mype = ctrl->mype; npes = ctrl->npes; ubavg = ravg(ncon, ctrl->ubvec); /* initialize variables and allocate memory */ lbvec = rwspacemalloc(ctrl, ncon); diff_flows = rwspacemalloc(ctrl, ncon); sr_flows = rwspacemalloc(ctrl, ncon); load = rwspacemalloc(ctrl, nparts); solution = rwspacemalloc(ctrl, nparts); npwgts = graph->gnpwgts = rwspacemalloc(ctrl, ncon*nparts); matrix.values = rwspacemalloc(ctrl, nedges); transfer = matrix.transfer = rwspacemalloc(ctrl, ncon*nedges); proc2sub = iwspacemalloc(ctrl, gk_max(nparts, npes*2)); sub2proc = iwspacemalloc(ctrl, nparts); match = iwspacemalloc(ctrl, nparts); rowptr = matrix.rowptr = iwspacemalloc(ctrl, nparts+1); colind = matrix.colind = iwspacemalloc(ctrl, nedges); rcount = iwspacemalloc(ctrl, npes); rdispl = iwspacemalloc(ctrl, npes+1); pack = iwspacemalloc(ctrl, nvtxs); unpack = iwspacemalloc(ctrl, nvtxs); rbuffer = iwspacemalloc(ctrl, nvtxs); sbuffer = iwspacemalloc(ctrl, nvtxs); map = iwspacemalloc(ctrl, nvtxs); rmap = iwspacemalloc(ctrl, nvtxs); diff_where = iwspacemalloc(ctrl, nvtxs); ehome = iwspacemalloc(ctrl, nvtxs); wsize = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1)); workspace = (real_t *)gk_malloc(wsize, "Mc_Diffusion: workspace"); graph->ckrinfo = (ckrinfo_t *)gk_malloc(nvtxs*sizeof(ckrinfo_t), "Mc_Diffusion: rinfo"); /* construct subdomain connectivity graph */ matrix.nrows = nparts; SetUpConnectGraph(graph, &matrix, (idx_t *)workspace); nlinks = (matrix.nnzs-nparts) / 2; visited = iwspacemalloc(ctrl, matrix.nnzs); gvisited = iwspacemalloc(ctrl, matrix.nnzs); for (pass=0; passtpwgts, h); lbvec[h] = (rmax(nparts, load)+1.0/nparts) * (real_t)nparts; ConjGrad2(&matrix, load, solution, 0.001, workspace); ComputeTransferVector(ncon, &matrix, solution, transfer, h); } oldlbavg = ravg(ncon, lbvec); tnswaps = 0; maxdiff = 0.0; for (i=0; i maxdiff) ? maxflow - minflow : maxdiff; } } while (nvisited < nlinks) { /* compute independent sets of subdomains */ iset(gk_max(nparts, npes*2), UNMATCHED, proc2sub); CSR_Match_SHEM(&matrix, match, proc2sub, gvisited, ncon); /* set up the packing arrays */ iset(nparts, UNMATCHED, sub2proc); for (i=0; i 0.0) diff_flows[h] = -1.0 * transfer[j*ncon+h]; } break; } } nswaps = 1; rcopy(ncon, diff_flows, sr_flows); iset(nvtxs, 0, sbuffer); for (i=0; invtxs, egraph->where, diff_where); for (j=0; jnvtxs; j++) ehome[j] = home[map[j]]; RedoMyLink(ctrl, egraph, ehome, me, you, sr_flows, &sr_cost, &sr_lbavg); if (ncon <= 4) { sr_where = egraph->where; egraph->where = diff_where; nswaps = BalanceMyLink(ctrl, egraph, ehome, me, you, diff_flows, maxdiff, &diff_cost, &diff_lbavg, 1.0/(real_t)nvtxs); if ((sr_lbavg < diff_lbavg && (diff_lbavg >= ubavg-1.0 || sr_cost == diff_cost)) || (sr_lbavg < ubavg-1.0 && sr_cost < diff_cost)) { for (i=0; invtxs; i++) where[map[i]] = sr_where[i]; } else { for (i=0; invtxs; i++) where[map[i]] = diff_where[i]; } } else { for (i=0; invtxs; i++) where[map[i]] = egraph->where[i]; } gk_free((void **)&egraph->xadj, &egraph->nvwgt, &egraph->adjncy, &egraph, LTERM); } /* Pack the flow data */ iset(nvtxs, UNMATCHED, sbuffer); for (i=0; icomm); /* Unpack the flow data */ for (i=0; icomm); nvisited = isum(matrix.nnzs, gvisited, 1)/2; tnswaps += GlobalSESum(ctrl, nswaps); if (iter++ == NGD_PASSES) break; } /* perform serial refinement */ Mc_ComputeSerialPartitionParams(ctrl, graph, nparts); Mc_SerialKWayAdaptRefine(ctrl, graph, nparts, home, ctrl->ubvec, 10); /* check for early breakout */ for (h=0; h= oldlbavg || lbavg <= ubavg + 0.035) done = 1; alldone = GlobalSEMax(ctrl, done); if (alldone == 1) break; } /* ensure that all subdomains have at least one vertex */ /* iset(nparts, 0, match); for (i=0; imype == PE) printf("WARNING: empty subdomain %"PRIDX" in Mc_Diffusion\n", me); you = iargmax(nparts, match); for (i=0; ickrinfo, LTERM); graph->gnpwgts = NULL; graph->ckrinfo = NULL; WCOREPOP; return 0; } /************************************************************************* * This function extracts a subgraph from a graph given an indicator array. **************************************************************************/ graph_t *ExtractGraph(ctrl_t *ctrl, graph_t *graph, idx_t *indicator, idx_t *map, idx_t *rmap) { idx_t h, i, j; idx_t nvtxs, envtxs, enedges, ncon; idx_t vtx, count; idx_t *xadj, *vsize, *adjncy, *adjwgt, *where; idx_t *exadj, *evsize, *eadjncy, *eadjwgt, *ewhere; real_t *nvwgt, *envwgt; graph_t *egraph; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; vsize = graph->vsize; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; count = 0; for (i=0; invtxs = count; egraph->ncon = graph->ncon; exadj = egraph->xadj = imalloc(envtxs*3+1, "exadj"); ewhere = egraph->where = exadj + envtxs + 1; evsize = egraph->vsize = exadj + 2*envtxs + 1; envwgt = egraph->nvwgt = rmalloc(envtxs*ncon, "envwgt"); /************************************************/ /* compute xadj, where, nvwgt, and vsize arrays */ /************************************************/ iset(envtxs+1, 0, exadj); for (i=0; ipartType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) evsize[i] = vsize[vtx]; for (j=xadj[vtx]; jnedges = exadj[envtxs]; eadjncy = egraph->adjncy = imalloc(enedges*2, "eadjncy"); eadjwgt = egraph->adjwgt = eadjncy + enedges; for (i=0; i0; i--) exadj[i] = exadj[i-1]; exadj[0] = 0; return egraph; }