COMBINATORIAL_BLAS 1.6
MCL.cpp
Go to the documentation of this file.
1/****************************************************************/
2/* Parallel Combinatorial BLAS Library (for Graph Computations) */
3/* version 1.6 -------------------------------------------------*/
4/* date: 6/15/2017 ---------------------------------------------*/
5/* authors: Ariful Azad, Aydin Buluc --------------------------*/
6/****************************************************************/
7/*
8 Copyright (c) 2010-2017, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29
30#include <mpi.h>
31
32// These macros should be defined before stdint.h is included
33#ifndef __STDC_CONSTANT_MACROS
34#define __STDC_CONSTANT_MACROS
35#endif
36#ifndef __STDC_LIMIT_MACROS
37#define __STDC_LIMIT_MACROS
38#endif
39#include <stdint.h>
40
41#include <sys/time.h>
42#include <iostream>
43#include <fstream>
44#include <string>
45#include <sstream> // Required for stringstreams
46#include <ctime>
47#include <cmath>
48#include "CombBLAS/CombBLAS.h"
49#include "CC.h"
50#include "WriteMCLClusters.h"
51
52using namespace std;
53using namespace combblas;
54
55#define EPS 0.0001
56
64
65/* Variables specific for timing communication avoiding setting in detail*/
76
77// for compilation (TODO: fix this dependency)
84
86double tIO;
87
88
89
90typedef struct
91{
92 //Input/Output file
93 string ifilename;
95 int base; // only usefule for matrix market files
96
97 string ofilename;
98
99 //Preprocessing
102
103 //inflation
104 double inflation;
105
106 //pruning
111 int kselectVersion; // 0: adapt based on k, 1: kselect1, 2: kselect2
113
114 //HipMCL optimization
117 bool isDoublePrecision; // true: double, false: float
118 bool is64bInt; // true: int64_t for local indexing, false: int32_t (for local indexing)
119 int layers; // Number of layers to use in communication avoiding SpGEMM.
121
122 //debugging
123 bool show;
124
125
127
128
130{
131 //Input/Output file
132 param.ifilename = "";
133 param.isInputMM = false;
134 param.ofilename = "";
135 param.base = 1;
136
137 //Preprocessing
138 // mcl removes isolated vertices by default,
139 // we don't do this because it will create different ordering of vertices!
140 param.remove_isolated = false;
141 param.randpermute = 0;
142
143 //inflation
144 param.inflation = 0.0;
145
146 //pruning
147 param.prunelimit = 1.0/10000.0;
148 param.select = 1100;
149 param.recover_num = 1400;
150 param.recover_pct = .9; // we allow both 90 or .9 as input. Internally, we keep it 0.9
151 param.kselectVersion = 1;
152 param.preprune = false;
153
154 //HipMCL optimization
155 param.layers = 1;
156 param.compute = 1; // 1 means hash-based computation, 2 means heap-based computation
157 param.phases = 1;
158 param.perProcessMem = 0;
159 param.isDoublePrecision = true;
160 param.is64bInt = true;
161
162 //debugging
163 param.show = false;
164}
165
167{
168 ostringstream runinfo;
169 runinfo << "\n======================================" << endl;
170 runinfo << "Running HipMCL with the parameters: " << endl;
171 runinfo << "======================================" << endl;
172 runinfo << "Input/Output file" << endl;
173 runinfo << " input filename: " << param.ifilename << endl;
174 runinfo << " input file type: " ;
175 if(param.isInputMM)
176 {
177 runinfo << " Matrix Market" << endl;
178 runinfo << " Base of the input matrix: " << param.base << endl;
179 }
180 else runinfo << " Labeled Triples format" << endl;
181 runinfo << " Output filename: " << param.ofilename << endl;
182
183
184 runinfo << "Preprocessing" << endl;
185 runinfo << " Remove isolated vertices? : ";
186 if (param.remove_isolated) runinfo << "yes";
187 else runinfo << "no" << endl;
188
189
190 runinfo << " Randomly permute vertices? : ";
191 if (param.randpermute) runinfo << "yes";
192 else runinfo << "no" << endl;
193
194 runinfo << "Inflation: " << param.inflation << endl;
195
196 runinfo << "Pruning" << endl;
197 runinfo << " Prunelimit: " << param.prunelimit << endl;
198 runinfo << " Recover number: " << param.recover_num << endl;
199 runinfo << " Recover percent: " << ceil(param.recover_pct*100) << endl;
200 runinfo << " Selection number: " << param.select << endl;
201 runinfo << " Apply prune/select/recovery before the first iteration? : ";
202 if (param.preprune) runinfo << "yes"<< endl;
203 else runinfo << "no" << endl;
204
205 // do not expose selection option at this moment
206 //runinfo << "Selection algorithm: ";
207 //if(kselectVersion==1) runinfo << "tournament select" << endl;
208 //else if(kselectVersion==2) runinfo << "quickselect" << endl;
209 //else runinfo << "adaptive based on k" << endl;
210
211
212
213 runinfo << "HipMCL optimization" << endl;
214 runinfo << " Number of layers : " << param.layers << endl;
215 runinfo << " Computation kernel : " << param.compute << endl;
216 runinfo << " Number of phases: " << param.phases << endl;
217 runinfo << " Memory avilable per process: ";
218 if(param.perProcessMem>0) runinfo << param.perProcessMem << "GB" << endl;
219 else runinfo << "not provided" << endl;
220 if(param.isDoublePrecision) runinfo << "Using double precision floating point" << endl;
221 else runinfo << "Using single precision floating point" << endl;
222 if(param.is64bInt ) runinfo << "Using 64 bit local indexing" << endl;
223 else runinfo << "Using 32 bit local indexing" << endl;
224
225 runinfo << "Debugging" << endl;
226 runinfo << " Show matrices after major steps? : ";
227 if (param.show) runinfo << "yes";
228 else runinfo << "no" << endl;
229 runinfo << "======================================" << endl;
230 SpParHelper::Print(runinfo.str());
231}
232
233void ProcessParam(int argc, char* argv[], HipMCLParam & param)
234{
235 for (int i = 1; i < argc; i++)
236 {
237 if (strcmp(argv[i],"-M")==0){
238 param.ifilename = string(argv[i+1]);
239 }
240 else if (strcmp(argv[i],"--matrix-market")==0){
241 param.isInputMM = true;
242 }
243 else if (strcmp(argv[i],"-o")==0){
244 param.ofilename = string(argv[i+1]);
245 }
246 else if (strcmp(argv[i],"--show")==0){
247 param.show = true;
248 }
249 else if (strcmp(argv[i],"--remove-isolated")==0){
250 param.remove_isolated = true;
251 }
252 else if (strcmp(argv[i],"--tournament-select")==0){
253 param.kselectVersion = 1;
254 }
255 else if (strcmp(argv[i],"--quick-select")==0){
256 param.kselectVersion = 2;
257
258 }
259 else if (strcmp(argv[i],"-I")==0){
260 param.inflation = atof(argv[i + 1]);
261
262 } else if (strcmp(argv[i],"-p")==0) {
263 param.prunelimit = atof(argv[i + 1]);
264
265 } else if (strcmp(argv[i],"-S")==0) {
266 param.select = atoi(argv[i + 1]);
267
268 } else if (strcmp(argv[i],"-R")==0) {
269 param.recover_num = atoi(argv[i + 1]);
270
271 } else if (strcmp(argv[i],"-pct")==0)
272 {
273 param.recover_pct = atof(argv[i + 1]);
274 if(param.recover_pct>1) param.recover_pct/=100.00;
275 } else if (strcmp(argv[i],"-base")==0) {
276 param.base = atoi(argv[i + 1]);
277 }
278 else if (strcmp(argv[i],"-rand")==0) {
279 param.randpermute = atoi(argv[i + 1]);
280 }
281 else if (strcmp(argv[i],"--preprune")==0) {
282 param.preprune = true;
283 }
284 else if (strcmp(argv[i],"-layers")==0) {
285 param.layers = atoi(argv[i + 1]);
286 }
287 else if (strcmp(argv[i],"-compute")==0) {
288 param.layers = atoi(argv[i + 1]);
289 }
290 else if (strcmp(argv[i],"-phases")==0) {
291 param.phases = atoi(argv[i + 1]);
292 }
293 else if (strcmp(argv[i],"-per-process-mem")==0) {
294 param.perProcessMem = atoi(argv[i + 1]);
295 }
296 else if (strcmp(argv[i],"--single-precision")==0) {
297 param.isDoublePrecision = false;
298 }
299 else if (strcmp(argv[i],"--32bit-local-index")==0) {
300 param.is64bInt = false;
301 }
302 }
303
304 if(param.ofilename=="") // construct output file name if it is not provided
305 {
306 param.ofilename = param.ifilename + ".hipmcl";
307 }
308
309}
310
311
313{
314 ostringstream runinfo;
315
316 runinfo << "Usage: ./hipmcl -M <input filename> -I <inlfation> (required)" << endl;
317
318 runinfo << "======================================" << endl;
319 runinfo << " Detail parameter options " << endl;
320 runinfo << "======================================" << endl;
321
322
323
324 runinfo << "Input/Output file" << endl;
325 runinfo << " -M <input file name (labeled triples format)> (mandatory)" << endl;
326 runinfo << " --matrix-market : if provided, the input file is in the matrix market format (default: the file is in labeled triples format)" << endl;
327 runinfo << " -base <index of the first vertex in the matrix market file, 0|1> (default: 1) " << endl;
328 runinfo << " -o <output filename> (default: input_file_name.hipmcl )" << endl;
329
330 runinfo << "Inflation" << endl;
331 runinfo << "-I <inflation> (mandatory)\n";
332
333 runinfo << "Preprocessing" << endl;
334 runinfo << " -rand <randomly permute vertices> (default:0)\n";
335 runinfo << " --remove-isolated : if provided, remove isolated vertices (default: don't remove isolated vertices)\n";
336
337
338 runinfo << "Pruning" << endl;
339 runinfo << " -p <cutoff> (default: 1/10000)\n";
340 runinfo << " -R <recovery number> (default: 1400)\n";
341 runinfo << " -pct <recovery pct> (default: 90)\n";
342 runinfo << " -S <selection number> (default: 1100)\n";
343 runinfo << " --preprune : if provided, apply prune/select/recovery before the first iteration (needed when dense columns are present) (default: don't preprune. However, if the average nonzero per column is larger than max{S,R}, prepruning is still applied by default)\n";
344
345 runinfo << "HipMCL optimization" << endl;
346 runinfo << " -layers <number of layers> (default:1)\n";
347 runinfo << " -compute <1 or 2> (default:1)\n";
348 runinfo << " -phases <number of phases> (default:1)\n";
349 runinfo << " -per-process-mem <memory (GB) available per process> (default:0, number of phases is not estimated)\n";
350 runinfo << " --single-precision (if not provided, use double precision floating point numbers)\n" << endl;
351 runinfo << " --32bit-local-index (if not provided, use 64 bit indexing for vertex ids)\n" << endl;
352
353 runinfo << "Debugging" << endl;
354 runinfo << " --show: show information about matrices after major steps (default: do not show matrices)" << endl;
355
356
357
358 runinfo << "======================================" << endl;
359 runinfo << " Few examples " << endl;
360 runinfo << "======================================" << endl;
361 runinfo << "Example with with a graph in labeled triples format on a laptop with 8GB memory and 8 cores:\nexport OMP_NUM_THREADS=8\nbin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 8" << endl;
362 runinfo << "Same as above with 4 processes and 2 theaded per process cores:\nexport OMP_NUM_THREADS=2\nmpirun -np 4 bin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 2" << endl;
363 runinfo << "Example with a graph in matrix market format:\nbin/hipmcl -M data/sevenvertex.mtx --matrix-market -base 1 -I 2 -per-process-mem 8" << endl;
364
365 runinfo << "Example on the NERSC/Cori system with 16 nodes, 4 process per node and 16 threads per process: \nsrun -N 16 -n 64 -c 16 bin/hipmcl -M data/hep-th.mtx --matrix-market -base 1 -per-process-mem 27 -o hep-th.hipmcl" << endl;
366 SpParHelper::Print(runinfo.str());
367}
368
369
370// base: base of items
371// clusters are always numbered 0-based
372template <typename IT, typename NT, typename DER>
374{
375 IT nCC;
376 // A is a directed graph
377 // symmetricize A
378
380 AT.Transpose();
381 A += AT;
382 SpParHelper::Print("Finding connected components....\n");
383
384 FullyDistVec<IT, IT> cclabels = CC(A, nCC);
385 return cclabels;
386}
387
388
389template <typename IT, typename NT, typename DER>
391{
392 FullyDistVec<IT, NT> colsums = A.Reduce(Column, plus<NT>(), 0.0);
393 colsums.Apply(safemultinv<NT>());
394 A.DimApply(Column, colsums, multiplies<NT>()); // scale each "Column" with the given vector
395}
396
397template <typename IT, typename NT, typename DER>
399{
400 //SpParMat<IT, NT, DER> * ALayer = A3D.GetLayerMat();
401 std::shared_ptr< SpParMat<IT, NT, DER> > ALayer = A3D.GetLayerMat();
402 FullyDistVec<IT, NT> colsums = ALayer->Reduce(Column, plus<NT>(), 0.0);
403 colsums.Apply(safemultinv<NT>());
404 ALayer->DimApply(Column, colsums, multiplies<NT>()); // scale each "Column" with the given vector
405}
406
407template <typename IT, typename NT, typename DER>
409{
410 // sums of squares of columns
411 FullyDistVec<IT, NT> colssqs = A.Reduce(Column, plus<NT>(), 0.0, bind2nd(exponentiate(), 2));
412 // Matrix entries are non-negative, so max() can use zero as identity
413 FullyDistVec<IT, NT> colmaxs = A.Reduce(Column, maximum<NT>(), 0.0);
414 colmaxs -= colssqs;
415
416 // multiplu by number of nonzeros in each column
417 FullyDistVec<IT, NT> nnzPerColumn = A.Reduce(Column, plus<NT>(), 0.0, [](NT val){return 1.0;});
418 colmaxs.EWiseApply(nnzPerColumn, multiplies<NT>());
419
420 return colmaxs.Reduce(maximum<NT>(), 0.0);
421}
422
423template <typename IT, typename NT, typename DER>
425{
426 //SpParMat<IT, NT, DER> * ALayer = A3D.GetLayerMat();
427 std::shared_ptr< SpParMat<IT, NT, DER> > ALayer = A3D.GetLayerMat();
428
429 // sums of squares of columns
430 FullyDistVec<IT, NT> colssqs = ALayer->Reduce(Column, plus<NT>(), 0.0, bind2nd(exponentiate(), 2));
431 // Matrix entries are non-negative, so max() can use zero as identity
432 FullyDistVec<IT, NT> colmaxs = ALayer->Reduce(Column, maximum<NT>(), 0.0);
433 colmaxs -= colssqs;
434
435 // multiply by number of nonzeros in each column
436 FullyDistVec<IT, NT> nnzPerColumn = ALayer->Reduce(Column, plus<NT>(), 0.0, [](NT val){return 1.0;});
437 colmaxs.EWiseApply(nnzPerColumn, multiplies<NT>());
438
439 NT layerChaos = colmaxs.Reduce(maximum<NT>(), 0.0);
440
441 NT totalChaos = 0.0;
442 MPI_Allreduce( &layerChaos, &totalChaos, 1, MPIType<NT>(), MPI_MAX, A3D.getcommgrid3D()->GetFiberWorld());
443 return totalChaos;
444}
445
446template <typename IT, typename NT, typename DER>
447void Inflate(SpParMat<IT,NT,DER> & A, double power)
448{
449 A.Apply(bind2nd(exponentiate(), power));
450}
451
452template <typename IT, typename NT, typename DER>
453void Inflate3D(SpParMat3D<IT,NT,DER> & A3D, double power)
454{
455 //SpParMat<IT, NT, DER> * ALayer = A3D.GetLayerMat();
456 std::shared_ptr< SpParMat<IT, NT, DER> > ALayer = A3D.GetLayerMat();
457 ALayer->Apply(bind2nd(exponentiate(), power));
458}
459
460// default adjustloop setting
461// 1. Remove loops
462// 2. set loops to max of all arc weights
463template <typename IT, typename NT, typename DER>
465{
466
467 A.RemoveLoops();
468 FullyDistVec<IT, NT> colmaxs = A.Reduce(Column, maximum<NT>(), numeric_limits<NT>::min());
469 A.Apply([](NT val){return val==numeric_limits<NT>::min() ? 1.0 : val;}); // for isolated vertices
470 A.AddLoops(colmaxs);
471 ostringstream outs;
472 outs << "Adjusting loops" << endl;
473 SpParHelper::Print(outs.str());
474}
475
476template <typename IT, typename NT, typename DER>
478{
479 ostringstream outs;
480 FullyDistVec<IT, NT> ColSums = A.Reduce(Column, plus<NT>(), 0.0);
481 FullyDistVec<IT, IT> nonisov = ColSums.FindInds(bind2nd(greater<NT>(), 0));
482 IT numIsolated = A.getnrow() - nonisov.TotalLength();
483 outs << "Number of isolated vertices: " << numIsolated << endl;
484 SpParHelper::Print(outs.str());
485
486 A(nonisov, nonisov, true);
487 SpParHelper::Print("Removed isolated vertices.\n");
488 if(param.show)
489 {
490 A.PrintInfo();
491 }
492
493}
494
495//TODO: handle reordered cluster ids
496template <typename IT, typename NT, typename DER>
498{
499 // randomly permute for load balance
500 if(A.getnrow() == A.getncol())
501 {
502 FullyDistVec<IT, IT> p( A.getcommgrid());
503 p.iota(A.getnrow(), 0);
504 p.RandPerm();
505 (A)(p,p,true);// in-place permute to save memory
506 SpParHelper::Print("Applied symmetric permutation.\n");
507 }
508 else
509 {
510 SpParHelper::Print("Rectangular matrix: Can not apply symmetric permutation.\n");
511 }
512}
513
514template <typename IT, typename NT, typename DER>
516{
517 if(param.remove_isolated)
518 RemoveIsolated(A, param);
519
520 if(param.randpermute)
521 RandPermute(A, param);
522
523 // Adjust self loops
524 AdjustLoops(A);
525
526 // Make stochastic
528 SpParHelper::Print("Made stochastic\n");
529
530
531 IT nnz = A.getnnz();
532 IT nv = A.getnrow();
533 IT avgDegree = nnz/nv;
534 if(avgDegree > std::max(param.select, param.recover_num))
535 {
536 SpParHelper::Print("Average degree of the input graph is greater than max{S,R}.\n");
537 param.preprune = true;
538 }
539 if(param.preprune)
540 {
541 SpParHelper::Print("Applying the prune/select/recovery logic before the first iteration\n\n");
542 MCLPruneRecoverySelect(A, (NT)param.prunelimit, (IT)param.select, (IT)param.recover_num, (NT)param.recover_pct, param.kselectVersion);
543 }
544
545 if(param.show)
546 {
547 A.PrintInfo();
548 }
549
550
551 // chaos doesn't make sense for non-stochastic matrices
552 // it is in the range {0,1} for stochastic matrices
553 NT chaos = 1;
554 int it=1;
555 double tInflate = 0;
556 double tExpand = 0;
557 typedef PlusTimesSRing<NT, NT> PTFF;
558 SpParMat3D<IT,NT,DER> A3D_cs(param.layers);
559 if(param.layers > 1) {
561 A3D_cs = SpParMat3D<IT,NT,DER>(A2D_cs, param.layers, true, false); // Non-special column split
562 }
563 // while there is an epsilon improvement
564 while( chaos > EPS)
565 {
566 SpParMat3D<IT,NT,DER> A3D_rs(param.layers);
567 if(param.layers > 1) {
568 A3D_rs = SpParMat3D<IT,NT,DER>(A3D_cs, false); // Create new rowsplit copy of matrix from colsplit copy
569 }
570
571 double t1 = MPI_Wtime();
572 //A.Square<PTFF>() ; // expand
573 if(param.layers == 1){
574 A = MemEfficientSpGEMM<PTFF, NT, DER>(A, A, param.phases, param.prunelimit, (IT)param.select, (IT)param.recover_num, param.recover_pct, param.kselectVersion, 1, param.perProcessMem);
575 }
576 else{
577 A3D_cs = MemEfficientSpGEMM3D<PTFF, NT, DER, IT, NT, NT, DER, DER >(
578 A3D_cs, A3D_rs,
579 param.phases,
580 param.prunelimit,
581 (IT)param.select,
582 (IT)param.recover_num,
583 param.recover_pct,
584 param.kselectVersion,
585 1,
586 param.perProcessMem
587 );
588 }
589
590 if(param.layers == 1){
592 }
593 else{
594 MakeColStochastic3D(A3D_cs);
595 }
596 tExpand += (MPI_Wtime() - t1);
597
598 if(param.show)
599 {
600 SpParHelper::Print("After expansion\n");
601 A.PrintInfo();
602 }
603 if(param.layers == 1) chaos = Chaos(A);
604 else chaos = Chaos3D(A3D_cs);
605
606 double tInflate1 = MPI_Wtime();
607 if (param.layers == 1) Inflate(A, param.inflation);
608 else Inflate3D(A3D_cs, param.inflation);
609
610 if(param.layers == 1) MakeColStochastic(A);
611 else MakeColStochastic3D(A3D_cs);
612
613 tInflate += (MPI_Wtime() - tInflate1);
614
615 if(param.show)
616 {
617 SpParHelper::Print("After inflation\n");
618 A.PrintInfo();
619 }
620
621
622
623 double newbalance = A.LoadImbalance();
624 double t3=MPI_Wtime();
625 stringstream s;
626 s << "Iteration# " << setw(3) << it << " : " << " chaos: " << setprecision(3) << chaos << " load-balance: "<< newbalance << " Time: " << (t3-t1) << endl;
627 SpParHelper::Print(s.str());
628 it++;
629
630
631
632 }
633
634
635#ifdef TIMING
636 double tcc1 = MPI_Wtime();
637#endif
638
639 // bool can not be used because
640 // bool does not work in A.AddLoops(1) used in LACC: can not create a fullydist vector with Bool
641 // SpParMat<IT,NT,DER> A does not work because int64_t and float promote trait not defined
642 // hence, we are forcing this with IT and double
643 SpParMat<IT,double, SpDCCols < IT, double >> ADouble(MPI_COMM_WORLD);
644 if(param.layers == 1) ADouble = A;
645 else ADouble = A3D_cs.Convert2D();
646 FullyDistVec<IT, IT> cclabels = Interpret(ADouble);
647
648
649#ifdef TIMING
650 double tcc = MPI_Wtime() - tcc1;
651 int myrank;
652 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
653 if(myrank==0)
654 {
655 if(param.layers == 1){
656 cout << "================detailed timing==================" << endl;
657 cout << "Expansion: " << mcl_Abcasttime + mcl_Bbcasttime + mcl_localspgemmtime + mcl_multiwaymergetime << endl;
658 cout << " Abcast= " << mcl_Abcasttime << endl;
659 cout << " Bbcast= " << mcl_Bbcasttime << endl;
660 cout << " localspgemm= " << mcl_localspgemmtime << endl;
661 cout << " multiwaymergetime= "<< mcl_multiwaymergetime << endl;
662 cout << "Prune: " << mcl_kselecttime + mcl_prunecolumntime << endl;
663 cout << " kselect= " << mcl_kselecttime << endl;
664 cout << " prunecolumn= " << mcl_prunecolumntime << endl;
665 cout << "Inflation " << tInflate << endl;
666 cout << "Component: " << tcc << endl;
667 cout << "File I/O: " << tIO << endl;
668 cout << "=================================================" << endl;
669 }
670 else{
671 cout << "================detailed timing==================" << endl;
673 cout << " Symbolic=" << mcl3d_symbolictime << endl;
674 cout << " SUMMAtime= "<< mcl3d_SUMMAtime << endl;
675 cout << " Abcast= " << mcl3d_Abcasttime << endl;
676 cout << " Bbcast= " << mcl3d_Bbcasttime << endl;
677 cout << " localspgemm= " << mcl3d_localspgemmtime << endl;
678 cout << " SUMMAmergetime= "<< mcl3d_SUMMAmergetime << endl;
679 cout << " reductiontime= "<< mcl3d_reductiontime << endl;
680 cout << " 3dmergetime= "<< mcl3d_3dmergetime << endl;
681 cout << "Prune: " << mcl_kselecttime + mcl_prunecolumntime << endl;
682 cout << " kselect= " << mcl_kselecttime << endl;
683 cout << " prunecolumn= " << mcl_prunecolumntime << endl;
684 cout << "Inflation " << tInflate << endl;
685 cout << "Component: " << tcc << endl;
686 cout << "File I/O: " << tIO << endl;
687 cout << "=================================================" << endl;
688
689 }
690 }
691
692#endif
693
694 return cclabels;
695
696
697}
698
699template <typename IT, typename NT, typename DER>
701{
703 AT.Transpose();
704 if(!(AT == A))
705 {
706 SpParHelper::Print("Symmatricizing an unsymmetric input matrix.\n");
707 A += AT;
708 }
709}
710
711template <typename GIT, typename LIT, typename NT>
713{
714 SpParMat<GIT,NT, SpDCCols < LIT, NT >> A(MPI_COMM_WORLD); // construct object
715 FullyDistVec<GIT, array<char, MAXVERTNAME> > vtxLabels(A.getcommgrid());
716
717 // read file
718
719 SpParHelper::Print("Reading input file......\n");
720
721 double tIO1 = MPI_Wtime();
722 if(param.isInputMM)
723 A.ParallelReadMM(param.ifilename, param.base, maximum<NT>()); // if base=0, then it is implicitly converted to Boolean false
724 else // default labeled triples format
725 vtxLabels = A.ReadGeneralizedTuples(param.ifilename, maximum<NT>());
726
727 tIO = MPI_Wtime() - tIO1;
728 ostringstream outs;
729 outs << " : took " << tIO << " seconds" << endl;
730 SpParHelper::Print(outs.str());
731 // Symmetricize the matrix only if needed
733
734 double balance = A.LoadImbalance();
735
736 outs.str("");
737 outs.clear();
738
739 GIT nnz = A.getnnz();
740 GIT nv = A.getnrow();
741 outs << "Number of vertices: " << nv << " number of edges: "<< nnz << endl;
742
743 outs << "Load balance: " << balance << endl;
744 SpParHelper::Print(outs.str());
745
746 if(param.show)
747 {
748 A.PrintInfo();
749 }
750
751
752
753#ifdef TIMING
754 mcl_Abcasttime = 0;
755 mcl_Bbcasttime = 0;
758 mcl_kselecttime = 0;
760#endif
761
762
763
764 double tstart = MPI_Wtime();
765
766 // Run HipMCL
767 FullyDistVec<GIT, GIT> culstLabels = HipMCL(A, param);
768 //culstLabels.ParallelWrite(param.ofilename, param.base); // clusters are always numbered 0-based
769
770 if(param.isInputMM)
771 WriteMCLClusters(param.ofilename, culstLabels, param.base);
772 else
773 WriteMCLClusters(param.ofilename, culstLabels, vtxLabels);
774
775
776
777 GIT nclusters = culstLabels.Reduce(maximum<GIT>(), (GIT) 0 ) ;
778 nclusters ++; // because of zero based indexing for clusters
779
780 double tend = MPI_Wtime();
781 stringstream s2;
782 s2 << "Number of clusters: " << nclusters << endl;
783 s2 << "Total time: " << (tend-tstart) << endl;
784 s2 << "=================================================\n" << endl ;
785 SpParHelper::Print(s2.str());
786
787}
788
789
790int main(int argc, char* argv[])
791{
792 int provided;
793 MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
794 if (provided < MPI_THREAD_SERIALIZED)
795 {
796 printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
797 MPI_Abort(MPI_COMM_WORLD, 1);
798 }
799
800 int nthreads = 1;
801#ifdef THREADED
802#pragma omp parallel
803 {
804 nthreads = omp_get_num_threads();
805 }
806#endif
807
808 int nprocs, myrank;
809 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
810 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
811
812 HipMCLParam param;
813
814 // initialize parameters to default values
815 InitParam(param);
816
817 // Populate parameters from command line options
818 ProcessParam(argc, argv, param);
819
820 // check if mandatory arguments are provided
821 if(param.ifilename=="" || param.inflation == 0.0)
822 {
823 SpParHelper::Print("Required options are missing.\n");
824 ShowOptions();
825 MPI_Finalize();
826 return -1;
827 }
828
829 // show parameters used to run HipMCL
830 ShowParam(param);
831
832 if(param.perProcessMem==0)
833 {
834 if(myrank == 0)
835 {
836 cout << "******** Number of phases will not be estimated as -per-process-mem option is not supplied. It is highly recommended that you provide -per-process-mem option for large-scale runs. *********** " << endl;
837 }
838 }
839
840 {
841 if(param.isDoublePrecision)
842 {
843 if(param.is64bInt) // default case
844 MainBody<int64_t, int64_t, double>(param);
845 else
846 MainBody<int64_t, int32_t, double>(param);
847 }
848 else if(param.is64bInt)
849 MainBody<int64_t, int64_t, float>(param);
850 else
851 MainBody<int64_t, int32_t, float>(param);
852 }
853
854
855
856 // make sure the destructors for all objects are called before MPI::Finalize()
857 MPI_Finalize();
858 return 0;
859}
860
int64_t IT
double NT
double mcl_localspgemmtime
Definition: MCL.cpp:60
double mcl_symbolictime
Definition: MCL.cpp:57
double mcl_multiwaymergetime
Definition: MCL.cpp:61
double mcl_prunecolumntime
Definition: MCL.cpp:63
double mcl3d_symbolictime
Definition: MCL.cpp:67
int main(int argc, char *argv[])
Definition: MCL.cpp:790
double mcl_Bbcasttime
Definition: MCL.cpp:59
void ShowParam(HipMCLParam &param)
Definition: MCL.cpp:166
double mcl3d_SUMMAmergetime
Definition: MCL.cpp:72
void RandPermute(SpParMat< IT, NT, DER > &A, HipMCLParam &param)
Definition: MCL.cpp:497
double mcl3d_Bbcasttime
Definition: MCL.cpp:69
double cblas_transvectime
Definition: MCL.cpp:83
void MakeColStochastic(SpParMat< IT, NT, DER > &A)
Definition: MCL.cpp:390
double cblas_localspmvtime
Definition: MCL.cpp:81
void ProcessParam(int argc, char *argv[], HipMCLParam &param)
Definition: MCL.cpp:233
double mcl_kselecttime
Definition: MCL.cpp:62
void InitParam(HipMCLParam &param)
Definition: MCL.cpp:129
int cblas_splits
Definition: MCL.cpp:78
FullyDistVec< IT, IT > HipMCL(SpParMat< IT, NT, DER > &A, HipMCLParam &param)
Definition: MCL.cpp:515
double cblas_allgathertime
Definition: MCL.cpp:80
double cblas_alltoalltime
Definition: MCL.cpp:79
void AdjustLoops(SpParMat< IT, NT, DER > &A)
Definition: MCL.cpp:464
double mcl3d_reductiontime
Definition: MCL.cpp:73
double mcl3d_kselecttime
Definition: MCL.cpp:75
double mcl3d_conversiontime
Definition: MCL.cpp:66
int64_t mcl_memory
Definition: MCL.cpp:85
#define EPS
Definition: MCL.cpp:55
void RemoveIsolated(SpParMat< IT, NT, DER > &A, HipMCLParam &param)
Definition: MCL.cpp:477
void Inflate(SpParMat< IT, NT, DER > &A, double power)
Definition: MCL.cpp:447
double mcl_Abcasttime
Definition: MCL.cpp:58
void MakeColStochastic3D(SpParMat3D< IT, NT, DER > &A3D)
Definition: MCL.cpp:398
double mcl3d_Abcasttime
Definition: MCL.cpp:68
NT Chaos(SpParMat< IT, NT, DER > &A)
Definition: MCL.cpp:408
double mcl3d_localspgemmtime
Definition: MCL.cpp:71
void MainBody(HipMCLParam &param)
Definition: MCL.cpp:712
void Symmetricize(SpParMat< IT, NT, DER > &A)
Definition: MCL.cpp:700
NT Chaos3D(SpParMat3D< IT, NT, DER > &A3D)
Definition: MCL.cpp:424
double tIO
Definition: MCL.cpp:86
void Inflate3D(SpParMat3D< IT, NT, DER > &A3D, double power)
Definition: MCL.cpp:453
double mcl3d_3dmergetime
Definition: MCL.cpp:74
void ShowOptions()
Definition: MCL.cpp:312
double cblas_mergeconttime
Definition: MCL.cpp:82
double mcl3d_SUMMAtime
Definition: MCL.cpp:70
FullyDistVec< IT, IT > Interpret(SpParMat< IT, NT, DER > &A)
Definition: MCL.cpp:373
FullyDistVec< IT, IT > FindInds(_Predicate pred) const
Return the indices where pred is true.
void EWiseApply(const FullyDistVec< IT, NT2 > &other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, const bool useExtendedBinOp)
NT Reduce(_BinaryOperation __binary_op, NT identity) const
void iota(IT globalsize, NT first)
void Apply(_UnaryOperation __unary_op)
Definition: FullyDistVec.h:183
std::shared_ptr< SpParMat< IT, NT, DER > > GetLayerMat()
Definition: SpParMat3D.h:60
SpParMat< IT, NT, DER > Convert2D()
Definition: SpParMat3D.cpp:441
std::shared_ptr< CommGrid3D > getcommgrid3D() const
Definition: SpParMat3D.h:73
int nprocs
Definition: comms.cpp:55
long int64_t
Definition: compat.h:21
Definition: CCGrid.h:4
@ Column
Definition: SpDefs.h:115
void MCLPruneRecoverySelect(SpParMat< IT, NT, DER > &A, NT hardThreshold, IT selectNum, IT recoverNum, NT recoverPct, int kselectVersion)
Definition: ParFriends.h:186
void WriteMCLClusters(std::string ofName, FullyDistVec< IT, IT > clustIdForVtx, FullyDistVec< IT, std::array< char, MAXVERTNAME > > vtxLabels)
FullyDistVec< IT, IT > CC(SpParMat< IT, NT, DER > &A, IT &nCC)
Definition: CC.h:1405
double A
string ifilename
Definition: MCL.cpp:93
bool is64bInt
Definition: MCL.cpp:118
bool show
Definition: MCL.cpp:123
int layers
Definition: MCL.cpp:119
int randpermute
Definition: MCL.cpp:100
int64_t select
Definition: MCL.cpp:108
int phases
Definition: MCL.cpp:115
string ofilename
Definition: MCL.cpp:97
bool preprune
Definition: MCL.cpp:112
double recover_pct
Definition: MCL.cpp:110
int compute
Definition: MCL.cpp:120
bool remove_isolated
Definition: MCL.cpp:101
bool isInputMM
Definition: MCL.cpp:94
int base
Definition: MCL.cpp:95
int64_t recover_num
Definition: MCL.cpp:109
int perProcessMem
Definition: MCL.cpp:116
bool isDoublePrecision
Definition: MCL.cpp:117
int kselectVersion
Definition: MCL.cpp:111
double inflation
Definition: MCL.cpp:104
double prunelimit
Definition: MCL.cpp:107
Compute the maximum of two values.
Definition: Operations.h:155