33#ifndef __STDC_CONSTANT_MACROS
34#define __STDC_CONSTANT_MACROS
36#ifndef __STDC_LIMIT_MACROS
37#define __STDC_LIMIT_MACROS
48#include "CombBLAS/CombBLAS.h"
168 ostringstream runinfo;
169 runinfo <<
"\n======================================" << endl;
170 runinfo <<
"Running HipMCL with the parameters: " << endl;
171 runinfo <<
"======================================" << endl;
172 runinfo <<
"Input/Output file" << endl;
173 runinfo <<
" input filename: " << param.
ifilename << endl;
174 runinfo <<
" input file type: " ;
177 runinfo <<
" Matrix Market" << endl;
178 runinfo <<
" Base of the input matrix: " << param.
base << endl;
180 else runinfo <<
" Labeled Triples format" << endl;
181 runinfo <<
" Output filename: " << param.
ofilename << endl;
184 runinfo <<
"Preprocessing" << endl;
185 runinfo <<
" Remove isolated vertices? : ";
187 else runinfo <<
"no" << endl;
190 runinfo <<
" Randomly permute vertices? : ";
192 else runinfo <<
"no" << endl;
194 runinfo <<
"Inflation: " << param.
inflation << endl;
196 runinfo <<
"Pruning" << endl;
197 runinfo <<
" Prunelimit: " << param.
prunelimit << endl;
198 runinfo <<
" Recover number: " << param.
recover_num << endl;
199 runinfo <<
" Recover percent: " << ceil(param.
recover_pct*100) << endl;
200 runinfo <<
" Selection number: " << param.
select << endl;
201 runinfo <<
" Apply prune/select/recovery before the first iteration? : ";
202 if (param.
preprune) runinfo <<
"yes"<< endl;
203 else runinfo <<
"no" << endl;
213 runinfo <<
"HipMCL optimization" << endl;
214 runinfo <<
" Number of layers : " << param.
layers << endl;
215 runinfo <<
" Computation kernel : " << param.
compute << endl;
216 runinfo <<
" Number of phases: " << param.
phases << endl;
217 runinfo <<
" Memory avilable per process: ";
219 else runinfo <<
"not provided" << endl;
220 if(param.
isDoublePrecision) runinfo <<
"Using double precision floating point" << endl;
221 else runinfo <<
"Using single precision floating point" << endl;
222 if(param.
is64bInt ) runinfo <<
"Using 64 bit local indexing" << endl;
223 else runinfo <<
"Using 32 bit local indexing" << endl;
225 runinfo <<
"Debugging" << endl;
226 runinfo <<
" Show matrices after major steps? : ";
227 if (param.
show) runinfo <<
"yes";
228 else runinfo <<
"no" << endl;
229 runinfo <<
"======================================" << endl;
230 SpParHelper::Print(runinfo.str());
235 for (
int i = 1; i < argc; i++)
237 if (strcmp(argv[i],
"-M")==0){
240 else if (strcmp(argv[i],
"--matrix-market")==0){
243 else if (strcmp(argv[i],
"-o")==0){
246 else if (strcmp(argv[i],
"--show")==0){
249 else if (strcmp(argv[i],
"--remove-isolated")==0){
252 else if (strcmp(argv[i],
"--tournament-select")==0){
255 else if (strcmp(argv[i],
"--quick-select")==0){
259 else if (strcmp(argv[i],
"-I")==0){
262 }
else if (strcmp(argv[i],
"-p")==0) {
265 }
else if (strcmp(argv[i],
"-S")==0) {
266 param.
select = atoi(argv[i + 1]);
268 }
else if (strcmp(argv[i],
"-R")==0) {
271 }
else if (strcmp(argv[i],
"-pct")==0)
275 }
else if (strcmp(argv[i],
"-base")==0) {
276 param.
base = atoi(argv[i + 1]);
278 else if (strcmp(argv[i],
"-rand")==0) {
281 else if (strcmp(argv[i],
"--preprune")==0) {
284 else if (strcmp(argv[i],
"-layers")==0) {
285 param.
layers = atoi(argv[i + 1]);
287 else if (strcmp(argv[i],
"-compute")==0) {
288 param.
layers = atoi(argv[i + 1]);
290 else if (strcmp(argv[i],
"-phases")==0) {
291 param.
phases = atoi(argv[i + 1]);
293 else if (strcmp(argv[i],
"-per-process-mem")==0) {
296 else if (strcmp(argv[i],
"--single-precision")==0) {
299 else if (strcmp(argv[i],
"--32bit-local-index")==0) {
314 ostringstream runinfo;
316 runinfo <<
"Usage: ./hipmcl -M <input filename> -I <inlfation> (required)" << endl;
318 runinfo <<
"======================================" << endl;
319 runinfo <<
" Detail parameter options " << endl;
320 runinfo <<
"======================================" << endl;
324 runinfo <<
"Input/Output file" << endl;
325 runinfo <<
" -M <input file name (labeled triples format)> (mandatory)" << endl;
326 runinfo <<
" --matrix-market : if provided, the input file is in the matrix market format (default: the file is in labeled triples format)" << endl;
327 runinfo <<
" -base <index of the first vertex in the matrix market file, 0|1> (default: 1) " << endl;
328 runinfo <<
" -o <output filename> (default: input_file_name.hipmcl )" << endl;
330 runinfo <<
"Inflation" << endl;
331 runinfo <<
"-I <inflation> (mandatory)\n";
333 runinfo <<
"Preprocessing" << endl;
334 runinfo <<
" -rand <randomly permute vertices> (default:0)\n";
335 runinfo <<
" --remove-isolated : if provided, remove isolated vertices (default: don't remove isolated vertices)\n";
338 runinfo <<
"Pruning" << endl;
339 runinfo <<
" -p <cutoff> (default: 1/10000)\n";
340 runinfo <<
" -R <recovery number> (default: 1400)\n";
341 runinfo <<
" -pct <recovery pct> (default: 90)\n";
342 runinfo <<
" -S <selection number> (default: 1100)\n";
343 runinfo <<
" --preprune : if provided, apply prune/select/recovery before the first iteration (needed when dense columns are present) (default: don't preprune. However, if the average nonzero per column is larger than max{S,R}, prepruning is still applied by default)\n";
345 runinfo <<
"HipMCL optimization" << endl;
346 runinfo <<
" -layers <number of layers> (default:1)\n";
347 runinfo <<
" -compute <1 or 2> (default:1)\n";
348 runinfo <<
" -phases <number of phases> (default:1)\n";
349 runinfo <<
" -per-process-mem <memory (GB) available per process> (default:0, number of phases is not estimated)\n";
350 runinfo <<
" --single-precision (if not provided, use double precision floating point numbers)\n" << endl;
351 runinfo <<
" --32bit-local-index (if not provided, use 64 bit indexing for vertex ids)\n" << endl;
353 runinfo <<
"Debugging" << endl;
354 runinfo <<
" --show: show information about matrices after major steps (default: do not show matrices)" << endl;
358 runinfo <<
"======================================" << endl;
359 runinfo <<
" Few examples " << endl;
360 runinfo <<
"======================================" << endl;
361 runinfo <<
"Example with with a graph in labeled triples format on a laptop with 8GB memory and 8 cores:\nexport OMP_NUM_THREADS=8\nbin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 8" << endl;
362 runinfo <<
"Same as above with 4 processes and 2 theaded per process cores:\nexport OMP_NUM_THREADS=2\nmpirun -np 4 bin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 2" << endl;
363 runinfo <<
"Example with a graph in matrix market format:\nbin/hipmcl -M data/sevenvertex.mtx --matrix-market -base 1 -I 2 -per-process-mem 8" << endl;
365 runinfo <<
"Example on the NERSC/Cori system with 16 nodes, 4 process per node and 16 threads per process: \nsrun -N 16 -n 64 -c 16 bin/hipmcl -M data/hep-th.mtx --matrix-market -base 1 -per-process-mem 27 -o hep-th.hipmcl" << endl;
366 SpParHelper::Print(runinfo.str());
372template <
typename IT,
typename NT,
typename DER>
382 SpParHelper::Print(
"Finding connected components....\n");
389template <
typename IT,
typename NT,
typename DER>
394 A.DimApply(
Column, colsums, multiplies<NT>());
397template <
typename IT,
typename NT,
typename DER>
401 std::shared_ptr< SpParMat<IT, NT, DER> > ALayer = A3D.
GetLayerMat();
404 ALayer->DimApply(
Column, colsums, multiplies<NT>());
407template <
typename IT,
typename NT,
typename DER>
418 colmaxs.
EWiseApply(nnzPerColumn, multiplies<NT>());
423template <
typename IT,
typename NT,
typename DER>
427 std::shared_ptr< SpParMat<IT, NT, DER> > ALayer = A3D.
GetLayerMat();
437 colmaxs.
EWiseApply(nnzPerColumn, multiplies<NT>());
442 MPI_Allreduce( &layerChaos, &totalChaos, 1, MPIType<NT>(), MPI_MAX, A3D.
getcommgrid3D()->GetFiberWorld());
446template <
typename IT,
typename NT,
typename DER>
452template <
typename IT,
typename NT,
typename DER>
456 std::shared_ptr< SpParMat<IT, NT, DER> > ALayer = A3D.
GetLayerMat();
463template <
typename IT,
typename NT,
typename DER>
469 A.Apply([](
NT val){
return val==numeric_limits<NT>::min() ? 1.0 : val;});
472 outs <<
"Adjusting loops" << endl;
473 SpParHelper::Print(outs.str());
476template <
typename IT,
typename NT,
typename DER>
482 IT numIsolated =
A.getnrow() - nonisov.TotalLength();
483 outs <<
"Number of isolated vertices: " << numIsolated << endl;
484 SpParHelper::Print(outs.str());
486 A(nonisov, nonisov,
true);
487 SpParHelper::Print(
"Removed isolated vertices.\n");
496template <
typename IT,
typename NT,
typename DER>
500 if(
A.getnrow() ==
A.getncol())
503 p.
iota(
A.getnrow(), 0);
506 SpParHelper::Print(
"Applied symmetric permutation.\n");
510 SpParHelper::Print(
"Rectangular matrix: Can not apply symmetric permutation.\n");
514template <
typename IT,
typename NT,
typename DER>
528 SpParHelper::Print(
"Made stochastic\n");
533 IT avgDegree = nnz/nv;
536 SpParHelper::Print(
"Average degree of the input graph is greater than max{S,R}.\n");
541 SpParHelper::Print(
"Applying the prune/select/recovery logic before the first iteration\n\n");
571 double t1 = MPI_Wtime();
574 A = MemEfficientSpGEMM<PTFF, NT, DER>(
A,
A, param.
phases, param.
prunelimit, (
IT)param.
select, (
IT)param.
recover_num, param.
recover_pct, param.
kselectVersion, 1, param.
perProcessMem);
577 A3D_cs = MemEfficientSpGEMM3D<PTFF, NT, DER, IT, NT, NT, DER, DER >(
596 tExpand += (MPI_Wtime() - t1);
600 SpParHelper::Print(
"After expansion\n");
606 double tInflate1 = MPI_Wtime();
613 tInflate += (MPI_Wtime() - tInflate1);
617 SpParHelper::Print(
"After inflation\n");
623 double newbalance =
A.LoadImbalance();
624 double t3=MPI_Wtime();
626 s <<
"Iteration# " << setw(3) << it <<
" : " <<
" chaos: " << setprecision(3) << chaos <<
" load-balance: "<< newbalance <<
" Time: " << (t3-t1) << endl;
627 SpParHelper::Print(s.str());
636 double tcc1 = MPI_Wtime();
644 if(param.
layers == 1) ADouble =
A;
650 double tcc = MPI_Wtime() - tcc1;
652 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
656 cout <<
"================detailed timing==================" << endl;
665 cout <<
"Inflation " << tInflate << endl;
666 cout <<
"Component: " << tcc << endl;
667 cout <<
"File I/O: " <<
tIO << endl;
668 cout <<
"=================================================" << endl;
671 cout <<
"================detailed timing==================" << endl;
684 cout <<
"Inflation " << tInflate << endl;
685 cout <<
"Component: " << tcc << endl;
686 cout <<
"File I/O: " <<
tIO << endl;
687 cout <<
"=================================================" << endl;
699template <
typename IT,
typename NT,
typename DER>
706 SpParHelper::Print(
"Symmatricizing an unsymmetric input matrix.\n");
711template <
typename GIT,
typename LIT,
typename NT>
719 SpParHelper::Print(
"Reading input file......\n");
721 double tIO1 = MPI_Wtime();
727 tIO = MPI_Wtime() - tIO1;
729 outs <<
" : took " <<
tIO <<
" seconds" << endl;
730 SpParHelper::Print(outs.str());
734 double balance =
A.LoadImbalance();
739 GIT nnz =
A.getnnz();
740 GIT nv =
A.getnrow();
741 outs <<
"Number of vertices: " << nv <<
" number of edges: "<< nnz << endl;
743 outs <<
"Load balance: " << balance << endl;
744 SpParHelper::Print(outs.str());
764 double tstart = MPI_Wtime();
780 double tend = MPI_Wtime();
782 s2 <<
"Number of clusters: " << nclusters << endl;
783 s2 <<
"Total time: " << (tend-tstart) << endl;
784 s2 <<
"=================================================\n" << endl ;
785 SpParHelper::Print(s2.str());
790int main(
int argc,
char* argv[])
793 MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
794 if (provided < MPI_THREAD_SERIALIZED)
796 printf(
"ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
797 MPI_Abort(MPI_COMM_WORLD, 1);
804 nthreads = omp_get_num_threads();
809 MPI_Comm_size(MPI_COMM_WORLD,&
nprocs);
810 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
823 SpParHelper::Print(
"Required options are missing.\n");
836 cout <<
"******** Number of phases will not be estimated as -per-process-mem option is not supplied. It is highly recommended that you provide -per-process-mem option for large-scale runs. *********** " << endl;
844 MainBody<int64_t, int64_t, double>(param);
846 MainBody<int64_t, int32_t, double>(param);
849 MainBody<int64_t, int64_t, float>(param);
851 MainBody<int64_t, int32_t, float>(param);
double mcl_localspgemmtime
double mcl_multiwaymergetime
double mcl_prunecolumntime
double mcl3d_symbolictime
int main(int argc, char *argv[])
void ShowParam(HipMCLParam ¶m)
double mcl3d_SUMMAmergetime
void RandPermute(SpParMat< IT, NT, DER > &A, HipMCLParam ¶m)
double cblas_transvectime
void MakeColStochastic(SpParMat< IT, NT, DER > &A)
double cblas_localspmvtime
void ProcessParam(int argc, char *argv[], HipMCLParam ¶m)
void InitParam(HipMCLParam ¶m)
FullyDistVec< IT, IT > HipMCL(SpParMat< IT, NT, DER > &A, HipMCLParam ¶m)
double cblas_allgathertime
double cblas_alltoalltime
void AdjustLoops(SpParMat< IT, NT, DER > &A)
double mcl3d_reductiontime
double mcl3d_conversiontime
void RemoveIsolated(SpParMat< IT, NT, DER > &A, HipMCLParam ¶m)
void Inflate(SpParMat< IT, NT, DER > &A, double power)
void MakeColStochastic3D(SpParMat3D< IT, NT, DER > &A3D)
NT Chaos(SpParMat< IT, NT, DER > &A)
double mcl3d_localspgemmtime
void MainBody(HipMCLParam ¶m)
void Symmetricize(SpParMat< IT, NT, DER > &A)
NT Chaos3D(SpParMat3D< IT, NT, DER > &A3D)
void Inflate3D(SpParMat3D< IT, NT, DER > &A3D, double power)
double cblas_mergeconttime
FullyDistVec< IT, IT > Interpret(SpParMat< IT, NT, DER > &A)
FullyDistVec< IT, IT > FindInds(_Predicate pred) const
Return the indices where pred is true.
void EWiseApply(const FullyDistVec< IT, NT2 > &other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, const bool useExtendedBinOp)
NT Reduce(_BinaryOperation __binary_op, NT identity) const
void iota(IT globalsize, NT first)
void Apply(_UnaryOperation __unary_op)
std::shared_ptr< SpParMat< IT, NT, DER > > GetLayerMat()
SpParMat< IT, NT, DER > Convert2D()
std::shared_ptr< CommGrid3D > getcommgrid3D() const
void MCLPruneRecoverySelect(SpParMat< IT, NT, DER > &A, NT hardThreshold, IT selectNum, IT recoverNum, NT recoverPct, int kselectVersion)
void WriteMCLClusters(std::string ofName, FullyDistVec< IT, IT > clustIdForVtx, FullyDistVec< IT, std::array< char, MAXVERTNAME > > vtxLabels)
FullyDistVec< IT, IT > CC(SpParMat< IT, NT, DER > &A, IT &nCC)
Compute the maximum of two values.