COMBINATORIAL_BLAS 1.6
mpipspgemm.cpp
#include <mpi.h>
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>
#include <stdint.h>
#include <cmath>
#include <omp.h>   // needed for omp_get_num_threads() below
#include "CombBLAS/CombBLAS.h"
#include "Glue.h"
#include "CCGrid.h"
#include "Reductions.h"
#include "Multiplier.h"
#include "SplitMatDist.h"

using namespace std;
using namespace combblas;

double comm_bcast;
double comm_reduce;
double comp_summa;
double comp_reduce;
double comp_result;
double comp_reduce_layer;
double comp_split;
double comp_trans;
double comm_split;

#define ITERS 5

int main(int argc, char *argv[])
{
    int provided;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
    {
        printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int nprocs, myrank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    if(argc < 8)
    {
        if(myrank == 0)
        {
            printf("Usage (random): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type> <Scale> <EDGEFACTOR> <algo>\n");
            printf("Usage (input): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type=input> <matA> <matB> <algo>\n");
            printf("Example: ./mpipspgemm 4 4 2 ER 19 16 outer\n");
            printf("Example: ./mpipspgemm 4 4 2 input matA.mtx matB.mtx column\n");
            printf("Type ER: Erdos-Renyi\n");
            printf("Type SSCA: R-MAT with SSCA benchmark parameters\n");
            printf("Type G500: R-MAT with Graph500 benchmark parameters\n");
            printf("algo: outer | column\n");
        }
        return -1;
    }

    unsigned GRROWS = (unsigned) atoi(argv[1]);
    unsigned GRCOLS = (unsigned) atoi(argv[2]);
    unsigned C_FACTOR = (unsigned) atoi(argv[3]);
    CCGrid CMG(C_FACTOR, GRCOLS);
    int nthreads;
#pragma omp parallel
    {
        nthreads = omp_get_num_threads();
    }

    if(GRROWS != GRCOLS)
    {
        SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int layer_length = GRROWS*GRCOLS;
    if(layer_length * C_FACTOR != nprocs)
    {
        SpParHelper::Print("The product of <GridRows> <GridCols> <Layers> does not match the number of processes\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    {
        SpDCCols<int64_t, double> splitA, splitB;
        SpDCCols<int64_t, double> *splitC;
        string type;
        shared_ptr<CommGrid> layerGrid;
        layerGrid.reset( new CommGrid(CMG.layerWorld, 0, 0) );
        FullyDistVec<int64_t, int64_t> p(layerGrid); // permutation vector defined on layers

        if(string(argv[4]) == string("input")) // input option
        {
            string fileA(argv[5]);
            string fileB(argv[6]);

            double t01 = MPI_Wtime();
            SpDCCols<int64_t, double> *A = ReadMat<double>(fileA, CMG, true, p);
            SpDCCols<int64_t, double> *B = ReadMat<double>(fileB, CMG, true, p);
            SplitMat(CMG, A, splitA, false);
            SplitMat(CMG, B, splitB, true); //row-split
            if(myrank == 0) cout << "Matrices read and replicated along layers : time " << MPI_Wtime() - t01 << endl;
        }
        else
        {
            unsigned scale = (unsigned) atoi(argv[5]);
            unsigned EDGEFACTOR = (unsigned) atoi(argv[6]);
            double initiator[4];
            if(string(argv[4]) == string("ER"))
            {
                initiator[0] = .25;
                initiator[1] = .25;
                initiator[2] = .25;
                initiator[3] = .25;
            }
            else if(string(argv[4]) == string("G500"))
            {
                initiator[0] = .57;
                initiator[1] = .19;
                initiator[2] = .19;
                initiator[3] = .05;
                EDGEFACTOR = 16;
            }
            else if(string(argv[4]) == string("SSCA"))
            {
                initiator[0] = .6;
                initiator[1] = .4/3;
                initiator[2] = .4/3;
                initiator[3] = .4/3;
                EDGEFACTOR = 8;
            }
            else
            {
                if(myrank == 0)
                    printf("The initiator parameter - %s - is not recognized.\n", argv[4]);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }

            double t01 = MPI_Wtime();
            SpDCCols<int64_t, double> *A = GenMat<int64_t,double>(CMG, scale, EDGEFACTOR, initiator, true);
            SpDCCols<int64_t, double> *B = GenMat<int64_t,double>(CMG, scale, EDGEFACTOR, initiator, true);

            SplitMat(CMG, A, splitA, false);
            SplitMat(CMG, B, splitB, true); //row-split
            if(myrank == 0) cout << "RMATs Generated and replicated along layers : time " << MPI_Wtime() - t01 << endl;
        }

        int64_t globalnnzA=0, globalnnzB=0;
        int64_t localnnzA = splitA.getnnz();
        int64_t localnnzB = splitB.getnnz();
        MPI_Allreduce( &localnnzA, &globalnnzA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce( &localnnzB, &globalnnzB, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        if(myrank == 0) cout << "After split: nnzA= " << globalnnzA << " & nnzB= " << globalnnzB;

        type = string(argv[7]);
        if(myrank == 0)
        {
            printf("\n Processor Grid (row x col x layers x threads): %dx%dx%dx%d \n", CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads);
            printf(" prow pcol layer thread comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");
        }
        if(type == string("outer"))
        {
            splitB.Transpose(); //locally transpose for outer product
            for(int k=0; k<ITERS; k++)
            {
                splitC = multiply(splitA, splitB, CMG, true, false); // outer product
                delete splitC;
            }
        }
        else // default column-threaded
        {
            for(int k=0; ITERS>0 && k<ITERS-1; k++)
            {
                splitC = multiply(splitA, splitB, CMG, false, true);
                delete splitC;
            }
            splitC = multiply(splitA, splitB, CMG, false, true);
            int64_t nnzC=0;
            int64_t localnnzC = splitC->getnnz();
            MPI_Allreduce( &localnnzC, &nnzC, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
            if(myrank == 0) cout << "\n After multiplication: nnzC= " << nnzC << endl << endl;
            delete splitC;
        }
    }

    MPI_Finalize();
    return 0;
}
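Usage note: the program aborts unless the MPI job size equals <GridRows> x <GridCols> x <Layers> and the per-layer grid is square. Assuming an MPI launcher such as mpirun (the launcher name and process count below are illustrative), a run of the threaded column algorithm on the Graph500 generator could look like:

    mpirun -np 32 ./mpipspgemm 4 4 2 G500 19 16 column

Here 4 x 4 x 2 = 32 processes, scale is 19, and EDGEFACTOR is 16 (for G500 the code overrides EDGEFACTOR to 16 regardless of the argument). Passing outer instead of column selects the outer-product variant, which locally transposes B before the ITERS multiplication rounds.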