15#include "Tommy/tommyhashdyn.h"
20#define BATCH 100000000
21#define MAXLINELENGTH 512
23#define MAXVERTNAME 128
25string chop_head(
const string & full_str,
const string & head_str)
27 if (full_str.compare(0, head_str.length(), head_str) == 0)
29 return full_str.substr(head_str.length(), string::npos);
33 cout <<
"String doesn't start with " << head_str << endl;
43 pair<vector<uint32_t>*, ofstream*> *
mypair = (pair<vector<uint32_t>*, ofstream*> *) arg;
44 vector<uint32_t> * shuffler =
mypair->first;
45 ofstream * out =
mypair->second;
54template <
typename IT1,
typename NT1,
typename IT2,
typename NT2>
55void push_to_vectors(vector<IT1> & rows, vector<IT1> & cols, vector<NT1> & vals, IT2 ii, IT2 jj, NT2 vv)
62template <
typename IT1,
typename NT1>
63void ProcessLines(vector<IT1> & rows, vector<IT1> & cols, vector<NT1> & vals, vector<string> & lines,
tommy_hashdyn & hashdyn,
const vector<uint32_t> & shuffler)
68 for (vector<string>::iterator itr=lines.begin(); itr != lines.end(); ++itr)
71 sscanf(itr->c_str(),
"%s %s %lg", from, to, &vv);
83 vector<string>().swap(lines);
89 if ((*bytes_read) < bytes_requested) {
91 if (buf[(*bytes_read) - 1] !=
'\n') {
93 buf[(*bytes_read) - 1] =
'\n';
94 cout <<
"Error in input format, appending missing newline at end of file" << endl;
101bool FetchBatch(FILE * f_local,
long int & curpos,
long int end_fpos,
bool firstcall, vector<string> & lines)
103 size_t bytes2fetch =
BATCH;
104 bool begfile = (curpos == 0);
105 if(firstcall && (!begfile))
110 char * buf =
new char[bytes2fetch];
111 char * originalbuf = buf;
113 int seekfail = fseek(f_local, curpos, SEEK_SET);
115 cout <<
"fseek failed to move to " << curpos << endl;
117 int bytes_read = fread(buf,
sizeof(
char), bytes2fetch, f_local);
120 delete [] originalbuf;
124 if(firstcall && (!begfile))
136 cout <<
"Unexpected line without a break" << endl;
144 while(bytes_read > 0 && curpos < end_fpos)
146 char *c = (
char*)memchr(buf,
'\n', bytes_read);
148 delete [] originalbuf;
154 lines.push_back(
string(buf, n-1));
159 delete [] originalbuf;
160 if (curpos >= end_fpos)
return true;
164void MMConverter(
const string & filename, ofstream & dictout,
const string & outprefix)
167 if ((f = fopen(filename.c_str(),
"r")) == NULL)
169 printf(
"file can not be found\n");
175 if (stat(filename.c_str(), &st) == -1)
179 int64_t file_size = st.st_size;
180 cout <<
"File is " << file_size <<
" bytes" << endl;
181 long int ffirst = ftell(f);
182 long int fpos = ffirst;
183 long int end_fpos = file_size;
185 vector<string> lines;
186 bool finished =
FetchBatch(f, fpos, end_fpos,
true, lines);
187 int64_t entriesread = lines.size();
196 for (vector<string>::iterator itr=lines.begin(); itr != lines.end(); ++itr)
199 sscanf(itr->c_str(),
"%s %s %lg", from, to, &vv);
219 vector<string>().swap(lines);
223 finished =
FetchBatch(f, fpos, end_fpos,
false, lines);
224 entriesread += lines.size();
225 cout <<
"entriesread: " << entriesread <<
", current vertex id: " << vertexid << endl;
231 for (vector<string>::iterator itr=lines.begin(); itr != lines.end(); ++itr)
234 sscanf(itr->c_str(),
"%s %s %lg", from, to, &vv);
255 vector<string>().swap(lines);
257 cout <<
"There are " << vertexid <<
" vertices and " << entriesread <<
" edges" << endl;
261 uint32_t ranges[NSUBGRAPHS] = {vertexid, vertexid/2, vertexid/4, vertexid/8, vertexid/16, vertexid/32};
262 cout <<
"Printing submatrices with the following numbers of vertices: ";
263 copy(ranges, ranges+NSUBGRAPHS, ostream_iterator<uint32_t>(cout,
" ")); cout << endl;
267 vector< uint32_t > shuffler(nvertices);
268 iota(shuffler.begin(), shuffler.end(),
static_cast<uint32_t>(0));
269 random_shuffle ( shuffler.begin(), shuffler.end() );
271 pair< vector<uint32_t>*, ofstream*>
mypair(&shuffler, &dictout);
274 cout <<
"Shuffled and wrote dictionary " << endl;
280 long int fpos, end_fpos;
281 int this_thread = omp_get_thread_num();
282 int num_threads = omp_get_num_threads();
284 if(this_thread == 0) fpos = ffirst;
285 else fpos = this_thread * file_size / num_threads;
289 string names[NSUBGRAPHS];
290 ofstream outfiles[NSUBGRAPHS];
291 for(
int i= 0; i<NSUBGRAPHS; i++)
293 names[i] =
"Renamed_subgraph";
294 names[i] += std::to_string(i);
296 names[i] += outprefix;
297 names[i] += std::to_string(this_thread);
298 cout << names[i] << endl;
299 outfiles[i].open(names[i]);
304 name =
"Renamed_graph_";
306 name += std::to_string(this_thread);
311 if(this_thread != (num_threads-1)) end_fpos = (this_thread + 1) * file_size / num_threads;
312 else end_fpos = file_size;
314 FILE * f_perthread = fopen(filename.c_str(),
"rb");
315 vector<string> lines;
316 bool finished =
FetchBatch(f_perthread, fpos, end_fpos,
true, lines);
317 size_t nnz = lines.size();
318 vector<uint32_t> rows;
319 vector<uint32_t> cols;
321 ProcessLines(rows, cols, vals, lines, hashdyn, shuffler);
325 cout <<
"there are " << num_threads <<
" threads" << endl;
327 for(
int i= 0; i<NSUBGRAPHS; i++)
329 outfiles[i] <<
"%%MatrixMarket matrix coordinate real symmetric\n";
330 outfiles[i] << ranges[i] <<
"\t" << ranges[i] <<
"\t" << entriesread <<
"\n";
333 outfile <<
"%%MatrixMarket matrix coordinate real symmetric\n";
334 outfile << nvertices <<
"\t" << nvertices <<
"\t" << entriesread <<
"\n";
337 for(
size_t k=0; k< nnz; ++k)
340 for(
int i= 0; i<NSUBGRAPHS; i++)
342 if(rows[k] < ranges[i] && cols[k] < ranges[i])
343 outfiles[i] << rows[k] <<
"\t" << cols[k] <<
"\t" << vals[k] <<
"\n";
346 outfile << rows[k] <<
"\t" << cols[k] <<
"\t" << vals[k] <<
"\n";
356 finished =
FetchBatch(f_perthread, fpos, end_fpos,
false, lines);
358 ProcessLines(rows, cols, vals, lines, hashdyn, shuffler);
360 for(
size_t k=0; k< nnz; ++k)
363 for(
int i= 0; i<NSUBGRAPHS; i++)
365 if(rows[k] < ranges[i] && cols[k] < ranges[i])
366 outfiles[i] << rows[k] <<
"\t" << cols[k] <<
"\t" << vals[k] <<
"\n";
369 outfile << rows[k] <<
"\t" << cols[k] <<
"\t" << vals[k] <<
"\n";
377 for(
int i= 0; i<NSUBGRAPHS; i++)
void ProcessLines(vector< IT1 > &rows, vector< IT1 > &cols, vector< NT1 > &vals, vector< string > &lines, tommy_hashdyn &hashdyn, const vector< uint32_t > &shuffler)
void check_newline(int *bytes_read, int bytes_requested, char *buf)
void * shuffledprintfunc(void *arg, void *obj)
void MMConverter(const string &filename, ofstream &dictout, const string &outprefix)
bool FetchBatch(FILE *f_local, long int &curpos, long int end_fpos, bool firstcall, vector< string > &lines)
void push_to_vectors(vector< IT1 > &rows, vector< IT1 > &cols, vector< NT1 > &vals, IT2 ii, IT2 jj, NT2 vv)
string chop_head(const string &full_str, const string &head_str)
int compare(const void *arg, const void *obj)
string decompress_string(const string &str)
tommy_uint32_t tommy_hash_u32(tommy_uint32_t init_val, const void *void_key, tommy_size_t key_len)
void tommy_hashdyn_foreach(tommy_hashdyn *hashdyn, tommy_foreach_func *func)
tommy_inline void * tommy_hashdyn_search(tommy_hashdyn *hashdyn, tommy_search_func *cmp, const void *cmp_arg, tommy_hash_t hash)
void tommy_hashdyn_init(tommy_hashdyn *hashdyn)
void tommy_hashdyn_done(tommy_hashdyn *hashdyn)
void tommy_hashdyn_insert(tommy_hashdyn *hashdyn, tommy_hashdyn_node *node, void *data, tommy_hash_t hash)
void tommy_hashdyn_foreach_arg(tommy_hashdyn *hashdyn, tommy_foreach_arg_func *func, void *arg)
void tommy_foreach_arg_func(void *arg, void *obj)
void iota(_ForwardIter __first, _ForwardIter __last, T __value)