16 vector<size_t> idx(v.size());
17 iota(idx.begin(), idx.end(), 0);
20 sort(idx.begin(), idx.end(),
21 [&v](
size_t i1,
size_t i2) {return v[i1] > v[i2];});
28double Fscore(
string mclFile,
string hipmclFile,
int base)
30 ifstream mclStream (mclFile);
31 ifstream hipmclStream (hipmclFile);
34 int64_t numMCLClusters, numHipMCLClusters;
38 vector<vector<int64_t>> mclClusters;
40 if (mclStream.is_open())
42 while ( getline (mclStream,line) )
44 istringstream iss(line);
45 if(clustID >= mclClusters.size())
47 mclClusters.resize(clustID * 2 + 1);
52 mclClusters[clustID].push_back(item);
55 nproteins += mclClusters[clustID].size();
59 numMCLClusters = clustID;
63 cout <<
"Unable to open " << mclFile << endl;
67 cout <<
"Number of clusters from MCL: " << numMCLClusters << endl;
68 vector<int64_t> clust2(nproteins, -1);
70 if (hipmclStream.is_open())
72 while ( getline (hipmclStream,line) )
74 istringstream iss(line);
78 clust2[item] = clustID;
81 cout <<
"The number of vertices in MCL and HipMCL outputs does not match. \n Please check if there are isolated vertices in HipMCL.\n Exiting.............." << endl;
88 numHipMCLClusters = clustID;
92 cout <<
"Unable to open " << hipmclFile << endl;
97 for(
int i=0; i<nproteins; i++)
100 clust2[i]=numHipMCLClusters++;
102 cout <<
"Number of clusters from HipMCL: " << numHipMCLClusters << endl;
104 vector<int64_t> clusterSizes2(numHipMCLClusters,0);
105 for(
int i=0; i<nproteins; i++)
107 clusterSizes2[clust2[i]]++;
111 vector<double> F(numMCLClusters);
112 vector<int64_t> nisect(numHipMCLClusters);
113 vector<int64_t> isect(numHipMCLClusters);
116 for(
int i=0; i<numMCLClusters; i++)
119 fill(nisect.begin(), nisect.end(), 0);
120 for(
int j=0; j<mclClusters[i].size(); j++)
122 int64_t item1 = mclClusters[i][j];
124 if(nisect[c2]==0) isect[isectCount++] = c2;
127 auto maxoverlap = max_element(nisect.begin(), nisect.end());
128 int64_t c2_max = distance(nisect.begin(), maxoverlap);
129 if(*maxoverlap!=mclClusters[i].
size() || *maxoverlap!=clusterSizes2[c2_max])
131 cout <<
"Mismatch# " << mismatch++ <<
":: MCL Cluster: "<< i <<
" Size: " << mclClusters[i].size() <<
" HipMCL Cluster: "<< c2_max <<
" Size: " << clusterSizes2[c2_max] <<
" last vertex in MCL output: " << mclClusters[i].back()<< endl;
135 for(
int j=0; j<isectCount; j++)
138 double precision = (double) nisect[c2] / clusterSizes2[c2];
139 double recall = (double) nisect[c2] / mclClusters[i].
size();
140 double Fij = 2 * precision * recall / (precision + recall);
144 Fi = Fi * mclClusters[i].size()/ nproteins;
147 return accumulate(F.begin(), F.end(), 0.0);
150int main(
int argc,
char* argv[])
153 string ifilename1 =
"";
154 string ifilename2 =
"";
160 cout <<
"Usage: ./fscore -M1 <MCLOut> -M2 <HipMCLOut> -base <Base of vertices (same as HipMCL) 1 or 0>\n";
161 cout <<
"Example: ./fscore -M1 input1.txt -M2 input2.txt -base 0 " << endl;
165 for (
int i = 1; i < argc; i++)
167 if (strcmp(argv[i],
"-M1")==0)
169 ifilename1 = string(argv[i+1]);
170 printf(
"\nMCL output file: %s",ifilename1.c_str());
172 else if (strcmp(argv[i],
"-M2")==0)
174 ifilename2 = string(argv[i+1]);
175 printf(
"\nHipMCL output file: %s",ifilename2.c_str());
177 else if (strcmp(argv[i],
"-base")==0)
179 base = atoi(argv[i+1]);
180 printf(
"\nbase: %d",base);
184 double F =
Fscore(ifilename1, ifilename2, base);
185 cout <<
"F score: " << F << endl;
int main(int argc, char *argv[])
double Fscore(string mclFile, string hipmclFile, int base)
vector< size_t > sortIndices(const vector< T > &v)
void iota(_ForwardIter __first, _ForwardIter __last, T __value)