higher-ordering cluster的C语言实现

时间:2022-09-19 19:02:36
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<string.h>

#define INITIAL_SIZE100
#define INCREMENT_SIZE 100

int vertax_size;
int edge_size;
char filename_edge[20];
typedef struct Node{
double value;
int index;
}Node;
Node* eigenvalue;
Node* ordering;
double** eigenvector;
double* second_small_eigenvector;
double** D_inverse_matrix;
int** edge_info;
int** adjacentMatrix;
typedef struct Motif_Node{
int x;
int y;
int z;
}Motif_Node;
typedef struct Motif_Set{
Motif_Node* data;
int length;
int capacity;
}Motif_Set;
Motif_Set motif_set;

void readEigenvalue();
void readEigenVector();
void generate_sigma();
void read_D_inverse_matrix();
void readEdge();

void find_motifs();
int* get_neighbors(int, int*);
int* get_common(int*, int*, int, int);
void initial_motif_set();
void identify_motif(int, int, int*);
void test();
int calcu_cut(int*, int, int*, int);
int searchfor(int, int*, int);
void calcu_volume(int*, int, int*, int*, int, int*);
void test_1();

void singleCluster();

main(int argc, char* argv[])
{
if( argc != 4 )
{
printf("This algorithm require 2 parameters"
"\n\t1, the size of vertax"
"\n\t2, the file name that contain edge inforamtion"
"\n\t3, the size of edge"
"\n");
exit(0);
}
vertax_size = atoi(argv[1]);
strcat(filename_edge, argv[2]);
edge_size = atoi(argv[3]);
readEigenvalue();
readEigenVector();
read_D_inverse_matrix();
generate_sigma();

readEdge();
find_motifs();
singleCluster();
return 0;
}

void readEigenvalue()
{
FILE* fread;
if( NULL == (fread = fopen("eigenvalue.data", "r")))
{
printf("function: readEigenvalue: open file(eigenvalue.data) error");
exit(0);
}
int i;
if( !(eigenvalue = (Node*)malloc(sizeof(Node) * (vertax_size + 1))))
{
printf("function: readEigenvalue: eigenvalue* malloc error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
{
if( 1 != fscanf(fread, "%lf ", &eigenvalue[i].value))
{
printf("function: readEigenvalue: fscanf error: %d", i);
exit(0);
}
}
}
void readEigenVector()
{
FILE* fread;
if( NULL == (fread = fopen("eigenvector.data", "r")))
{
printf("function: readEigenVector: open file(eigenvector.data) error");
exit(0);
}
if( !(eigenvector = (double**)malloc(sizeof(double*) * (vertax_size + 1))) )
{
printf("function: readEigenvector: eigenvector** malloc error");
exit(0);
}
int i;
for( i = 1; i <= vertax_size; i++ )
{
if( !( eigenvector[i] = (double*)malloc(sizeof(double) * (vertax_size + 1)) ) )
{
printf("function: readEigenvector: eigenvector[%d]* malloc error: ", i);
exit(0);
}
}
int j;
for( i = 1; i <= vertax_size; i++ )
{
for( j = 1; j <= vertax_size; j++ )
{
if( 1 != fscanf(fread, "%lf ", &eigenvector[i][j]))
{
printf("function: readEigenvector: fscanf error: (%d, %d)", i, j);
exit(0);
}
}
}
}
void read_D_inverse_matrix()
{
FILE* fread;
if( NULL == (fread = fopen("D_inverse_matrix.data", "r")) )
{
printf("function: read_D: open file error");
exit(0);
}
if( !(D_inverse_matrix = (double**)malloc(sizeof(double*) * (vertax_size + 1))) )
{
printf("function: read_D: D_inverse_matrix** malloc error");
exit(0);
}
int i;
for( i = 1; i <= vertax_size; i++ )
if( !(D_inverse_matrix[i] = (double*)malloc(sizeof(double) * (vertax_size + 1))) )
{
printf("function: read_D: D_inverse_matrix[%d]* malloc error");
exit(0);
}
int j;
for( i = 1; i <= vertax_size; i++ )
{
for( j = 1; j <= vertax_size; j++ )
{
if( 1 != fscanf(fread, "%lf ", &D_inverse_matrix[i][j]))
{
printf("function: read_D_inverse_matrix: fscanf error (%d, %d)", i, j);
exit(0);
}
}
}
fclose(fread);
}
void generate_sigma()
{
int i, j;
double smaller_value, temp;
int smaller_index, temp_index;
for( i = 1; i <= vertax_size; i++ )
eigenvalue[i].index = i;
for( i = 1; i < vertax_size; i++ )
{
smaller_index = i;
smaller_value = eigenvalue[i].value;
for( j = i + 1; j <= vertax_size; j++ )
if( eigenvalue[j].value < eigenvalue[smaller_index].value )
smaller_index = j;
if( smaller_index != i )
{
temp = eigenvalue[smaller_index].value;
temp_index = eigenvalue[smaller_index].index;
eigenvalue[smaller_index].value = eigenvalue[i].value;
eigenvalue[smaller_index].index = eigenvalue[i].index;
eigenvalue[i].value = temp;
eigenvalue[i].index = temp_index;
}
}

if( !(second_small_eigenvector = (double*)malloc(sizeof(double) * (vertax_size + 1))) )
{
printf("function: generate_sigma: second_small_eigenvector* malloc error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
second_small_eigenvector[i] = eigenvector[i][eigenvalue[2].index];

if( !(ordering = (Node*)malloc(sizeof(Node) * (vertax_size + 1))) )
{
printf("function: generate_sigma: ordering* malloc error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
ordering[i].index = i;
for( i = 1; i <= vertax_size; i++ )
{
for( j = 1; j <= vertax_size; j++ )
{
ordering[i].value += D_inverse_matrix[i][j] * second_small_eigenvector[j];
}
}

for( i = 1; i < vertax_size; i++ )
{
smaller_index = i;
smaller_value = ordering[i].value;
for( j = i + 1; j <= vertax_size; j++ )
if( ordering[j].value < ordering[smaller_index].value )
smaller_index = j;
if( smaller_index != i )
{
temp = ordering[i].value;
ordering[i].value = ordering[smaller_index].value;
ordering[smaller_index].value = temp;
temp_index = ordering[i].index;
ordering[i].index = ordering[smaller_index].index;
ordering[smaller_index].index = temp_index;
}
}

printf("show the sigma value: \n");
for( i = 1; i <= vertax_size; i++ )
printf("%d ", ordering[i].index);
printf("\n");
}

void readEdge()
{
FILE* fread;
if( NULL == (fread = fopen(filename_edge, "r")))
{
printf("function: readEdge: open file(%s) error", filename_edge);
exit(0);
}
if( !(edge_info = (int**)malloc(sizeof(int*) * (edge_size + 1))) )
{
printf("function: readEdge: edge_info** malloc error");
exit(0);
}
int i, j;
for( i = 1; i <= edge_size; i++ )
{
if( !(edge_info[i] = (int*)malloc(sizeof(int) * (2 + 1))) )
{
printf("function: readEdge: edge_info[%d]* malloc error", i);
exit(0);
}
}
for( i = 1; i <= edge_size; i++ )
{
if( 2 != fscanf(fread, "%d\t%d", &edge_info[i][1], &edge_info[i][2]))
{
printf("function: readEdge: fscanf error: %d", i);
exit(0);
}
}
fclose(fread);

/*create the adjacent matrix*/
if( !(adjacentMatrix = (int**)malloc(sizeof(int*) * (vertax_size + 1))) )
{
printf("function: readEdge: adjacentMatrix** malloc error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
if( !(adjacentMatrix[i] = (int*)malloc(sizeof(int) * (vertax_size + 1))) )
{
printf("f: readEdge: adjacentMatrix[%d]* malloc error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
for( j = 1; j <= vertax_size; j++ )
adjacentMatrix[i][j] = 0;
for( i = 1; i <= edge_size; i++ )
{
adjacentMatrix[edge_info[i][1]][edge_info[i][2]] = 1;
//adjacentMatrix[edge_info[i][2]][edge_info[i][1]] = 1;
}
}


void find_motifs()
{
int i, j;
int neighbor_length_0 = 0;
int neighbor_length_1 = 0;
int* neighbor_0;
int* neighbor_1;
int* neighbor_2;
int* common_neighbors;
initial_motif_set();
for( i = 1; i <= vertax_size; i++ )
{
neighbor_0 = get_neighbors(i, &neighbor_length_0);//得到节点i的所有的邻居和邻居的个数
printf("Node: %3d ---- the length : %d\n", i, neighbor_length_0);
for( j = 1; j <= neighbor_length_0; j++ )
{
neighbor_1 = get_neighbors(neighbor_0[j], &neighbor_length_1);//得到i的第j个邻居的所有邻居和邻居的个数
common_neighbors = get_common(neighbor_0, neighbor_1, neighbor_length_0, neighbor_length_1);//求共同邻居
identify_motif(i, neighbor_0[j], common_neighbors);//求motif的数量
}
}
}
void initial_motif_set()
{
if( !(motif_set.data = (Motif_Node*)malloc(sizeof(Motif_Node) * (INITIAL_SIZE + 1))) )
{
printf("F: initial_motif_set: motif_set.data* malloc error");
exit(0);
}
motif_set.length = 0;
motif_set.capacity = INITIAL_SIZE;
}
int* get_neighbors(int vertaxID, int* length)
{
int* neighborSet;
int i;
*length = 0;
for( i = vertaxID + 1; i <= vertax_size; i++ )
if( 1 == adjacentMatrix[vertaxID][i] || 1 == adjacentMatrix[i][vertaxID] )
(*length)++;
//printf("length: %d\n", *length);
if( !(neighborSet = (int*)malloc(sizeof(int) * (*length + 1))) )
{
printf("fun: get_neighbors: neighborSet* malloc error");
exit(0);
}
int counter = 1;
for( i = vertaxID + 1; i <= vertax_size; i++ )
if( 1 == adjacentMatrix[vertaxID][i] || 1 == adjacentMatrix[i][vertaxID] )
neighborSet[counter++] = i;

return neighborSet;
}

int* get_common(int* set_1, int* set_2, int length_1, int length_2)
{
int* lib;
if( !(lib = (int*)malloc(sizeof(int) * (vertax_size + 1))) )
{
printf("function: get_common: lib* malloc error");
exit(0);
}
int i, counter, common_counter;
counter = common_counter = 0;
for( i = 1; i <= vertax_size; i++ )
lib[i] = 0;
for( i = 1; i <= length_1; i++ )
lib[set_1[i]]++;
for( i = 1; i <= length_2; i++ )
lib[set_2[i]]++;
for( i = 1; i <= vertax_size; i++ )
if( 2 == lib[i] )
common_counter++;
int* common_neighbor;
if( !(common_neighbor = (int*)malloc(sizeof(int) * (common_counter + 1))) )
{
printf("f: get_common: common_neighbor* malloc error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
if( 2 == lib[i] )
common_neighbor[++counter] = i;
common_neighbor[0] = counter;
return common_neighbor;
}
void identify_motif(int vertaxID_1, int vertaxID_2, int* commonSet)
{
int i, addFlag;
for( i = 1; i <= commonSet[0]; i++ )
{
addFlag = 0;
if( adjacentMatrix[vertaxID_1][vertaxID_2] * adjacentMatrix[vertaxID_2][vertaxID_1] )
{
if( adjacentMatrix[vertaxID_1][commonSet[i]] * adjacentMatrix[vertaxID_2][commonSet[i]] && !(adjacentMatrix[commonSet[i]][vertaxID_1] + adjacentMatrix[commonSet[i]][vertaxID_2]) )
addFlag = 1;
}
else if( adjacentMatrix[vertaxID_1][commonSet[i]] * adjacentMatrix[commonSet[i]][vertaxID_1] )
{
if( adjacentMatrix[vertaxID_1][vertaxID_2] * adjacentMatrix[commonSet[i]][vertaxID_2] && !(adjacentMatrix[vertaxID_2][vertaxID_1] + adjacentMatrix[vertaxID_2][commonSet[i]]) )
addFlag = 1;
}
else if( adjacentMatrix[vertaxID_2][commonSet[i]] * adjacentMatrix[commonSet[i]][vertaxID_2] )
{
if( adjacentMatrix[vertaxID_2][vertaxID_1] * adjacentMatrix[commonSet[i]][vertaxID_1] && !(adjacentMatrix[vertaxID_1][vertaxID_2] + adjacentMatrix[vertaxID_1][commonSet[i]]) )
addFlag = 1;
}
if( 1 == addFlag )
{
if( motif_set.length >= motif_set.capacity )
{
if( !(motif_set.data = (Motif_Node*)realloc(motif_set.data, sizeof(Motif_Node) * (motif_set.capacity + INCREMENT_SIZE))) )
{
printf("F: indentify_motif: motif_set.data* realloc error");
exit(0);
}
motif_set.capacity += INCREMENT_SIZE;
}
motif_set.length++;
motif_set.data[motif_set.length].x = vertaxID_1;
motif_set.data[motif_set.length].y = vertaxID_2;
motif_set.data[motif_set.length].z = commonSet[i];
}
}
}


calcu_cut(int* set_1, int length_1, int* set_2, int length_2)
{
int cut_size = 0;
int i, j;
int small_index;
int small_value;
int temp;

length_1 -= 1;
length_2 -= 1;

for( i = 1; i < length_1; i++ )//all the operator of -1 is because of useless index 0
{
small_index = i;
small_value = set_1[i];
for( j = i + 1; j <= length_1; j++ )
if( set_1[j] < set_1[small_index] )
small_index = j;
if( i != small_index )
{
temp = set_1[i];
set_1[i] = set_1[small_index];
set_1[small_index] = temp;
}
}

for( i = 1; i <= motif_set.length; i++ )
{
if( 3 != searchfor(motif_set.data[i].x, set_1, length_1) + searchfor(motif_set.data[i].y, set_1, length_1) + searchfor(motif_set.data[i].z, set_1, length_1) && 0 != searchfor(motif_set.data[i].x, set_1, length_1) + searchfor(motif_set.data[i].y, set_1, length_1) + searchfor(motif_set.data[i].z, set_1, length_1) )
{
cut_size++;
}
}
return cut_size;
}
/*
* search the neighbors int the @set with length @length
* */
searchfor(int vertaxID, int* set, int length)
{
int low, high, mid;
low = 1;
high = length;//because of my hibit of coding, (do not use index 0 of array), so have to -1
while( low <= high )
{
mid = (low + high) / 2;
if( set[mid] == vertaxID )
return 1;
else if( set[mid] > vertaxID )
high = mid - 1;
else
low = mid + 1;
}
if( low > high )
return 0;
}

void calcu_volume(int* set_1, int length_1, int* result_1, int* set_2, int length_2, int* result_2)
{
length_1 -= 1;
length_2 -= 1;

int i;
int* base;
if( !(base = (int*)malloc(sizeof(int) * (vertax_size + 1))) )
{
printf("F: calcu_volume: base* malloc error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
base[i] = 0;
*result_1 = *result_2 = 0;
for( i = 1; i <= motif_set.length; i++ )
{
if( searchfor(motif_set.data[i].x, set_1, length_1) && !base[motif_set.data[i].x] )
{
(*result_1)++;
base[motif_set.data[i].x] = 1;
}
else if( !base[motif_set.data[i].x] )
{
(*result_2)++;
base[motif_set.data[i].x] = 1;
}
if( searchfor(motif_set.data[i].y, set_1, length_1) && !base[motif_set.data[i].y] )
{
(*result_1)++;
base[motif_set.data[i].y] = 1;
}
else if( !base[motif_set.data[i].y] )
{
(*result_2)++;
base[motif_set.data[i].y] = 1;
}
if( searchfor(motif_set.data[i].z, set_1, length_1) && !base[motif_set.data[i].z] )
{
(*result_1)++;
base[motif_set.data[i].z] = 1;
}
else if( !base[motif_set.data[i].z] )
{
(*result_2)++;
base[motif_set.data[i].z] = 1;
}
}
}

void singleCluster()
{
int* set_1;
int* set_2;
int i, j;
int cut, vol_1, vol_2;
cut = vol_1 = vol_2 = 0;
double* conductance;
if( !(conductance = (double*)malloc(sizeof(double) * (vertax_size + 1))) )
{
printf("F: singleCluster: conductance* malloc error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
{
if( !(set_1 = (int*)malloc(sizeof(int) * (i + 1))) )
{
printf("F: singleCluster: set_1* malloc error");
exit(0);
}
if( !(set_2 = (int*)malloc(sizeof(int) * (vertax_size - i + 1))) )
{
printf("F: singleCluster: set_2* malloc error");
exit(0);
}
for( j = 1; j <= i; j++ )
set_1[j] = ordering[j].index;
for( j = i + 1; j <= vertax_size; j++ )
set_2[j - i] = ordering[j].index;
cut = calcu_cut(set_1, i + 1, set_2, vertax_size - i + 1);
calcu_volume(set_1, i + 1, &vol_1, set_2, vertax_size - i + 1, &vol_2);
conductance[i] = (double)cut/(double)(vol_1 < vol_2 ? vol_1 : vol_2);
free(set_1);
set_1 = NULL;
free(set_2);
set_2 = NULL;
}

FILE* fwrite;
if( NULL == (fwrite = fopen("motif_conductance.data", "w")) )
{
printf("F: single_cluster: open file(motif_conductance.data) error");
exit(0);
}
for( i = 1; i <= vertax_size; i++ )
{
printf("%f ", conductance[i]);
fprintf(fwrite, "%d\t%f\n", i, conductance[i]);
}
fclose(fwrite);
}